# Source: fcp2021 / real_data / data_metrics_1.py
#%%
from requests import get
import json
from json import dumps
import pandas as pd

class Get_covid_api_data:
    """Small client for the UK coronavirus dashboard ``/v1/data`` endpoint.

    An instance stores the endpoint URL, the area filters and the requested
    response structure; :meth:`get_metrics` performs the HTTP request.
    """

    def __init__(self, area_type="region", area_name="South West"):
        """Configure which area the request is filtered on.

        Args:
            area_type: Value sent as ``areaType``. Per the original notes this
                can be nation / region / utla / ltla.
            area_name: Value sent as ``areaName``. Per the original notes:
                for a nation e.g. england / scotland / wales, for a region
                e.g. London / Southwest / Northeast.

        The defaults reproduce the previously hard-coded "South West" region,
        so ``Get_covid_api_data()`` behaves exactly as before.
        """
        self.ENDPOINT = "https://api.coronavirus.data.gov.uk/v1/data"
        self.AREA_TYPE = area_type
        self.AREA_NAME = area_name
        # Each filter is a "key=value" string; get_metrics joins them with ";".
        self.filters = [
            f"areaType={self.AREA_TYPE}",
            f"areaName={self.AREA_NAME}",
        ]
        # Keys are the field names we want back, values are the metric names
        # requested from the API.
        self.Structure = {
            "date": "date",
            "name": "areaName",
            "code": "areaCode",
            "dailyCases": "newCasesByPublishDate",
            "cumulativeCases": "cumCasesByPublishDate",
            "dailyDeaths": "newDeaths28DaysByPublishDate",
            "cumulativeDeaths": "cumDeaths28DaysByPublishDate",
        }

    def get_metrics(self):
        """Request the configured metrics and return the raw response.

        Returns:
            The response object returned by ``requests.get``.

        Raises:
            RuntimeError: If the HTTP status code is 400 or above.
        """
        api_params = {
            "filters": ";".join(self.filters),
            # Compact separators keep the JSON in the query string short.
            "structure": dumps(self.Structure, separators=(",", ":")),
        }
        response = get(self.ENDPOINT, params=api_params, timeout=10)
        if response.status_code >= 400:
            raise RuntimeError(f'Request failed: { response.text }')
        print(response.status_code)
        print(response.json())
        return response

class Metrics_to_PD(Get_covid_api_data):
    """Turn the API response into per-column lists and a pandas DataFrame.

    The constructor is inherited from ``Get_covid_api_data``; no override is
    needed for this class to be a subclass.
    """

    # NOTE(review): both constants were inline magic numbers. 2935 is the
    # recovery value the original code treated as a "repeated number" and
    # blanked out; it looks specific to one dataset -- confirm before reuse.
    _REPEATED_RECOVERY_VALUE = 2935
    # Factor applied to the previous list entry to replace a blanked value.
    # (An older comment said 0.948, but the code has always used 0.845.)
    _RECOVERY_DECAY = 0.845

    def dump_to_json(self, response, path="covid_data_southwest.json"):
        """Write the response's JSON payload to ``path`` and return it.

        Args:
            response: Object with a ``json()`` method (the API response).
            path: Output file; change it to match the region / nation queried.
                Defaults to the previously hard-coded file name.

        Returns:
            The decoded JSON payload.
        """
        payload = response.json()  # decode once, reuse for file and return
        with open(path, "w", encoding="utf-8") as f:
            json.dump(payload, f, indent=4)
        return payload

    def build_template(self, results):
        """Pivot the per-day records into a dict of equal-length columns.

        Args:
            results: Object whose ``json()["data"]`` is a list with one dict
                per day.

        Returns:
            Dict mapping each column name to a list of values. ``None`` values
            are stored as 0 and the "code" field is dropped.

        Raises:
            KeyError: If a record has a key other than "code" that is not a
                column of the template.
        """
        records = results.json()["data"]
        template = {
            "date": [],
            "name": [],
            "dailyCases": [],
            "cumulativeCases": [],
            "dailyDeaths": [],
            "cumulativeDeaths": [],
        }
        for record in records:
            for key, value in record.items():
                if key == "code":
                    continue
                template[key].append(0 if value is None else value)
        return template

    def fill_cumulative_data(self, template: dict):
        """Fill in missing (0) cumulative case counts, in place.

        A missing entry becomes the previous list entry minus that previous
        entry's daily cases. This presumably relies on the records being
        ordered newest first -- verify against the API response.

        Unlike the earlier version, the output always has exactly one value
        per input row: entries that are already non-zero are kept, and the
        first entry is always kept as the starting point. An empty input
        yields an empty column instead of raising ``IndexError``.

        Args:
            template: Column dict with "cumulativeCases" and "dailyCases".

        Returns:
            The same dict, with "cumulativeCases" replaced.
        """
        reported = template["cumulativeCases"]
        daily = template["dailyCases"]
        filled = []
        for i, cum_cases in enumerate(reported):
            if i == 0 or cum_cases != 0:
                filled.append(cum_cases)
            else:
                filled.append(filled[-1] - daily[i - 1])

        template.update({"cumulativeCases": filled})
        return template

    def fill_recovery_data(self, template, rec_rate: float):
        """Add a "recovered_cases" column derived from cumulative cases.

        Each value is ``round(cumulative * rec_rate)``. Values equal to
        ``_REPEATED_RECOVERY_VALUE`` are blanked to 0, and every 0 that has a
        predecessor is replaced by ``round(_RECOVERY_DECAY * previous)``.
        A 0 in the first position is left as 0: it has no predecessor, and the
        earlier version accidentally wrapped around to the last element.

        Args:
            template: Column dict containing "cumulativeCases".
            rec_rate: Multiplier applied to cumulative cases (e.g. 0.75).

        Returns:
            The same dict, with "recovered_cases" added.
        """
        recovered = [round(x * rec_rate) for x in template["cumulativeCases"]]
        recovered = [
            0 if x == self._REPEATED_RECOVERY_VALUE else x for x in recovered
        ]
        # Walk by position so each zero is patched at its own index; the
        # previous entry has already been patched when we reach the next one.
        for i, value in enumerate(recovered):
            if value == 0 and i > 0:
                recovered[i] = round(self._RECOVERY_DECAY * recovered[i - 1])
        template.update({"recovered_cases": recovered})

        return template

    def main(self):
        """Fetch the data, write it to JSON, build the DataFrame and preview it.

        Prints the first and last five rows of the resulting DataFrame.
        """
        r = self.get_metrics()  # raw API response
        self.dump_to_json(r)  # write results to json file
        template = self.build_template(r)  # column lists from the records
        enhanced_template = self.fill_cumulative_data(template)
        further_template = self.fill_recovery_data(enhanced_template, 0.75)
        DF = pd.DataFrame.from_dict(further_template)

        print(DF.head(), DF.tail())  # print first 5 days, last 5 days


# Script entry point: runs only when the file is executed directly (or as a cell),
# not when it is imported.
if __name__ == "__main__":
    uk_covid_data = Metrics_to_PD() # pipeline object; its constructor sets the endpoint, filters and structure
    uk_covid_data.main() # run the steps defined in Metrics_to_PD.main
# %%