package edu.upenn.cit594.datamanagement;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import edu.upenn.cit594.ui.UserInterface;
import edu.upenn.cit594.util.CovidData;

/**
 * Reads covid vaccination records from a CSV file into {@link CovidData} objects.
 *
 * <p>The column positions are taken from the header row, so the columns may
 * appear in any order. Rows whose zip code is not 5 characters long or whose
 * timestamp does not contain a {@code YYYY-MM-DD hh:mm:ss} value are skipped.
 */
public class CSVCovidReader implements CovidReader {

    /** Splits on "," but ignores any commas surrounded by double quotes. */
    private static final Pattern CSV_SPLIT =
            Pattern.compile(",(?=(?:[^\\\"]*\\\"[^\\\"]*\\\")*[^\\\"]*$)");

    /** Timestamp format "YYYY-MM-DD hh:mm:ss". */
    private static final Pattern TIMESTAMP = Pattern.compile(
            "[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[1-2][0-9]|3[0-1]) (2[0-3]|[01][0-9]):[0-5][0-9]:[0-5][0-9]");

    /**
     * Path of the covid CSV file.
     * NOTE: this field is static, so it is shared by every instance; the most
     * recently constructed reader determines which file is read.
     */
    protected static String filename;

    /**
     * Creates a reader for the given covid CSV file.
     *
     * @param name path of the covid CSV file
     */
    public CSVCovidReader(String name) {
        filename = name;
    }

    /**
     * Reads the covid CSV file named by {@link #filename} and converts every
     * valid row into a {@link CovidData} object.
     *
     * <p>A warning is printed once for each expected column that is missing
     * from the header. Missing or empty cells are treated as "0". Rows with a
     * zip code that is not 5 characters long, with a missing/invalid timestamp,
     * or with a non-numeric count are skipped; the remaining rows are still read.
     *
     * @return the parsed records (possibly empty), or {@code null} if the file
     *         does not exist or cannot be read
     */
    public static ArrayList<CovidData> readCovidCSV() {
        File csvFile = checkedFile(filename, "covid csv");
        if (csvFile == null) {
            return null;
        }

        ArrayList<CovidData> records = new ArrayList<>();

        try (BufferedReader br = new BufferedReader(new FileReader(csvFile))) {
            String headerLine = br.readLine();
            if (headerLine == null) {
                // Empty file: nothing to parse.
                return records;
            }

            // The header is parsed once, up front, instead of once per data row.
            HashMap<String, Integer> header = indexHeader(headerLine);
            int zipCodeIndex = columnIndex(header, "zip_code", "a");
            int partialVaxIndex = columnIndex(header, "partially_vaccinated", "a");
            int fullVaxIndex = columnIndex(header, "fully_vaccinated", "a");
            int boostedIndex = columnIndex(header, "boosted", "a");
            int timeStampIndex = columnIndex(header, "etl_timestamp", "an");

            String line;
            while ((line = br.readLine()) != null) {
                // Limit -1 keeps trailing empty fields so indexes line up with the header.
                String[] cols = CSV_SPLIT.split(line, -1);

                // If zipcode length is not equal to 5, skip to the next row.
                String zipCode = field(cols, zipCodeIndex).strip();
                if (zipCode.length() != 5) {
                    continue;
                }

                // find() accepts the pattern anywhere in the field (for example
                // when the value is wrapped in quotes), as the original check did.
                String timeStamp = field(cols, timeStampIndex);
                if (timeStamp.equals("0") || !TIMESTAMP.matcher(timeStamp).find()) {
                    continue;
                }

                try {
                    records.add(new CovidData(
                            Integer.parseInt(zipCode),
                            Integer.parseInt(field(cols, partialVaxIndex).strip()),
                            Integer.parseInt(field(cols, fullVaxIndex).strip()),
                            Integer.parseInt(field(cols, boostedIndex).strip()),
                            timeStamp));
                } catch (NumberFormatException ignored) {
                    // A non-numeric value invalidates this row only; keep reading the rest.
                }
            }
        } catch (IOException e) {
            UserInterface.print("error: could not read covid csv file: " + e.getMessage());
        }

        // On an I/O error this holds whatever was parsed before the failure.
        return records;
    }

    /**
     * Gets the column index of every column name in the header row of a CSV file.
     * Keys are stored exactly as they appear in the file (including any
     * surrounding double quotes).
     *
     * @param filename the csv file
     * @return a HashMap where the key is the column's name and the value is its
     *         index (empty if the file is empty or an I/O error occurs), or
     *         {@code null} if the file does not exist or cannot be read
     */
    public static HashMap<String, Integer> readHeader(String filename) {
        File csvFile = checkedFile(filename, "csv");
        if (csvFile == null) {
            return null;
        }

        try (BufferedReader br = new BufferedReader(new FileReader(csvFile))) {
            return indexHeader(br.readLine());
        } catch (IOException e) {
            UserInterface.print("error: could not read csv file: " + e.getMessage());
            return new HashMap<>();
        }
    }

    /** Returns the file at {@code path}, or prints an error and returns null if it is missing or unreadable. */
    private static File checkedFile(String path, String label) {
        File file = (path == null) ? null : new File(path);
        if (file == null || !file.exists()) {
            UserInterface.print("error: " + label + " file does not exist.");
            return null;
        }
        if (!file.canRead()) {
            UserInterface.print("error: " + label + " file can not be read.");
            return null;
        }
        return file;
    }

    /** Maps each column name of a header line to its position; a null line yields an empty map. */
    private static HashMap<String, Integer> indexHeader(String headerLine) {
        HashMap<String, Integer> headerColsMap = new HashMap<>();
        if (headerLine == null) {
            return headerColsMap;
        }
        String[] names = CSV_SPLIT.split(headerLine);
        for (int i = 0; i < names.length; i++) {
            headerColsMap.put(names[i], i);
        }
        return headerColsMap;
    }

    /**
     * Looks up a column by name, accepting both the quoted ("name") and the
     * bare (name) header form; prints a warning and returns -1 if absent.
     */
    private static int columnIndex(HashMap<String, Integer> header, String name, String article) {
        Integer index = header.get("\"" + name + "\"");
        if (index == null) {
            index = header.get(name);
        }
        if (index == null) {
            UserInterface.print("Warning: csv file is missing " + article + " " + name + " column.");
            return -1;
        }
        return index;
    }

    /** Returns the cell at {@code index}, or "0" when the column is missing or the cell is null/blank. */
    private static String field(String[] cols, int index) {
        if (index < 0 || index >= cols.length) {
            return "0";
        }
        String value = cols[index];
        return (value == null || value.strip().isEmpty()) ? "0" : value;
    }
}