package edu.upenn.cit594.datamanagement; import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import edu.upenn.cit594.ui.UserInterface; import edu.upenn.cit594.util.Population; public class PopulationsReader { protected static String filename; public PopulationsReader(String name) { filename = name; } public ArrayList<Population> readPopulationCSV (){ File csvFile = new File(filename); if(!csvFile.exists()) { UserInterface.print("error: Population csv file does not exist."); return null; } else if (!csvFile.canRead()) { UserInterface.print("error: Population csv file can not be read."); return null; } //To store the Population Object Entries ArrayList<Population> Population = new ArrayList<Population>(); //Stores BufferedReader line String line = ""; try { try (BufferedReader br = new BufferedReader(new FileReader(csvFile))) { //Get Key:Val pair of columns Name:Index HashMap<String, Integer> headerColsMap = new HashMap<String, Integer>(); headerColsMap = readHeader(filename); int numColumns = headerColsMap.size(); //Initialize column indexes int zipCodeIndex = -1; int popIndex = -1; //Reads the first line to get header row out of the way @SuppressWarnings("unused") String headerLine = br.readLine(); //Loop through the rest of csv file while((line = br.readLine()) != null) { //Splits by "," but ignores any commas surrounded by double quotes //String[] colValues = line.split(",(?=(?:[^\\\"]*\\\"[^\\\"]*\\\")*[^\\\"]*$)"); //Stores column values String[] colValues = new String[numColumns]; int startIndex = 0; //start index for character int endIndex = 0; //end index for character int currentIndex = 0; //column index //Character Reader //Splits by "," but ignores any commas surrounded by double quotes for (int i=0; i < line.length(); i++) { //cast character to string String c = Character.toString(line.charAt(i)); boolean inQuotes = false; //Any time you see quotes, quotes boolean get's toggled if (c.equals("\"")) { inQuotes = !inQuotes; //if we get to the end of the line, then parse last column out } else if (i == line.length()-1) { //if the last character is a comma if (c.equals(",")) { //store value as an empty string colValues[colValues.length-1] = ""; } else { //store value as whatever the string is colValues[colValues.length-1] = line.substring(startIndex); } //if we see a comma and not in quotes in the middle of the line }else if ((c.equals(",") && inQuotes == false)) { //index i will be the comma endIndex = i; //if index is at end of the column if (currentIndex >= numColumns) { break; } //add the column value colValues[currentIndex] = line.substring(startIndex, endIndex); currentIndex++; // new start index to next character index startIndex = endIndex+1; endIndex = startIndex; } } //Set any null or empty string values to 0 for (int i=0; i<colValues.length; i++) { if(colValues[i] == null || colValues[i].equals("")) { colValues[i] = "0"; } } //Check column name exists for Object Fields boolean zipCodeExists = headerColsMap.containsKey("\"zip_code\""); boolean popExists = headerColsMap.containsKey("\"population\""); //Set column indexes //if any of the columns are missing from the csv file, display a warning message if (zipCodeExists) { zipCodeIndex = headerColsMap.get("\"zip_code\""); }else { UserInterface.print("Warning: csv file is missing a zip_code column."); } if (popExists) { popIndex = headerColsMap.get("\"population\""); }else { UserInterface.print("Warning: csv file is missing a population column."); } //If zip code length is not equal to 5, null, or empty then skip to next population data row //Takes out quotation marks if(colValues[zipCodeIndex].strip().substring(1,colValues[zipCodeIndex].length()-1).length() != 5 || colValues[zipCodeIndex].equals("0")) { continue; } //If population is not an integer, then skip row if(!isInt(colValues[popIndex])) { continue; } //If any of the indexes are still -1: means that the column name did not exist. Set the field's value to -1 if (zipCodeIndex == -1) { colValues[zipCodeIndex] = "-1"; } if (popIndex == -1) { colValues[popIndex] = "-1"; } //Add a new Population object Population.add(new Population( Integer.parseInt(colValues[zipCodeIndex].substring(1, 6)), // set zip code Integer.parseInt(colValues[popIndex]) // set population )); } br.close(); //return CovidData Object return Population; } catch (NumberFormatException e) { e.printStackTrace(); } } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } //If we don't make it into the try, then returns empty ArrayList return Population; } /** * Get the column index of every column's name * @param filename: the csv file * @return a HashMap where the key is the column's name and the value is it's index */ public static HashMap<String, Integer> readHeader (String filename){ File csvFile = new File(filename); if(!csvFile.exists()) { UserInterface.print("error: csv file does not exist."); return null; } else if (!csvFile.canRead()) { UserInterface.print("error: csv file can not be read."); return null; } //Keys: column's name //Values: column's index HashMap<String, Integer> headerColsMap = new HashMap<String, Integer>(); try { try (BufferedReader br = new BufferedReader(new FileReader(csvFile))) { String headerLiner = br.readLine(); //Splits by "," but ignores any commas surrounded by double quotes String[] headerArray = headerLiner.split(",(?=(?:[^\\\"]*\\\"[^\\\"]*\\\")*[^\\\"]*$)"); //Stores index of column name int i = 0; for(String col : headerArray) { headerColsMap.put(col, i); i++; } br.close(); } return headerColsMap; } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return headerColsMap; } public static boolean isInt(String str) { return str.matches("[+-]?[0-9]+"); //match a number with optional negative } }