// Source path: OpenDataPhillyFinal/src/edu/upenn/cit594/datamanagement/PopulationsReader.java
package edu.upenn.cit594.datamanagement;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;

import edu.upenn.cit594.ui.UserInterface;
import edu.upenn.cit594.util.Population;

/**
 * Reads a population CSV file and turns every valid data row into a
 * {@link Population} object.
 *
 * <p>The first line of the file is treated as the header row. The columns
 * {@code "zip_code"} and {@code "population"} are located by name, so their
 * position in the file does not matter.
 */
public class PopulationsReader {

	/** Header text of the zip code column, including the surrounding double quotes. */
	private static final String ZIP_CODE_COLUMN = "\"zip_code\"";

	/** Header text of the population column, including the surrounding double quotes. */
	private static final String POPULATION_COLUMN = "\"population\"";

	/** Splits on "," but ignores any commas surrounded by double quotes. */
	private static final String CSV_SPLIT_REGEX = ",(?=(?:[^\\\"]*\\\"[^\\\"]*\\\")*[^\\\"]*$)";

	/**
	 * Path of the CSV file to read.
	 *
	 * <p>NOTE(review): this field is static, so constructing a second reader
	 * changes the file read by every instance. It is left static (and
	 * protected) so that code outside this class that refers to it keeps
	 * compiling.
	 */
	protected static String filename;

	/**
	 * Creates a reader for the given file.
	 *
	 * @param name path of the population CSV file
	 */
	public PopulationsReader(String name) {
		filename = name;
	}

	/**
	 * Reads the population CSV file into a list of {@link Population} objects.
	 *
	 * <p>Rows are skipped (not fatal) when the zip code is not exactly five
	 * digits once surrounding quotes and whitespace are removed, or when the
	 * population is not an integer that fits in an {@code int}. An empty or
	 * absent population value is read as 0.
	 *
	 * <p>If the header lacks the zip code or the population column, a warning
	 * is printed once per missing column and an empty list is returned, because
	 * no row can then satisfy the rules above.
	 *
	 * @return the parsed rows; {@code null} if the file does not exist or can
	 *         not be read; the rows read so far if an I/O error occurs midway
	 */
	public ArrayList<Population> readPopulationCSV (){
		File csvFile = (filename == null) ? null : new File(filename);
		if (csvFile == null || !csvFile.exists()) {
			UserInterface.print("error: Population csv file does not exist.");
			return null;
		} else if (!csvFile.canRead()) {
			UserInterface.print("error: Population csv file can not be read.");
			return null;
		}

		//To store the Population Object Entries
		ArrayList<Population> populations = new ArrayList<>();

		try (BufferedReader br = new BufferedReader(new FileReader(csvFile))) {

			//The first line is the header: Key:Val pair of columns Name:Index
			HashMap<String, Integer> headerColsMap = parseHeader(br.readLine());

			//Resolve the column indexes once, before reading any data row
			int zipCodeIndex = findColumn(headerColsMap, ZIP_CODE_COLUMN);
			int popIndex = findColumn(headerColsMap, POPULATION_COLUMN);

			//if any of the columns are missing from the csv file, display a warning message
			if (zipCodeIndex == -1) {
				UserInterface.print("Warning: csv file is missing a zip_code column.");
			}
			if (popIndex == -1) {
				UserInterface.print("Warning: csv file is missing a population column.");
			}
			if (zipCodeIndex == -1 || popIndex == -1) {
				//Without both columns no row can be turned into a Population
				return populations;
			}

			//Loop through the rest of csv file
			String line;
			while ((line = br.readLine()) != null) {
				ArrayList<String> fields = splitCsvLine(line);

				//Skip rows whose zip code is not exactly 5 digits
				String zipText = cleanField(fieldAt(fields, zipCodeIndex));
				if (!isFiveDigits(zipText)) {
					continue;
				}

				//An empty population counts as 0; anything else must be an integer
				String popText = cleanField(fieldAt(fields, popIndex));
				if (popText.isEmpty()) {
					popText = "0";
				}
				if (!isInt(popText)) {
					continue;
				}

				try {
					populations.add(new Population(
						Integer.parseInt(zipText),		// set zip code
						Integer.parseInt(popText)		// set population
						));
				} catch (NumberFormatException e) {
					//Digits only, but too large for an int: skip this row, keep reading
					continue;
				}
			}
		} catch (IOException e) {
			//Also covers FileNotFoundException, which is a subclass of IOException
			e.printStackTrace();
		}
		return populations;
	}

	/**
	 * Get the column index of every column's name
	 * @param filename: the csv file
	 * @return a HashMap where the key is the column's name (exactly as written
	 *         in the header, quotes included) and the value is its index;
	 *         an empty map if the file has no header line or an I/O error
	 *         occurs; {@code null} if the file does not exist or can not be read
	 */
	public static HashMap<String, Integer> readHeader (String filename){

		File csvFile = new File(filename);
		if(!csvFile.exists()) {
			UserInterface.print("error: csv file does not exist.");
			return null;
		} else if (!csvFile.canRead()) {
			UserInterface.print("error: csv file can not be read.");
			return null;
		}

		try (BufferedReader br = new BufferedReader(new FileReader(csvFile))) {
			return parseHeader(br.readLine());
		} catch (IOException e) {
			e.printStackTrace();
		}
		return new HashMap<>();
	}

	/**
	 * Checks whether a string is an optionally signed run of decimal digits.
	 *
	 * @param str the text to test; {@code null} yields {@code false}
	 * @return {@code true} if {@code str} consists of an optional sign followed by one or more digits
	 */
	public static boolean isInt(String str) {
		return str != null && str.matches("[+-]?[0-9]+");  //match a number with optional sign
	}

	/** Maps each header cell to its column index; a null header line gives an empty map. */
	private static HashMap<String, Integer> parseHeader(String headerLine) {
		//Keys: column's name
		//Values: column's index
		HashMap<String, Integer> headerColsMap = new HashMap<>();
		if (headerLine == null) {
			return headerColsMap;
		}
		String[] headerArray = headerLine.split(CSV_SPLIT_REGEX);
		for (int i = 0; i < headerArray.length; i++) {
			headerColsMap.put(headerArray[i], i);
		}
		return headerColsMap;
	}

	/** Returns the index of the column with the given quoted name (or its unquoted form), or -1 if absent. */
	private static int findColumn(HashMap<String, Integer> headerColsMap, String quotedName) {
		Integer index = headerColsMap.get(quotedName);
		if (index == null) {
			//Fall back to the same name written without the surrounding quotes
			index = headerColsMap.get(quotedName.substring(1, quotedName.length() - 1));
		}
		return (index == null) ? -1 : index;
	}

	/** Splits one CSV line on commas, ignoring commas that sit between double quotes. */
	private static ArrayList<String> splitCsvLine(String line) {
		ArrayList<String> fields = new ArrayList<>();
		//Must live outside the loop so the quoted state carries across characters
		boolean inQuotes = false;
		int fieldStart = 0;
		for (int i = 0; i < line.length(); i++) {
			char c = line.charAt(i);
			if (c == '"') {
				inQuotes = !inQuotes;
			} else if (c == ',' && !inQuotes) {
				fields.add(line.substring(fieldStart, i));
				fieldStart = i + 1;
			}
		}
		//Whatever follows the last separator is the final field (possibly empty)
		fields.add(line.substring(fieldStart));
		return fields;
	}

	/** Returns the field at the given index, or null when the row has fewer fields. */
	private static String fieldAt(ArrayList<String> fields, int index) {
		return (index >= 0 && index < fields.size()) ? fields.get(index) : null;
	}

	/** Trims whitespace and one pair of surrounding double quotes; null becomes the empty string. */
	private static String cleanField(String raw) {
		if (raw == null) {
			return "";
		}
		String text = raw.strip();
		if (text.length() >= 2 && text.charAt(0) == '"' && text.charAt(text.length() - 1) == '"') {
			text = text.substring(1, text.length() - 1).strip();
		}
		return text;
	}

	/** Returns true when the text is exactly five ASCII digits. */
	private static boolean isFiveDigits(String text) {
		if (text.length() != 5) {
			return false;
		}
		for (int i = 0; i < text.length(); i++) {
			char c = text.charAt(i);
			if (c < '0' || c > '9') {
				return false;
			}
		}
		return true;
	}

}