src/edu/upenn/cit594/datamanagement/PropertiesReader.java · OpenDataPhillyFinal

package edu.upenn.cit594.datamanagement;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Pattern;

import edu.upenn.cit594.ui.UserInterface;
import edu.upenn.cit594.util.Property;

public class PropertiesReader {
	
	protected static String filename;
	public PropertiesReader(String name) {
		filename = name;
	}
	
	/**
	 * 
	 * @param filename
	 * @return
	 */
	public ArrayList<Property> readPropertiesCSV (){
		File csvFile = new File(filename);
		
		if(!csvFile.exists()) {
			UserInterface.print("error: Properties csv file does not exist.");
			return null;
		} else if (!csvFile.canRead()) {
			UserInterface.print("error: Propertie csv file can not be read.");
			return null;
		}
		
		//To store the Property Object Entries
		ArrayList<Property> Properties = new ArrayList<Property>();
		//Stores BufferedReader line
		String line = "";
		
		try {
			try (BufferedReader br = new BufferedReader(new FileReader(csvFile))) {
				
				//Get Key:Val pair of columns Name:Index
				HashMap<String, Integer> headerColsMap = new HashMap<String, Integer>();
				headerColsMap = readHeader(filename);
				int numColumns = headerColsMap.size();
				
				//Initialize column indexes
				int zipCodeIndex = -1;
				int livableAreaIndex = -1;
				int marketValueIndex= -1;

			
				//Reads the first line to get header row out of the way
				@SuppressWarnings("unused")
				String headerLine = br.readLine();
				
				//Loop through the rest of csv file
				while((line = br.readLine()) != null) {
										
					//Stores column values
					String[] colValues = new String[numColumns];
					
					int startIndex = 0; 	//start index for character
					int endIndex = 0; 		//end index for character
					int currentIndex = 0;	//column index
					
					//Character Reader
					//Splits by "," but ignores any commas surrounded by double quotes
					for (int i=0; i < line.length(); i++) {
						
						//cast character to string
						String c = Character.toString(line.charAt(i));
						boolean inQuotes = false;
						
						//Any time you see quotes, quotes boolean get's toggled
						if (c.equals("\"")) {
							inQuotes = !inQuotes;
						//if we get to the end of the line, then parse last column out
						} else if (i == line.length()-1) {
							
							//if the last character is a comma
							if (c.equals(",")) {
								//store value as an empty string
								colValues[colValues.length-1] = "";
							} else {
								//store value as whatever the string is
								colValues[colValues.length-1] = line.substring(startIndex);
							}
						//if we see a comma and not in quotes in the middle of the line
						}else if ((c.equals(",") && inQuotes == false)) {
							//index i will be the comma
							endIndex = i;
							
							//if index is at end of the column
							if (currentIndex >= numColumns) {
								break;
							}
							//add the column value
							colValues[currentIndex] = line.substring(startIndex, endIndex);
							currentIndex++;
							// new start index to next character index
							startIndex = endIndex+1;
							endIndex = startIndex;
						} 
						
					}
					
					//Check column name exists for Object Fields
					boolean zipCodeExists = headerColsMap.containsKey("zip_code");
					boolean livableAreaExists = headerColsMap.containsKey("total_livable_area");
					boolean marketValueExists = headerColsMap.containsKey("market_value");
					
					//Set column indexes
					//if any of the necessary columns are missing from the csv file, display warning message.
					if (zipCodeExists) {
						zipCodeIndex = headerColsMap.get("zip_code");
					}else {
						UserInterface.print("Warning: csv file is missing a \"zip_code\" column.");
					}
					if (livableAreaExists) {
						livableAreaIndex = headerColsMap.get("total_livable_area");
					}else {
						UserInterface.print("Warning: csv file is missing a \"total_livable_area\" column.");
					}
					if (marketValueExists) {
						marketValueIndex = headerColsMap.get("market_value");
					}else {
						UserInterface.print("Warning: csv file is missing a \"market_value\" column.");
					}
					
					
					//If the zipcode is missing or non-numeric, store a "-1"
					if(colValues[zipCodeIndex] == null || colValues[zipCodeIndex].equals("") || !isNumeric(colValues[zipCodeIndex])) {
						colValues[zipCodeIndex] = "-1";
					}
					//If the total livable area is missing or non-numeric, store a "-1"
					if(colValues[livableAreaIndex] == null || colValues[livableAreaIndex].equals("") || !isNumeric(colValues[livableAreaIndex])) {
						colValues[livableAreaIndex] = "-1";
					}
					//If the market value is missing or non-numeric, store a "-1"
					//NOTE: REMINDER THAT WHEN WE DO MATH TO IGNORE ANY VALUES WHERE 
					if(colValues[marketValueIndex] == null || colValues[marketValueIndex].equals("") || !isNumeric(colValues[marketValueIndex])) {
						colValues[marketValueIndex] = "-1";
					}
					
					//If the ZIP Code has fewer than 5 characters or
					//the first 5 characters are not all numeric, then skip to next data row
					if(colValues[zipCodeIndex].strip().length() < 5 || !isNumeric(colValues[zipCodeIndex].substring(0, 5))) {
						continue; 
					}

					
					//Add a new CovidData set
					Properties.add(new Property(
						Integer.parseInt(colValues[zipCodeIndex].substring(0, 5)), 	// set zip code (only first 5 digits)
						Double.parseDouble(colValues[livableAreaIndex]), 			// set total livable area
						Double.parseDouble(colValues[marketValueIndex]) 			// set market value
						));
				
				}
				br.close();
				return Properties;
			} catch (NumberFormatException e) {
				e.printStackTrace();
			}
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
		//If we don't make it into the try, then returns empty ArrayList
		return Properties;			
		
	}
	
	/**
	 * Get the column index of every column's name
	 * @param filename: the csv file
	 * @return a HashMap where the key is the column's name and the value is it's index 
	 */
	public static HashMap<String, Integer> readHeader (String filename){
		
		File csvFile = new File(filename);
		if(!csvFile.exists()) {
			UserInterface.print("error: csv file does not exist.");
			return null;
		} else if (!csvFile.canRead()) {
			UserInterface.print("error: csv file can not be read.");
			return null;
		}
	
		//Keys: column's name 
		//Values: column's index
		HashMap<String, Integer> headerColsMap = new HashMap<String, Integer>();
		
		try {
			try (BufferedReader br = new BufferedReader(new FileReader(csvFile))) {
				
				String headerLiner = br.readLine();
				//Splits by "," but ignores any commas surrounded by double quotes
				String[] headerArray = headerLiner.split(",(?=(?:[^\\\"]*\\\"[^\\\"]*\\\")*[^\\\"]*$)");
				
				//Stores index of column name
				int i = 0;
				
				for(String col : headerArray) {
					headerColsMap.put(col, i);
					i++;
				}
				br.close();
			}			
			return headerColsMap;
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
		return headerColsMap;
	}

	/**
	 * checks if a string is a number
	 * @param string input
	 * @return true is string is numeric, else false
	 */
	public static boolean isNumeric(String str) {
		  return str.matches("^-?[0-9]\\d*(\\.\\d+)?$");  //match a number with optional decimal ad negative.
		}
	
}