const pdfTable = require('pdf-table-extractor'); const fs = require('fs'); const pool = require('./db').pool; const dbf = require('./db'); const path = require('path'); const { getCoordsFromAddress } = require('./apis'); const { notifyKPD } = require('./notifications'); let flag=false; let zoneFlag=false; // Class 0 : No threat // Class 1 : Minor threat // Class 2 : Moderate, Not life threatening // Class 3 : Moderate threat, Potentially life threatening // Class 4 : Critical threat, Life threatening // Class 5 : Extreme threat, Immediate life threatening // Class 6 : Unknown threat function parseKBDCIP(runDate, check=true, test=false) { if(runDate==undefined){ return console.error("[-]Error parsing CIP: Invalid params. Last kpd_cip entry date is undefined.") } let dupCount = 0; console.log("[+]Parsing CIP data...") const incidentClasses = { //Sets class type for each incident code class0:[ "1070", "1071", "1078", "1082", "1087", "1015", "1027", "1027A", "1017", ], class1: [ "1057S", "TESTF", "1059", "1060A", "1060", "1083A", "C1083", "1057", "1090", "1089", "1042", "1057A", "1085", "1085B", "1085A", "1049", "1049A", "1061", "1088", "1088A", "1053", "1053A", "HAZMAT", "FIRE", ], class2: [ "1066", "1068", "1094", "1094A", "1023", "1041", "C1041", "1044", "C1044", "1056", "1055", "1055A", "1068A", "1044A", "1093", "ALERT3", "1066B", ], class3: ["1065", "1063", "1072", "1067", "1067A","1067B", "1091","1066A","1051", "1051A"], class4: ["1081", "1080", "1064", "1052","1099", "1040", "1041A", "1064A"], class5: ["1062", "1079", "1052A", "1081A", "1080A"], }; // For reducing api calls, only parse data that is newer than the last parse //let lastParse = runDate.setHours(runDate.getHours() - 2); let lastParse = new Date(runDate); pdfTable(path.join(__dirname,"KPDCIP.pdf"), function(result){ let classCount = { 0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0 }; for(let i=0;i{ console.error("Error notifying users: ",error) }) } if(test){ //Deletes file when test running fs.unlink(path.join(__dirname, "KPDCIP.pdf"), (err) => { if (err) { console.error("[-]Error deleting test PDF: ", err); } }); }else{ fs.rename(path.join(__dirname,'KPDCIP.pdf'), './crimedata/kpd_cip/parsed/KPDCIP-'+ Date.now() +'.pdf', function(err){ if(err){ if(err.code==='ENOENT'){ // Directory does not exist, create it fs.mkdirSync('./crimedata/kpd_cip/parsed', {recursive:true}); }else{ //Error renaming and moving file console.error("[-]Error moving CIP: ",err); } } }); } return; } let code = row[3]; if(blacklisted.includes(code))continue;//Skips rows for unimportant calls let address; if(row[1].length>3){ address = row[1].replace("xx", "00") + " Knoxville, TN"; }else{ address = row[2].replace("/", " and ") + " Knoxville, TN"; // console.log("Cross street no addy: ", row[1], " -> ",address) } let classType="0"; if(incidentClasses.class1.includes(code)){ classType="1"; }else if(incidentClasses.class2.includes(code)){ classType="2"; }else if(incidentClasses.class3.includes(code)){ classType="3"; }else if(incidentClasses.class4.includes(code)){ classType="4"; }else if(incidentClasses.class5.includes(code)){ classType="5"; }else if(!incidentClasses.class0.includes(code)){ classType="6"; } classCount[classType] = classCount[classType]+1; // console.log(classType+" : "+classCount[classType]) try{ if(test){ continue; } // Calls google api to get coords from address getCoordsFromAddress(address, flag).then((coords)=>{ if(!coords.dat){ console.log("[~]GEOCODE API: Returning due to undefined values:", coords) return; } let formatAdd = coords.dat.formatted_address; let lat = coords.dat.geometry.location.lat; let lon = coords.dat.geometry.location.lng; let type = coords.dat.types[0]; date = parseInt(date.getTime().toString().substring(0,10)); let params = [date,address,formatAdd, row[2], row[3], row[4], lat, lon, type, classType, 1, new Date().toISOString()]; dbf.incrementClassCount(params[9]); let ent = `INSERT INTO entity(id, owned_by, likes, comments, obj) VALUES(DEFAULT, 'sherlock', 0, 0, 'kpd_cip') RETURNING id;`; pool.query(ent, function(error, result){ if(error){ flag=true; console.error("[-]",error); console.log("[-]Error creating entity: ", row); } params.splice(0, 0, result.rows[0]["id"]); pool.query(insert, params, function(error){ if(error){ if(error.code=='23505'){ console.log("[#]Duplicate entry: ",row[0], row[1], row[2], row[3], row[4]); dupCount++; if(dupCount>2){ console.log("[#]Duplicate count over 10. Ending parse.") flag=true; throw new error("Too many duplicate entries.") return; } }else{ flag=true; console.error("[-]",error); console.log("[-]Error inserting row: ", row); } } }) }) }).catch((err)=>{ console.error("[-]Error geocoding cords: ",err) flag=true; return; }) }catch(err){ console.error("[-]Error parsing CIP at gecoode: ",err) flag=true; return; } } } console.log("[+]Finished parsing CIP data.") console.log("[+]Added:\n\t"+classCount[6]+" class 6 crimes.\n\t"+classCount[5]+" class 5 crimes.\n\t"+classCount[4]+" class 4 crimes.\n\t"+classCount[3]+" class 3 crimes.\n\t"+classCount[2]+ " class 2 crimes.\n\t" +classCount[1]+" class 1 crimes.\n\t"+classCount[0]+" class 0 crimes.\n\n") //Notifies users notifyKPD(classCount).catch((error)=>{ console.error("Error notifying users: ",error) }) // If there were no already parsed entries, move the file // Aka no time overlap. (right now more of whether check is true or false) fs.rename(path.join(__dirname,'KPDCIP.pdf'), './crimedata/kpd_cip/parsed/KPDCIP-'+ Date.now() +'.pdf', function(err){ if(err){ console.log(err.code, err.name) if(err.code=='ENOENT'){ // Directory does not exist, create it fs.mkdirSync('./crimedata/kpd_cip/parsed', {recursive:true}); }else{ //Error renaming and moving file console.error("[-]Error moving CIP: ",err); } } }); }, function(error){ console.error("[-]",error); }) } function parseZones(runDate){ skip = [ "FIRECRACKERS", "FLOODED ROAD", "SCHOOL BUS WRECK WITHOUT INJURY", "WELFARE CHECK ONLY", "WRECK PROPERTY DAMAGE", "DOE DRILL", "LINES DOWN", 'FIRE ALARM', "ACCIDENT ON WATERWAY", "LOST OR FOUND PROPERTY", "EMERGENCY MEDICAL SERVICES", "Motor Vehicle Accident (MVA)", "NOISE DISTURBANCE", "PRIVATE PULL", "PROPERTY CHECK", "PROPERTY CHECK SUBDIVISION", "PUBLIC DRUNK", "REPOSESSED VEHICLE", "STANDBY", "PHONE REPORT", "TRAFFIC PROBLEM", "TREE DOWN IN ROAD", "UNWARRANTED CALL INFORMATION", "USER REQUEST FOR INFORMATION", "WANT OFFICER FOR INVESTIGATION", "WRECK/INJURY", "WRECK/PROPERTY DAMAGE", "HUMANE", "INJURED PERSON", "LAW ENFORCEMENT REQUESTED", "LOCATION DETAIL - ONLY", "SCAN UNIT CHECK", "BE ON LOOKOUT", "BOLO", "POLE DOWN", "MENTAL TRANSPORT", "MENTAL TRANSPORT LONGHAUL", "PICK UP PRISONER", "PUBLIC ASSIST", "UNWARRANTED CALL UTILITY", "CONVEY/ESCORT", "INJURED PERSON (CHILD 5 YEARS OLD OR UNDER)", "TRAFFIC LIGHT OUT", "HAZ MAT", "BUSINESS CHECK", "VANDALISM PROGRESS" ]; class0 = [ "WARRANT SERVICE", "DISTURBANCE", "ALARM", "SHOPLIFTER", "BUSINESS HOLD UP ALARM", "UNKNOWN OVERDOSE", "VANDALISM", "FIGHT", ]; class1 = [ "FIRE", "FIRE EVENT", "DISTURBANCE IN PROGRESS", "FORGERY/FRAUD", "HIT AND RUN", "HIT & RUN W/INJURY", "HIT & RUN", "SUSPICIOUS PERSON", "DRUNK DRIVER", "MISSING PERSON", "FORGERY & FRAUD URGENT IN PROGRESS", ]; class2 = [ "INDECENT EXPOSURE", "PROWLER ON PREMISES", "THEFT", "ASSAULT", "BURGLARY/VEHICLE", "DRUNK DRIVER IN PROGRESS", "STOLEN VEHICLE", "STOLEN VEHICLE IN PROGRESS", "ASSAULT WHEN MEDICAL ATTENTION REQUIRED", "BURGLARY BUSINESS", "BURGLARY BUSINESS ATTEMPT ONLY", "BURGLARY RESIDENCE ATTEMPTED", "THEFT IN PROGRESS", ]; class3 = [ "BURGLARY BUSINESS IN PROGRESS", "BURGLARY RESIDENCE", ]; class4 = [ "BURGLARY RESIDENCE IN PROGRESS", "SHOOTING SCRAPE", "SHOOTING", "RAPE", ]; class5 = [ "SHOOTING SCRAPE IN PROGRESS/SUSPECT ON SCENE", "CUTTING SCRAPE IN PROGRESS/SUSPECT ON SCENE", "MURDER", "MURDER IN PROGRESS", "MURDER ATTEMPTED" ]; let addAbb = [ 'DR', 'LN', 'ST', 'WAY', 'BLVD', 'CIR', 'RD', 'PIKE', 'HWY', 'CT', 'TRL', 'FWY', 'AVE', 'ALY', 'BLF', 'BRG', 'BYP', 'CRK', 'CRES', 'CRST', 'EST', 'FRK', 'GDN', 'GRV', 'HBR', 'HTS', 'HL', 'JCT', 'KY', 'LNDG', 'MDW', 'MTWY', 'RGD', 'PKWY', "RAMP", ] zonePaths = [ //Paths to each zone pdf. "zone100.pdf", "zone101.pdf", "zone102.pdf", "zone104.pdf", "zone200.pdf", "zone201.pdf", "zone202.pdf", "zone300.pdf", "zone302.pdf", "zone400.pdf", "zone401.pdf", "zone402.pdf", "zone403.pdf", "zone404.pdf", ]; extractZoneWrapper(path.join(__dirname, zonePaths[0]), addAbb, skip, class0, class1, class2, class3, class4, class5,zoneFlag, runDate); extractZoneWrapper(path.join(__dirname, zonePaths[1]), addAbb, skip, class0, class1, class2, class3, class4, class5, zoneFlag, runDate); extractZoneWrapper(path.join(__dirname, zonePaths[2]), addAbb, skip, class0, class1, class2, class3, class4, class5, zoneFlag, runDate); extractZoneWrapper(path.join(__dirname, zonePaths[3]), addAbb, skip, class0, class1, class2, class3, class4, class5, zoneFlag, runDate); extractZoneWrapper(path.join(__dirname, zonePaths[4]), addAbb, skip, class0, class1, class2, class3, class4, class5, zoneFlag, runDate); extractZoneWrapper(path.join(__dirname, zonePaths[5]), addAbb, skip, class0, class1, class2, class3, class4, class5, zoneFlag, runDate); extractZoneWrapper(path.join(__dirname, zonePaths[6]), addAbb, skip, class0, class1, class2, class3, class4, class5, zoneFlag, runDate); extractZoneWrapper(path.join(__dirname, zonePaths[7]), addAbb, skip, class0, class1, class2, class3, class4, class5, zoneFlag, runDate); extractZoneWrapper(path.join(__dirname, zonePaths[8]), addAbb, skip, class0, class1, class2, class3, class4, class5, zoneFlag, runDate); extractZoneWrapper(path.join(__dirname, zonePaths[9]), addAbb, skip, class0, class1, class2, class3, class4, class5, zoneFlag, runDate); extractZoneWrapper(path.join(__dirname, zonePaths[10]), addAbb, skip, class0, class1, class2, class3, class4, class5,zoneFlag, runDate); extractZoneWrapper(path.join(__dirname, zonePaths[11]), addAbb, skip, class0, class1, class2, class3, class4, class5,zoneFlag, runDate); extractZoneWrapper(path.join(__dirname, zonePaths[12]), addAbb, skip, class0, class1, class2, class3, class4, class5,zoneFlag, runDate); extractZoneWrapper(path.join(__dirname, zonePaths[13]), addAbb, skip, class0, class1, class2, class3, class4, class5,zoneFlag, runDate); } // Needed to set up in a wrapper to be able to parse for every zone without running into async issues async function extractZoneWrapper(path, addAbb, skip, class0, class1, class2, class3, class4, class5, zoneFlag, runDate,check=true){ let date, zone, classNum; zone = path.substring(path.length-7).replace('.pdf','') runDate = new Date(runDate); let lastParse= new Date(runDate.setDate(runDate.getDate()-1)); let classCount ={0:0, 1:0, 2:0, 3:0, 4:0, 5:0, 6:0, total:0}; extractPdf(path).then(async (data)=>{ let currentIncidentType ='' for(let i=0;i='0' && data[i].charAt(0)<='9': // If current incident is in skip array, skip if(currentIncidentType==='SKIP'){ break; } let temp = data[i].split(' '); temp = temp.filter(elm=>elm) // If the only data is time, skip if(temp.length<2){ // console.log("Skipping:"+ data[i]+"|") // If rows were split, join them together if(data[i+1] && data[i+1].charAt(0).match(/[a-z]/i)){ if(data[i+2].charAt(0)>='0'&&data[i+2].charAt(0)<='9' && data[i+1].includes('@')){ temp = data[i].concat(data[i+1]).split(' '); console.log("DATA SPLIT, MERGED: ", data[i]+" WITH "+data[i+1], "|"+currentIncidentType) data[i+1]='1' //To make sure it is captured by this case and discarded } }else{ break; } } let dateT = temp[0]; dateT = dateT.split('/') date='' // Splits and formats date since there it is too inconsistent to use substring if(dateT[0].length==1){ date += '0'+dateT[0]; }else{date+= dateT[0];} if(dateT[1].length==1){ date += '/0'+dateT[1]; }else{date+= '/'+dateT[1];} dateT = dateT[2].split(':'); date += '/'+dateT[0].substring(0,4) + " " + dateT[0].substring(4) + ':' + dateT[1] + ':' + dateT[2].substring(0,2); let convDate = new Date(date); // Checks to see if this entry was already parsed in the last job/ newer than last parse if(convDate < lastParse && check){ // console.log("Entry at "+date+". Already ran at "+ new Date(lastParse).toLocaleString() +". Breaking...") break; } let address = dateT[2].substring(2); let cross1 = '' let cross2 = '' let longFlag = false; // Makes sure address field is not empty if(address!=''){ // Fixes incorrect address formatting from pdf for(let l=0;l1){t=' '} cross2 +=t+temp[k+j]; } // Sets first cross street // Adds values before @ // Uses new variable to avoid adding extra space let jStart=1; //If the address is 3 words long, needs to start one later if(longFlag){jStart=2} for(let j=jStart;jjStart){t=' '} cross1+=t+temp[j]; } } } // Makes sure to use a query address that will give the most accurate results let queryAddress = ''; // If address is empty, use cross streets intersection as this happens in the data sometimes if(address=='' && (cross2!='' && cross2!='DEAD END') && (cross1!='' && cross1!='DEAD END')){ queryAddress = cross1 + " & " + cross2 + " Knox, TN"; } // Use cross street 2 as intersection point, since is it least likely to be a dead end else if(cross2 != '' && cross2 != 'DEAD END' && address!=''){ queryAddress = address+ ' & ' + cross2 + " Knox, TN"; }else if(cross1 != '' && cross1!='DEAD END'&&address!=''){ //If cross street 2 is a dead end, use cross street 1 queryAddress = address+ ' & ' + cross1 + " Knox, TN"; }else if(address != ''){ //If both cross streets are dead ends, use just address queryAddress = address + " Knox, TN"; }else{ //If there is no address for some reason, skip row break; } // console.log("Querying: ",queryAddress, currentIncidentType) let insert = `INSERT INTO knox_zones (id, date,zone, class, incident, address, cross_street1, cross_street2, formatted_address, latitude, longitude, type, source, date_added) VALUES ($1, $2, $3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13, $14)`; if(zoneFlag){return "Zone flag is true.";} try{ let coords = await getCoordsFromAddress(queryAddress, zoneFlag, {zone:zone, classNum:classNum, currentIncidentType: currentIncidentType, queryAddress:queryAddress, cross1:cross1, cross2:cross2, date:date}).catch((e)=>{ console.error('Error geocoding in zones:',e); zoneFlag=true; }); // getCoordsFromAddress(queryAddress, zoneFlag, {zone:zone, classNum:classNum, currentIncidentType: currentIncidentType, queryAddress:queryAddress, cross1:cross1, cross2:cross2, date:date}).then((coords)=>{ if('error_message' in coords.dat){ return new error("Error getting coords for zones: ",coords.dat.error_message); } let formatAdd = coords.dat.formatted_address; let lat = coords.dat.geometry.location.lat; let lon = coords.dat.geometry.location.lng; let type = coords.dat.types[0]; let finalDate = new Date(coords.passed.date); finalDate=parseInt(finalDate.getTime().toString().substring(0,10)); let params = [finalDate, coords.passed.zone, coords.passed.classNum, coords.passed.currentIncidentType, coords.passed.queryAddress, coords.passed.cross1, coords.passed.cross2, formatAdd, lat, lon, type, 2, new Date().toISOString()]; classCount[coords.passed.classNum]++; classCount.total++; // dbf.incrementClassCount(coords.passed.classNum); let ent = `INSERT INTO entity(id, owned_by, likes, comments, obj) VALUES(DEFAULT, 'sherlock', 0, 0, 'knox_zones') RETURNING id;`; pool.query(ent, function(error, result){ if(error){ zoneFlag=true; console.error("[-]Error creating entity: ",error) return; } params.splice(0, 0, result.rows[0]["id"]); pool.query(insert, params, function(error){ if(error){ zoneFlag=true; console.error("[-]Error inserting row: ", params); } }) }) // }).catch((err)=>{ // console.error("[-]Error getting coords for zones: ",err) // zoneFlag=true; // return; // }) }catch(err){ console.error("[-]Error parsing zones: ",err) zoneFlag=true; return; } break; // If current data is in skip array case skip.includes(data[i]): currentIncidentType='SKIP' break; // Checks if current data is in any classes case class0.includes(data[i]): classNum=0; currentIncidentType=data[i] break; case class1.includes(data[i]): classNum = 1; currentIncidentType=data[i] break; case class2.includes(data[i]): classNum = 2; currentIncidentType=data[i] break; case class3.includes(data[i]): classNum = 3; currentIncidentType=data[i] break; case class4.includes(data[i]): classNum = 4; currentIncidentType=data[i] break; case class5.includes(data[i]): classNum = 5; currentIncidentType=data[i] break; // Class 6, for unaccounted data default: // Makes sure to skip any data that is not an incident that somehow got split from date if(data[i].includes('@')){ break; } console.log("[#]Unknown class in "+zone+": ",data[i]) console.log("[#]BEFORE: ", data[i-1]) classNum = 6; currentIncidentType = data[i]; break; } } console.log("[#]Inserted into knox zones: ",classCount) //Updates counts after parsing to avoid deadlocks dbf.incrementClassCount(0, classCount[0]); dbf.incrementClassCount(1, classCount[1]); dbf.incrementClassCount(2, classCount[2]); dbf.incrementClassCount(3, classCount[3]); dbf.incrementClassCount(4, classCount[4]); dbf.incrementClassCount(5, classCount[5]); dbf.incrementClassCount(6, classCount[6]); let zoneCount = `UPDATE counts SET count = count + $1 WHERE "for"=$2;`; pool.query(zoneCount, [classCount.total, 'knox_zones'], function(error){ if(error){ zoneFlag=true; console.error("[-]Error updating zone count: ",error) } }) // After parsing, moves file to parsed category. // May change to just delete file after as there isn't much reason to keep it outside of a dev environment fs.rename(path, './crimedata/knox_sher/parsed/zone'+zone+'-'+Date.now()+'.pdf', function(err){ if(err){ if(err.code==='ENOENT'){ // Directory does not exist, create it fs.mkdirSync('./crimedata/knox_sher/parsed', {recursive:true}); }else{ //Error renaming and moving file console.error("[-]Error moving zone files: ",err); } } }) console.log("[+]Finished parsing zone: ",zone) }).catch((err)=>{ console.log("[-]Error extracting pdf text for ",err) }) } // Asynchronously extracts text and filters it for more parsing async function extractPdf(path){ console.log("EXTRACTING FROM: ",path) const pdf = require('pdf-parse') const dataBuffer = fs.readFileSync(path) const data = await pdf(dataBuffer) let filler = ['30 Days Activity', 'Zone 100 ', 'Zone 101 ', 'Zone 102 ', 'Zone 104 ', 'Zone 200 ', 'Zone 201 ', 'Zone 202 ', 'Zone 300 ', 'Zone 302 ', 'Zone 400 ', 'Zone 401 ', 'Zone 402 ', 'Zone 403 ', 'Zone 404 ', ] let temp = data.text.split('\n'); let final=[]; for(let i=3;i