const pdfTable = require('pdf-table-extractor');
const fs = require('fs');
const pool = require('./db').pool;
const dbf = require('./db');
const path = require('path');
const { getCoordsFromAddress } = require('./apis');
const { notifyKPD } = require('./notifications');
let flag=false;
let zoneFlag=false;
// Class 0 : No threat
// Class 1 : Minor threat
// Class 2 : Moderate, Not life threatening
// Class 3 : Moderate threat, Potentially life threatening
// Class 4 : Critical threat, Life threatening
// Class 5 : Extreme threat, Immediate life threatening
// Class 6 : Unknown threat
function parseKBDCIP(runDate, check=true, test=false) {
if(runDate==undefined){
return console.error("[-]Error parsing CIP: Invalid params. Last kpd_cip entry date is undefined.")
}
let dupCount = 0;
console.log("[+]Parsing CIP data...")
const incidentClasses = {
//Sets class type for each incident code
class0:[
"1070",
"1071",
"1078",
"1082",
"1087",
"1015",
"1027",
"1027A",
"1017",
],
class1: [
"1057S",
"TESTF",
"1059",
"1060A",
"1060",
"1083A",
"C1083",
"1057",
"1090",
"1089",
"1042",
"1057A",
"1085",
"1085B",
"1085A",
"1049",
"1049A",
"1061",
"1088",
"1088A",
"1053",
"1053A",
"HAZMAT",
"FIRE",
],
class2: [
"1066",
"1068",
"1094",
"1094A",
"1023",
"1041",
"C1041",
"1044",
"C1044",
"1056",
"1055",
"1055A",
"1068A",
"1044A",
"1093",
"ALERT3",
"1066B",
],
class3: ["1065", "1063", "1072", "1067", "1067A","1067B", "1091","1066A","1051", "1051A"],
class4: ["1081", "1080", "1064", "1052","1099", "1040", "1041A", "1064A"],
class5: ["1062", "1079", "1052A", "1081A", "1080A"],
};
// For reducing api calls, only parse data that is newer than the last parse
//let lastParse = runDate.setHours(runDate.getHours() - 2);
let lastParse = new Date(runDate);
pdfTable(path.join(__dirname,"KPDCIP.pdf"), function(result){
let classCount = { 0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0 };
for(let i=0;i<result.pageTables.length;i++){
for(let j=0;j<result.pageTables[i].tables.length;j++){
if(i==0&&j==0)continue;
let row = result.pageTables[i].tables[j];
let insert = `INSERT INTO kpd_cip (id, date, address, formatted_address, cross_street, code, incident, latitude, longitude, type, class, source, date_added) VALUES ($1, $2, $3, $4,$5,$6,$7,$8,$9,$10,$11,$12, $13)`;
let blacklisted = [
"C1045",
"FALARM",
"CBOLO",
"1045B",
"PUBLIC-\nASSIST",
"C1024",
"C1025",
"EMS",
"1034",
"C1043",
"UCU",
"1084T",
"1043C",
"1058",
"1043WH",
"1053T",
"1082",
"1083",
"1086",
"1083B",
"1084C",
"LAW",
"UCI",
"TEL",
"URFI",
"1043H",
"LD",
"1096",
"1076",
"1075",
"1045",
"1045E",
"1016",
"1046",
"1046S",
"1043",
"1038",
"1038C",
"1018",
"1024",
"1037",
"1077",
"1074",
"1084",
"1095",
"1097",
"1014",
"C1014M",
"1029",
"1046W",
"1048",
"1047",
"1035",
"1036",
"1073",
"1039",
"1025",
"FLOOD",
"TLITE",
"TEST",
"TREE",
"BOLO",
"LINES",
"POLE"
];
let date = new Date(row[0]);
// console.log("Last parse: ", new Date(lastParse).toLocaleString(), "Latest entry: ", date.toLocaleString())
if(date <= lastParse && check){
console.log("[+]",i+":"+j," entry at "+date.toLocaleString()+". Already ran at "+ lastParse.toLocaleString() +". Returning...")
console.log("[+]Added:\n\t"+classCount[6]+" class 6 crimes.\n\t"+classCount[5]+" class 5 crimes.\n\t"+classCount[4]+" class 4 crimes.\n\t"+classCount[3]+" class 3 crimes.\n\t"+classCount[2]+ " class 2 crimes.\n\t" +classCount[1]+" class 1 crimes.\n\t"+classCount[0]+" class 0 crimes.\n\n")
//Notifies users
//Checks for testing bedore sending notifications
if(!test){
notifyKPD(classCount).catch((error)=>{
console.error("Error notifying users: ",error)
})
}
if(test){
//Deletes file when test running
fs.unlink(path.join(__dirname, "KPDCIP.pdf"), (err) => {
if (err) {
console.error("[-]Error deleting test PDF: ", err);
}
});
}else{
fs.rename(path.join(__dirname,'KPDCIP.pdf'), './crimedata/kpd_cip/parsed/KPDCIP-'+ Date.now() +'.pdf', function(err){
if(err){
if(err.code==='ENOENT'){
// Directory does not exist, create it
fs.mkdirSync('./crimedata/kpd_cip/parsed', {recursive:true});
}else{
//Error renaming and moving file
console.error("[-]Error moving CIP: ",err);
}
}
});
}
return;
}
let code = row[3];
if(blacklisted.includes(code))continue;//Skips rows for unimportant calls
let address;
if(row[1].length>3){
address = row[1].replace("xx", "00") + " Knoxville, TN";
}else{
address = row[2].replace("/", " and ") + " Knoxville, TN";
// console.log("Cross street no addy: ", row[1], " -> ",address)
}
let classType="0";
if(incidentClasses.class1.includes(code)){
classType="1";
}else if(incidentClasses.class2.includes(code)){
classType="2";
}else if(incidentClasses.class3.includes(code)){
classType="3";
}else if(incidentClasses.class4.includes(code)){
classType="4";
}else if(incidentClasses.class5.includes(code)){
classType="5";
}else if(!incidentClasses.class0.includes(code)){
classType="6";
}
classCount[classType] = classCount[classType]+1;
// console.log(classType+" : "+classCount[classType])
try{
if(test){
continue;
}
// Calls google api to get coords from address
getCoordsFromAddress(address, flag).then((coords)=>{
if(!coords.dat){
console.log("[~]GEOCODE API: Returning due to undefined values:", coords)
return;
}
let formatAdd = coords.dat.formatted_address;
let lat = coords.dat.geometry.location.lat;
let lon = coords.dat.geometry.location.lng;
let type = coords.dat.types[0];
date = parseInt(date.getTime().toString().substring(0,10));
let params = [date,address,formatAdd, row[2], row[3], row[4], lat, lon, type, classType, 1, new Date().toISOString()];
dbf.incrementClassCount(params[9]);
let ent = `INSERT INTO entity(id, owned_by, likes, comments, obj) VALUES(DEFAULT, 'sherlock', 0, 0, 'kpd_cip') RETURNING id;`;
pool.query(ent, function(error, result){
if(error){
flag=true;
console.error("[-]",error);
console.log("[-]Error creating entity: ", row);
}
params.splice(0, 0, result.rows[0]["id"]);
pool.query(insert, params, function(error){
if(error){
if(error.code=='23505'){
console.log("[#]Duplicate entry: ",row[0], row[1], row[2], row[3], row[4]);
dupCount++;
if(dupCount>2){
console.log("[#]Duplicate count over 10. Ending parse.")
flag=true;
throw new error("Too many duplicate entries.")
return;
}
}else{
flag=true;
console.error("[-]",error);
console.log("[-]Error inserting row: ", row);
}
}
})
})
}).catch((err)=>{
console.error("[-]Error geocoding cords: ",err)
flag=true;
return;
})
}catch(err){
console.error("[-]Error parsing CIP at gecoode: ",err)
flag=true;
return;
}
}
}
console.log("[+]Finished parsing CIP data.")
console.log("[+]Added:\n\t"+classCount[6]+" class 6 crimes.\n\t"+classCount[5]+" class 5 crimes.\n\t"+classCount[4]+" class 4 crimes.\n\t"+classCount[3]+" class 3 crimes.\n\t"+classCount[2]+ " class 2 crimes.\n\t" +classCount[1]+" class 1 crimes.\n\t"+classCount[0]+" class 0 crimes.\n\n")
//Notifies users
notifyKPD(classCount).catch((error)=>{
console.error("Error notifying users: ",error)
})
// If there were no already parsed entries, move the file
// Aka no time overlap. (right now more of whether check is true or false)
fs.rename(path.join(__dirname,'KPDCIP.pdf'), './crimedata/kpd_cip/parsed/KPDCIP-'+ Date.now() +'.pdf', function(err){
if(err){
console.log(err.code, err.name)
if(err.code=='ENOENT'){
// Directory does not exist, create it
fs.mkdirSync('./crimedata/kpd_cip/parsed', {recursive:true});
}else{
//Error renaming and moving file
console.error("[-]Error moving CIP: ",err);
}
}
});
}, function(error){
console.error("[-]",error);
})
}
function parseZones(runDate){
skip = [
"FIRECRACKERS",
"FLOODED ROAD",
"SCHOOL BUS WRECK WITHOUT INJURY",
"WELFARE CHECK ONLY",
"WRECK PROPERTY DAMAGE",
"DOE DRILL",
"LINES DOWN",
'FIRE ALARM',
"ACCIDENT ON WATERWAY",
"LOST OR FOUND PROPERTY",
"EMERGENCY MEDICAL SERVICES",
"Motor Vehicle Accident (MVA)",
"NOISE DISTURBANCE",
"PRIVATE PULL",
"PROPERTY CHECK",
"PROPERTY CHECK SUBDIVISION",
"PUBLIC DRUNK",
"REPOSESSED VEHICLE",
"STANDBY",
"PHONE REPORT",
"TRAFFIC PROBLEM",
"TREE DOWN IN ROAD",
"UNWARRANTED CALL INFORMATION",
"USER REQUEST FOR INFORMATION",
"WANT OFFICER FOR INVESTIGATION",
"WRECK/INJURY",
"WRECK/PROPERTY DAMAGE",
"HUMANE",
"INJURED PERSON",
"LAW ENFORCEMENT REQUESTED",
"LOCATION DETAIL - ONLY",
"SCAN UNIT CHECK",
"BE ON LOOKOUT",
"BOLO",
"POLE DOWN",
"MENTAL TRANSPORT",
"MENTAL TRANSPORT LONGHAUL",
"PICK UP PRISONER",
"PUBLIC ASSIST",
"UNWARRANTED CALL UTILITY",
"CONVEY/ESCORT",
"INJURED PERSON (CHILD 5 YEARS OLD OR UNDER)",
"TRAFFIC LIGHT OUT",
"HAZ MAT",
"BUSINESS CHECK",
"VANDALISM PROGRESS"
];
class0 = [
"WARRANT SERVICE",
"DISTURBANCE",
"ALARM",
"SHOPLIFTER",
"BUSINESS HOLD UP ALARM",
"UNKNOWN OVERDOSE",
"VANDALISM",
"FIGHT",
];
class1 = [
"FIRE",
"FIRE EVENT",
"DISTURBANCE IN PROGRESS",
"FORGERY/FRAUD",
"HIT AND RUN",
"HIT & RUN W/INJURY",
"HIT & RUN",
"SUSPICIOUS PERSON",
"DRUNK DRIVER",
"MISSING PERSON",
"FORGERY & FRAUD URGENT IN PROGRESS",
];
class2 = [
"INDECENT EXPOSURE",
"PROWLER ON PREMISES",
"THEFT",
"ASSAULT",
"BURGLARY/VEHICLE",
"DRUNK DRIVER IN PROGRESS",
"STOLEN VEHICLE",
"STOLEN VEHICLE IN PROGRESS",
"ASSAULT WHEN MEDICAL ATTENTION REQUIRED",
"BURGLARY BUSINESS",
"BURGLARY BUSINESS ATTEMPT ONLY",
"BURGLARY RESIDENCE ATTEMPTED",
"THEFT IN PROGRESS",
];
class3 = [
"BURGLARY BUSINESS IN PROGRESS",
"BURGLARY RESIDENCE",
];
class4 = [
"BURGLARY RESIDENCE IN PROGRESS",
"SHOOTING SCRAPE",
"SHOOTING",
"RAPE",
];
class5 = [
"SHOOTING SCRAPE IN PROGRESS/SUSPECT ON SCENE",
"CUTTING SCRAPE IN PROGRESS/SUSPECT ON SCENE",
"MURDER",
"MURDER IN PROGRESS",
"MURDER ATTEMPTED"
];
let addAbb = [
'DR',
'LN',
'ST',
'WAY',
'BLVD',
'CIR',
'RD',
'PIKE',
'HWY',
'CT',
'TRL',
'FWY',
'AVE',
'ALY',
'BLF',
'BRG',
'BYP',
'CRK',
'CRES',
'CRST',
'EST',
'FRK',
'GDN',
'GRV',
'HBR',
'HTS',
'HL',
'JCT',
'KY',
'LNDG',
'MDW',
'MTWY',
'RGD',
'PKWY',
"RAMP",
]
zonePaths = [ //Paths to each zone pdf.
"zone100.pdf",
"zone101.pdf",
"zone102.pdf",
"zone104.pdf",
"zone200.pdf",
"zone201.pdf",
"zone202.pdf",
"zone300.pdf",
"zone302.pdf",
"zone400.pdf",
"zone401.pdf",
"zone402.pdf",
"zone403.pdf",
"zone404.pdf",
];
extractZoneWrapper(path.join(__dirname, zonePaths[0]), addAbb, skip, class0, class1, class2, class3, class4, class5,zoneFlag, runDate);
extractZoneWrapper(path.join(__dirname, zonePaths[1]), addAbb, skip, class0, class1, class2, class3, class4, class5, zoneFlag, runDate);
extractZoneWrapper(path.join(__dirname, zonePaths[2]), addAbb, skip, class0, class1, class2, class3, class4, class5, zoneFlag, runDate);
extractZoneWrapper(path.join(__dirname, zonePaths[3]), addAbb, skip, class0, class1, class2, class3, class4, class5, zoneFlag, runDate);
extractZoneWrapper(path.join(__dirname, zonePaths[4]), addAbb, skip, class0, class1, class2, class3, class4, class5, zoneFlag, runDate);
extractZoneWrapper(path.join(__dirname, zonePaths[5]), addAbb, skip, class0, class1, class2, class3, class4, class5, zoneFlag, runDate);
extractZoneWrapper(path.join(__dirname, zonePaths[6]), addAbb, skip, class0, class1, class2, class3, class4, class5, zoneFlag, runDate);
extractZoneWrapper(path.join(__dirname, zonePaths[7]), addAbb, skip, class0, class1, class2, class3, class4, class5, zoneFlag, runDate);
extractZoneWrapper(path.join(__dirname, zonePaths[8]), addAbb, skip, class0, class1, class2, class3, class4, class5, zoneFlag, runDate);
extractZoneWrapper(path.join(__dirname, zonePaths[9]), addAbb, skip, class0, class1, class2, class3, class4, class5, zoneFlag, runDate);
extractZoneWrapper(path.join(__dirname, zonePaths[10]), addAbb, skip, class0, class1, class2, class3, class4, class5,zoneFlag, runDate);
extractZoneWrapper(path.join(__dirname, zonePaths[11]), addAbb, skip, class0, class1, class2, class3, class4, class5,zoneFlag, runDate);
extractZoneWrapper(path.join(__dirname, zonePaths[12]), addAbb, skip, class0, class1, class2, class3, class4, class5,zoneFlag, runDate);
extractZoneWrapper(path.join(__dirname, zonePaths[13]), addAbb, skip, class0, class1, class2, class3, class4, class5,zoneFlag, runDate);
}
// Needed to set up in a wrapper to be able to parse for every zone without running into async issues
async function extractZoneWrapper(path, addAbb, skip, class0, class1, class2, class3, class4, class5, zoneFlag, runDate,check=true){
let date, zone, classNum;
zone = path.substring(path.length-7).replace('.pdf','')
runDate = new Date(runDate);
let lastParse= new Date(runDate.setDate(runDate.getDate()-1));
let classCount ={0:0, 1:0, 2:0, 3:0, 4:0, 5:0, 6:0, total:0};
extractPdf(path).then(async (data)=>{
let currentIncidentType =''
for(let i=0;i<data.length;i++){
switch(true){
// If starts with number. Most likely normal entry
case data[i].charAt(0)>='0' && data[i].charAt(0)<='9':
// If current incident is in skip array, skip
if(currentIncidentType==='SKIP'){
break;
}
let temp = data[i].split(' ');
temp = temp.filter(elm=>elm)
// If the only data is time, skip
if(temp.length<2){
// console.log("Skipping:"+ data[i]+"|")
// If rows were split, join them together
if(data[i+1] && data[i+1].charAt(0).match(/[a-z]/i)){
if(data[i+2].charAt(0)>='0'&&data[i+2].charAt(0)<='9' && data[i+1].includes('@')){
temp = data[i].concat(data[i+1]).split(' ');
console.log("DATA SPLIT, MERGED: ", data[i]+" WITH "+data[i+1], "|"+currentIncidentType)
data[i+1]='1' //To make sure it is captured by this case and discarded
}
}else{
break;
}
}
let dateT = temp[0];
dateT = dateT.split('/')
date=''
// Splits and formats date since there it is too inconsistent to use substring
if(dateT[0].length==1){
date += '0'+dateT[0];
}else{date+= dateT[0];}
if(dateT[1].length==1){
date += '/0'+dateT[1];
}else{date+= '/'+dateT[1];}
dateT = dateT[2].split(':');
date += '/'+dateT[0].substring(0,4) + " " + dateT[0].substring(4) + ':' + dateT[1] + ':' + dateT[2].substring(0,2);
let convDate = new Date(date);
// Checks to see if this entry was already parsed in the last job/ newer than last parse
if(convDate < lastParse && check){
// console.log("Entry at "+date+". Already ran at "+ new Date(lastParse).toLocaleString() +". Breaking...")
break;
}
let address = dateT[2].substring(2);
let cross1 = ''
let cross2 = ''
let longFlag = false;
// Makes sure address field is not empty
if(address!=''){
// Fixes incorrect address formatting from pdf
for(let l=0;l<addAbb.length;l++){
abb = addAbb[l];
if(temp[1].startsWith(abb)){
address+=" "+abb;
temp[1]=temp[1].substring(abb.length);
break;
}
// If none of the abbreviations match, it is a 3 word+ long address. Need to check
if(l==addAbb.length-1){
for(let g =0;g<addAbb.length;g++){
abb=addAbb[g];
if(temp[2].startsWith(abb)){
address+= " "+temp[1] + " " +abb;
temp[2]=temp[2].substring(abb.length);
longFlag=true;
// console.log("LONG ADDRESS: ",address)
break;
}
}
}
}
}
// Runs through each value until finds the @ to set both cross streets
for(let k=0;k<temp.length;k++){
if(temp[k] === '@'){
// Sets second cross street
// Goes through each value after @ until the end
for(let j=1;j<temp.length-k;j++){
let t='';
if(j>1){t=' '}
cross2 +=t+temp[k+j];
}
// Sets first cross street
// Adds values before @
// Uses new variable to avoid adding extra space
let jStart=1;
//If the address is 3 words long, needs to start one later
if(longFlag){jStart=2}
for(let j=jStart;j<k;j++){
let t='';
if(j>jStart){t=' '}
cross1+=t+temp[j];
}
}
}
// Makes sure to use a query address that will give the most accurate results
let queryAddress = '';
// If address is empty, use cross streets intersection as this happens in the data sometimes
if(address=='' && (cross2!='' && cross2!='DEAD END') && (cross1!='' && cross1!='DEAD END')){
queryAddress = cross1 + " & " + cross2 + " Knox, TN";
}
// Use cross street 2 as intersection point, since is it least likely to be a dead end
else if(cross2 != '' && cross2 != 'DEAD END' && address!=''){
queryAddress = address+ ' & ' + cross2 + " Knox, TN";
}else if(cross1 != '' && cross1!='DEAD END'&&address!=''){ //If cross street 2 is a dead end, use cross street 1
queryAddress = address+ ' & ' + cross1 + " Knox, TN";
}else if(address != ''){ //If both cross streets are dead ends, use just address
queryAddress = address + " Knox, TN";
}else{ //If there is no address for some reason, skip row
break;
}
// console.log("Querying: ",queryAddress, currentIncidentType)
let insert = `INSERT INTO knox_zones (id, date,zone, class, incident, address, cross_street1, cross_street2, formatted_address, latitude, longitude, type, source, date_added) VALUES ($1, $2, $3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13, $14)`;
if(zoneFlag){return "Zone flag is true.";}
try{
let coords = await getCoordsFromAddress(queryAddress, zoneFlag, {zone:zone, classNum:classNum, currentIncidentType: currentIncidentType, queryAddress:queryAddress, cross1:cross1, cross2:cross2, date:date}).catch((e)=>{
console.error('Error geocoding in zones:',e);
zoneFlag=true;
});
// getCoordsFromAddress(queryAddress, zoneFlag, {zone:zone, classNum:classNum, currentIncidentType: currentIncidentType, queryAddress:queryAddress, cross1:cross1, cross2:cross2, date:date}).then((coords)=>{
if('error_message' in coords.dat){
return new error("Error getting coords for zones: ",coords.dat.error_message);
}
let formatAdd = coords.dat.formatted_address;
let lat = coords.dat.geometry.location.lat;
let lon = coords.dat.geometry.location.lng;
let type = coords.dat.types[0];
let finalDate = new Date(coords.passed.date);
finalDate=parseInt(finalDate.getTime().toString().substring(0,10));
let params = [finalDate, coords.passed.zone, coords.passed.classNum, coords.passed.currentIncidentType, coords.passed.queryAddress, coords.passed.cross1, coords.passed.cross2, formatAdd, lat, lon, type, 2, new Date().toISOString()];
classCount[coords.passed.classNum]++;
classCount.total++;
// dbf.incrementClassCount(coords.passed.classNum);
let ent = `INSERT INTO entity(id, owned_by, likes, comments, obj) VALUES(DEFAULT, 'sherlock', 0, 0, 'knox_zones') RETURNING id;`;
pool.query(ent, function(error, result){
if(error){
zoneFlag=true;
console.error("[-]Error creating entity: ",error)
return;
}
params.splice(0, 0, result.rows[0]["id"]);
pool.query(insert, params, function(error){
if(error){
zoneFlag=true;
console.error("[-]Error inserting row: ", params);
}
})
})
// }).catch((err)=>{
// console.error("[-]Error getting coords for zones: ",err)
// zoneFlag=true;
// return;
// })
}catch(err){
console.error("[-]Error parsing zones: ",err)
zoneFlag=true;
return;
}
break;
// If current data is in skip array
case skip.includes(data[i]):
currentIncidentType='SKIP'
break;
// Checks if current data is in any classes
case class0.includes(data[i]):
classNum=0;
currentIncidentType=data[i]
break;
case class1.includes(data[i]):
classNum = 1;
currentIncidentType=data[i]
break;
case class2.includes(data[i]):
classNum = 2;
currentIncidentType=data[i]
break;
case class3.includes(data[i]):
classNum = 3;
currentIncidentType=data[i]
break;
case class4.includes(data[i]):
classNum = 4;
currentIncidentType=data[i]
break;
case class5.includes(data[i]):
classNum = 5;
currentIncidentType=data[i]
break;
// Class 6, for unaccounted data
default:
// Makes sure to skip any data that is not an incident that somehow got split from date
if(data[i].includes('@')){
break;
}
console.log("[#]Unknown class in "+zone+": ",data[i])
console.log("[#]BEFORE: ", data[i-1])
classNum = 6;
currentIncidentType = data[i];
break;
}
}
console.log("[#]Inserted into knox zones: ",classCount)
//Updates counts after parsing to avoid deadlocks
dbf.incrementClassCount(0, classCount[0]);
dbf.incrementClassCount(1, classCount[1]);
dbf.incrementClassCount(2, classCount[2]);
dbf.incrementClassCount(3, classCount[3]);
dbf.incrementClassCount(4, classCount[4]);
dbf.incrementClassCount(5, classCount[5]);
dbf.incrementClassCount(6, classCount[6]);
let zoneCount = `UPDATE counts SET count = count + $1 WHERE "for"=$2;`;
pool.query(zoneCount, [classCount.total, 'knox_zones'], function(error){
if(error){
zoneFlag=true;
console.error("[-]Error updating zone count: ",error)
}
})
// After parsing, moves file to parsed category.
// May change to just delete file after as there isn't much reason to keep it outside of a dev environment
fs.rename(path, './crimedata/knox_sher/parsed/zone'+zone+'-'+Date.now()+'.pdf', function(err){
if(err){
if(err.code==='ENOENT'){
// Directory does not exist, create it
fs.mkdirSync('./crimedata/knox_sher/parsed', {recursive:true});
}else{
//Error renaming and moving file
console.error("[-]Error moving zone files: ",err);
}
}
})
console.log("[+]Finished parsing zone: ",zone)
}).catch((err)=>{
console.log("[-]Error extracting pdf text for ",err)
})
}
// Asynchronously extracts text and filters it for more parsing
async function extractPdf(path){
console.log("EXTRACTING FROM: ",path)
const pdf = require('pdf-parse')
const dataBuffer = fs.readFileSync(path)
const data = await pdf(dataBuffer)
let filler = ['30 Days Activity', 'Zone 100 ', 'Zone 101 ', 'Zone 102 ', 'Zone 104 ', 'Zone 200 ', 'Zone 201 ', 'Zone 202 ', 'Zone 300 ', 'Zone 302 ', 'Zone 400 ', 'Zone 401 ', 'Zone 402 ', 'Zone 403 ', 'Zone 404 ', ]
let temp = data.text.split('\n');
let final=[];
for(let i=3;i<temp.length;i++){
if(filler.includes(temp[i]) || temp[i].length<3){
continue;
}
final.push(temp[i]);
}
return final;
}
function parseClery(runDate){
let lastParse = runDate.setDate(runDate.getDate() - 1);
pdfTable(path.join(__dirname,"clery.pdf"), function(result){
// Goes through each row of the pdf table
let edge = [
"Unknown Residence Hall",
"Geier and Robinson Halls",
"Unknown date and time",
"(unknown time)",
"On Campus Locations",
];
let blacklisted = [
"Liquor Law Violation",
"Disturbing Containers",
"Criminal Trespass",
"Illegal Camping",
"Driving While License Cancelled, Suspended or Revoked",
"Public Intoxication",
"No Proof of Insurance",
"Misuse of Registration",
"License Required",
"Expired Registration",
"Lights Required on Motor Vehicles",
"False Pretense/Swindle/Confidence Game",
"Registration Required before Operation",
"Fail to Surrender Auto License or Registration Upon Revocation",
"Driving on the Right",
"Disobeying Traffic Signal",
"Confiscation",
"Obstructing Highway or Other Passageway",
"Failure to Maintain Lane",
"Pedestrians on Roadways",
"Criminal Littering - More Than 5lbs - Less than 10lbs",
"Criminal Littering",
"Driving on Wrong Side of the Road",
"Stop/Stand/Park Prohibited Specific Places",
"Driving or Moving Unregistered Vehicle Upon Highway",
"Improper Turn",
"Evading Arrest",
"Following Too Closely",
"Improper Display of Tag",
"Underage Consumption",
"Improper U-Turn",
"Disorderly Conduct",
"Disregarding Stop Sign",
"Violation of Implied Consent Law",
"Stop, Stand, Park Prohibited Specific Places",
"Improper Registration",
"911 Calls in Nonemergency Situations",
"Pedestrians In The Roadway",
"Seatbelt Violation",
"Headlights on Motor Vehicles - Operation During Inclement Weather",
"Aggravated Criminal Trespass",
"Prohibited Purchase of Tobaco Products by Minors",
]
let class0=[
"Warrant Served",
"Drug Law Violation",
"Panhandling",
"Shoplifting",
"Arrest On Outstanding Warrant",
"Unlawful Drug Paraphernalia",
"Crossing at Other Than Crosswalks",
"Vandalism",
"Fraud",
]
let class1=[
"Intimidation",
"Driving Under the Influence",
"Driving Under the Influence 2nd Offense",
"Driving Under the Influence 3rd Offense",
"All Other Larceny",
"Harassment",
"Theft of Bicycle",
"Theft Under $500",
"Theft Under $1000",
"Possession with Intent to Manufacture, Deliver, Sell",
"Unlawful Carrying or Possession of a Weapon",
"Simple Possession/Casual Exchange",
"Domestic Violence",
"Simple Possession",
"Possession of a Handgun While Under the Influence",
"Sale or Possession of Methamphetamines",
"Schedule IV Drug Violations",
"Manufacturing/Delivering/Selling/Possession of Controlled Substance",
"Open Container",
]
let class2=[
"Theft",
"Simple Assault",
"Theft of Vehicle Parts/Accessories",
"Theft from Yards",
"Theft from Motor Vehicle",
"Motor Vehicle Theft",
"Theft From Buildings",
"Stalking",
"Theft $10,000-$60,000",
"Indecent Exposure",
"Vehicle Burglary",
"Theft of Property",
]
let class3=[
"Aggravated Assault",
"Fondling",
"Burglary",
"Arson",
]
let class4=[
"Sexual Assault",
"Sexual Assault with an Object",
"Robbery",
"Kidnapping",
"Firearm Used in Dangerous Felony",
]
let class5=[
"Murder",
]
for(let i =0;i<result.pageTables.length;i++){
for(let j=0;j<result.pageTables[i].tables.length;j++){
// Skips starting row of table description
if(i==0&&j==0){continue;}
let row = result.pageTables[i].tables[j];
let incident = row[0];
let reported = row[1];
let occurred = row[2];
let incidentNumber = row[3];
let location = row[4];
let misc = row[5];
let disposition = row[6];
let update= row[7];
let incidentClass = "6";
let tempIncident;
if(incident.includes(';')){
tempIncident = incident.split(';')
}else{
tempIncident = incident.split(',')
}
for(let k=0;k<tempIncident.length;k++){
}
// Classifies incident
if (class5.includes(incident)){incidentClass="5"}
else if(class4.includes(incident)){incidentClass="4"}
else if(class3.includes(incident)){incidentClass="3"}
else if(class2.includes(incident)){incidentClass="2"}
else if(class1.includes(incident)){incidentClass="1"}
else if(class0.includes(incident)){incidentClass="0"}
else if(!blacklisted.includes(incident)){incidentClass="6"}
else{continue;}//Moves to next incident if this one is blacklisted
// Formats reported date
// Will be using this for searching as occurred is inconsistent
reported = reported.split(" ");
reported = reported[0] + reported[1].substring(0,1) + ":"+reported[1].substring(1);
reported = new Date(reported);
}
}
console.log("[#]",result.pageTables[0].tables)
})
}
module.exports = { parseKBDCIP, parseZones, parseClery};