// Sherlock-backend / components / downloadPDF.js

const https = require('https');
const fs = require('fs');
const path = require('path');
const os = require('os');
const homeDir = os.homedir();
const { parseKBDCIP, parseZones } = require('./parse');

// Sets the Chromium binary path that Puppeteer should use to a hard-coded
// Linux location.
// NOTE(review): presumably this has to be assigned before puppeteer is loaded
// and launched so that the setting is picked up — confirm against the
// Puppeteer configuration docs. The path only exists on hosts where Chromium
// is installed at /usr/bin/chromium-browser.
process.env.PUPPETEER_EXECUTABLE_PATH="/usr/bin/chromium-browser"
const pup = require('puppeteer');

/**
 * Runs an async operation, retrying it when it throws or rejects.
 *
 * @param {function(): Promise<*>} cb - Operation to attempt; called with no arguments.
 * @param {number} count - Number of retries allowed AFTER the first attempt, so
 *   `cb` runs at most `count + 1` times. Values that do not convert to a number
 *   (e.g. `undefined`) are treated as 0 instead of retrying forever.
 * @returns {Promise<*>} The value produced by the first successful attempt.
 * @throws Rethrows the error of the last failed attempt once retries are exhausted.
 */
async function retry(cb, count){
    const requested = Number(count);
    // NaN never satisfies `<= 0`, which previously caused unbounded retries.
    let remaining = Number.isNaN(requested) ? 0 : requested;
    let attempts = 0;

    for(;;){
        attempts++;
        try{
            return await cb();
        }catch(e){
            if(remaining<=0){
                // Report the real number of attempts rather than a hard-coded "3".
                console.error("[-]Download failed after "+attempts+" attempt"+(attempts===1?"":"s")+". Returning...");
                throw e;
            }

            console.log("[-]Download failed with error: ", e);
            console.log("[~]Retrying download. "+remaining+" attempts remaining...");
            remaining--;
        }
    }
}

/**
 * General helper that downloads a PDF over HTTPS and stores it next to this
 * module (`__dirname`/filename).
 *
 * @param {string} pdfUrl - HTTPS URL of the PDF.
 * @param {string} filename - File name (relative to this module's directory) to write.
 * @param {boolean} [test=false] - When true the download is only a connectivity
 *   check: the progress log is skipped and the file is deleted again once written.
 * @returns {Promise<string>} Resolves with "Downloaded PDF successfully." after the
 *   file has been fully written and closed.
 * @throws {Error} Rejects when the request fails, the server answers with a
 *   non-2xx status, or the file cannot be written. The underlying error, if
 *   any, is available as `cause`.
 */
async function downloadPdf(pdfUrl, filename, test=false) {
    const fn = path.join(__dirname,filename);

    return new Promise((resolve, reject)=>{
        let file = null;      // write stream, created only after a 2xx response
        let settled = false;  // guards against settling the promise twice

        // Rejects once with a real Error and removes a partially written file.
        const fail = (message, cause)=>{
            if(settled){
                return;
            }
            settled = true;
            const error = cause === undefined ? new Error(message) : new Error(message, { cause });
            if(file === null){
                // Nothing was written by this call, so leave any existing file alone.
                reject(error);
                return;
            }
            file.destroy();
            fs.unlink(fn, ()=>reject(error));
        };

        https.get(pdfUrl, (response)=>{
            if(!test){
                console.log("[+]Downloading PDF from " + pdfUrl + " to " + fn + "...");
            }
            console.log("RESPONSE: ",response.statusCode, response.statusMessage)

            const statusCode = response.statusCode;
            if(statusCode < 200 || statusCode >= 300){
                // Drain the body so the socket is released; do not save an error page as a PDF.
                response.resume();
                fail("Error downloading PDF: unexpected HTTP status " + statusCode + " from " + pdfUrl);
                return;
            }

            file = fs.createWriteStream(fn);
            response.on('error', (err)=>{
                fail("Error downloading PDF: " + err.message, err);
            });
            file.on('error', (err)=>{
                fail("Error writing PDF to " + fn + ": " + err.message, err);
            });
            file.on('finish', ()=>{
                file.close(()=>{
                    if(settled){
                        return;
                    }
                    settled = true;

                    if(!test){
                        resolve("Downloaded PDF successfully.")
                        return;
                    }

                    //If this was a test case, delete the downloaded file
                    fs.unlink(fn, (err)=>{
                        if(err){
                            console.error("[-]Error deleting test PDF: ", err);
                        }
                        resolve("Downloaded PDF successfully.")
                    })
                });
            })
            response.pipe(file);
        }).on('error', (err)=>{
            fail("Error downloading PDF: " + err.message, err);
        })
    })
}

/**
 * Downloads the KPD CIP PDF by driving a headless browser to the SharePoint
 * share link and clicking its download button, then moves the downloaded
 * `MediaRpt.pdf` from `<home>/Downloads` to `KPDCIP.pdf` next to this module and
 * hands off to `parseKBDCIP`.
 *
 * The returned promise now settles only after the whole sequence has run, so
 * callers can `await` it and inspect the result.
 *
 * @param {*} fireDate - Passed through unchanged to `parseKBDCIP`.
 * @param {boolean} [test=false] - When true, calls `parseKBDCIP(fireDate, check, true)`
 *   instead of `parseKBDCIP(fireDate)`.
 * @param {boolean} [check=true] - Forwarded to `parseKBDCIP` in test mode only.
 * @returns {Promise<number|undefined>} `-1` when the download or the file move
 *   fails (the error is logged, never thrown); `undefined` otherwise.
 */
async function downloadKPDCIP(fireDate, test=false, check=true){
    const url = "https://knoxville.sharepoint.com/:b:/s/KnoxvillePublicShare/ESbv9pzfdNdEkhttuTX4-cYBkp3Z9gm9IKxRF_4chfjBVg?e=d0c3DZ"

    try{
        await retry(async ()=>{
            const browser = await pup.launch({headless: true, args:['--no-sandbox', '--disable-gpu']});
            try{
                const page = await browser.newPage();
                await page.goto(url, {waitUntil: 'domcontentloaded', timeout:60000});

                await page.waitForSelector('button#downloadCommand');

                const downloadButton = await page.$('button#downloadCommand');
                if(!downloadButton){
                    // Fail the attempt so retry() can try again instead of
                    // silently continuing without a download.
                    throw new Error("Download button not found.");
                }
                await downloadButton.click();

                //Waits 3 seconds for download to complete
                await new Promise(resolve => setTimeout(resolve, 3000));
            }finally{
                // Always release the browser, otherwise every failed attempt
                // leaves a Chromium process behind.
                await browser.close();
            }
        }, 3);

        const dpath = path.join(__dirname, 'KPDCIP.pdf');

        try{
            const downloadsDir = path.join(homeDir, 'Downloads');

            fs.renameSync(path.join(downloadsDir, 'MediaRpt.pdf'), dpath);
        }catch(e){
            console.error("[-]Error renaming 'downloaded' CIP PDF: ", e);
            return -1;
        }
        console.log("[+]Downloaded PDF successfully.");

        // NOTE(review): parseKBDCIP is defined in ./parse; whether it returns a
        // promise is not visible here, so it is deliberately not awaited.
        if (test) {
            parseKBDCIP(fireDate, check, true)
            return;
        }
        parseKBDCIP(fireDate);
    }catch(e){
        console.error("[-]KPD CIP download failed. Final error: ", e);
        return -1;
    }
}

/**
 * Downloads every KCSO zone-history PDF, one after another, and then parses
 * them unless this is a test run.
 *
 * @param {*} fireDate - Passed through unchanged to `parseZones`.
 * @param {boolean} [test=false] - Forwarded to `downloadPdf`; when true the
 *   parse step is skipped.
 * @returns {Promise<string>} A status message describing what was done.
 *   Rejects if any download fails or `parseZones` throws.
 */
async function downloadKCSOZones(fireDate, test=false){
    const zoneIds = [100, 101, 102, 104, 200, 201, 202, 300, 302, 400, 401, 402, 403, 404];

    // Sequential on purpose: each download finishes before the next starts.
    for(const zoneId of zoneIds){
        const zoneUrl = `https://knoxsheriff.org/wp-content/zoneactivity/Zone%20History%20(${zoneId}).pdf`;
        const zoneFile = `zone${zoneId}.pdf`;
        await downloadPdf(zoneUrl, zoneFile, test);
    }

    if(test){
        return "Connection to KCSO Zones successful.";
    }

    parseZones(fireDate);
    return "Downloaded and parsed KCSO Zones successfully.";
}

/**
 * Downloads the UTPD Clery 60-day log and saves it as `clery.pdf` via
 * `downloadPdf`.
 *
 * @param {*} fireDate - Accepted but not used by this function.
 * @returns {Promise<undefined>} Settles once `downloadPdf` has settled.
 */
async function downloadClery(fireDate){
    const cleryLogUrl = "https://safety.utk.edu/police/wp-content/uploads/sites/2/2021/03/60-Day-Log.pdf";
    const cleryFile = "clery.pdf";

    await downloadPdf(cleryLogUrl, cleryFile);
}

module.exports = { downloadKPDCIP, downloadKCSOZones, downloadClery, downloadPdf };