"""Flask app that scrapes customer reviews for a searched product from Flipkart.

Routes:
    GET  /         -- render the search form (index.html).
    POST /reviews  -- scrape reviews for the submitted search text and
                      render them (results.html), or re-render the form
                      with an error message if anything fails.
"""

from urllib.parse import quote_plus
from urllib.request import urlopen as uReq

import requests
from bs4 import BeautifulSoup as bs
from flask import Flask, render_template, jsonify, request
from flask_cors import CORS, cross_origin  # to avoid cross domain errors

# NOTE: an optional MongoDB cache (pymongo, database "crawlerDB", one
# collection per search string) was sketched in earlier revisions but is
# disabled; results are always scraped live.

# Initialize the flask app
app = Flask(__name__)
# CORS(app)  # disabled; enable to avoid cross domain errors

FLIPKART_BASE_URL = "https://www.flipkart.com"

# Seconds to wait on each outbound HTTP call before giving up, so that a
# stalled upstream connection cannot hang the request indefinitely.
REQUEST_TIMEOUT = 10

# Exceptions that navigating a parsed page can raise when an expected tag,
# list entry or attribute is missing (e.g. `None.div`, `[][0]`, `None['href']`).
_PARSE_ERRORS = (AttributeError, IndexError, KeyError, TypeError)


# Display home page
@app.route('/', methods=['GET'])
def homepage():
    """Render the search form."""
    return render_template("index.html")


def _text_or_default(getter, default):
    """Return ``getter()``, or ``default`` if the expected markup is missing."""
    try:
        return getter()
    except _PARSE_ERRORS:
        return default


def _fetch_search_page(searchStr):
    """Download and parse the Flipkart search results page for ``searchStr``."""
    # Percent-encode the query so characters such as '&', '#' or non-ASCII
    # text cannot corrupt the URL.
    srchURL = FLIPKART_BASE_URL + "/search?q=" + quote_plus(searchStr)
    # The context manager closes the connection even if read() raises.
    with uReq(srchURL, timeout=REQUEST_TIMEOUT) as uClient:
        srchPage = uClient.read()
    return bs(srchPage, 'html.parser')


def _parse_review(comment, prodName, prodURL):
    """Build the review record for one review section of the product page.

    Each field falls back to a placeholder string when its tag cannot be
    found, so one malformed review does not abort the whole request.
    """

    def _comment_text():
        commTag = comment.div.div.find_all('div', {'class': ''})
        return commTag[0].div.text

    # Tag containing the buyer name
    name = _text_or_default(
        lambda: comment.div.div.find_all(
            'p', {'class': '_2sc7ZR _2V5EHH'})[0].text,
        "Not available",
    )
    # Tag with the rating
    rating = _text_or_default(
        lambda: comment.div.div.div.div.text,
        "Not available",
    )
    # Tag with the review headline
    commentHead = _text_or_default(
        lambda: comment.div.div.div.p.text,
        "Not Available",
    )
    # Tag with the review text
    custComm = _text_or_default(_comment_text, "Not Available")

    return {
        'Product Name': prodName,
        'Link': prodURL,
        "Buyer Name": name,
        "Rating": rating,
        "CommentHead": commentHead,
        "Comment": custComm,
    }


# Scrape and show results
@app.route('/reviews', methods=['POST'])
def index():
    """Scrape reviews for the submitted search text and render them.

    Reads the ``content`` form field, removes its spaces, searches Flipkart,
    opens one product from the results and collects that product's reviews.

    Returns:
        The rendered ``results.html`` with a ``reviews`` list of dicts, or
        the rendered ``index.html`` with an ``error`` message when fetching
        or parsing fails.
    """
    # The route only accepts POST, so no method check is needed here.
    searchRes = request.form['content']       # get search string
    searchStr = searchRes.replace(' ', '')    # remove spaces

    try:
        srch_html = _fetch_search_page(searchStr)

        # NOTE(review): the CSS class names below are tied to Flipkart's
        # markup at the time of writing -- confirm they still match.
        # Find the div class tag for all products on the page
        prod_boxes = srch_html.find_all(
            'div', {"class": "_1AtVbE col-12-12"})
        # Skip the first three matched boxes and take the next one for the
        # demo; raises IndexError (handled below) if there are too few.
        prod = prod_boxes[3]

        # Extract the product link
        prodURL = FLIPKART_BASE_URL + prod.div.div.div.a['href']
        # Extract full product name
        prodName = srch_html.find_all('div', {"class": "_4rR01T"})[0].text

        # Open the product page and parse it as HTML
        prodRes = requests.get(prodURL, timeout=REQUEST_TIMEOUT)
        prod_html = bs(prodRes.text, "html.parser")

        # Find sections (class tag) with customer reviews
        comments = prod_html.find_all('div', {'class': '_16PBlm'})
        reviews = [
            _parse_review(comment, prodName, prodURL)
            for comment in comments
        ]

        # The last matched section is dropped, as in the original code.
        # NOTE(review): presumably it is not an actual review -- confirm.
        return render_template('results.html', reviews=reviews[:-1])
    except Exception:
        # Top-level boundary: record the traceback for debugging, then show
        # the user a friendly message instead of a 500 page.
        app.logger.exception("Review scraping failed for %r", searchRes)
        error = "Oops... something went wrong. " \
                "Please search for a different product or try one of the keywords mentioned above."
        print(error)
        return render_template('index.html', error=error)


if __name__ == "__main__":
    app.run(debug=True)

# App Link on Heroku: https://protected-island-35343.herokuapp.com/