"""
Functions allowing us to interact with and gather data from the steam servers.

This data is then saved to a db (steam.db) in the project root folder.

Attributes:
    STEAM_REVIEW_SORT_FILTERS (tuple(str)): The different steam api sort modes.
"""

import contextlib
import datetime
import logging
import json
import urllib
import re
import os
import time
import sys

try:  # Python 3 moved these helpers into urllib submodules.
    from urllib.parse import urlencode
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlencode, urlopen

import common
import db_common
import review_model


STEAM_REVIEW_SORT_FILTERS = (
    "recent",
    "updated",
    "all"
)


class ConnectionError(Exception):
    """Error indicating we were unable to communicate with steam servers."""
    # NOTE: shadows the Python 3 builtin of the same name inside this module.
    # The name is kept because callers catch this class by that name.


def _fetch_json(url):
    """
    Fetch a url and return its parsed json payload.

    Args:
        url (str): Url to request.

    Returns:
        Parsed json data (normally a dict).

    Raises:
        ConnectionError: If the request fails or the response code is not 200.
    """
    try:
        # closing() releases the connection on both Python 2 and Python 3.
        with contextlib.closing(urlopen(url)) as response:
            response_code = response.getcode()
            response_content = response.read()
    except IOError as err:
        # Python 3 raises HTTPError/URLError (IOError subclasses) where
        # Python 2 returned a response object carrying the error code.
        error_code = getattr(err, "code", None)
        if error_code is None:
            raise ConnectionError("Could not contact steam api. {}".format(err))
        raise ConnectionError(
            "Could not contact steam api. Response code is {}".format(error_code))

    if response_code != 200:
        raise ConnectionError(
            "Could not contact steam api. Response code is {}".format(response_code))

    # read() returns bytes on Python 3; decode explicitly so json gets text.
    return json.loads(response_content.decode("utf-8"))


def parse_reviews_for_app(appid):
    """
    Gathers game info and reviews for a given appid.

    The app and the tracked languages are stored in the db along with the
    reviews. Reviews are requested using the first sort filter ("recent").

    Args:
        appid (int): App/Game id.

    Returns:
        int: Number of gathered reviews.

    Raises:
        ValueError: If the appid is not known to steam.
        ConnectionError: If the steam api could not be reached.
    """
    sort_by = STEAM_REVIEW_SORT_FILTERS[0]
    appinfo = get_steam_game_info(appid)
    app_name = appinfo["name"]

    logging.info("Retrieving and parsing reviews for '{0}' ({1}) {2}...".format(
        app_name, appid, sort_by))

    db_common.insert_or_update_app(appid, app_name)

    languages = common.get_settings().get_tracked_languages()
    num_added_reviews = review_parse_loop(appid, languages, sort_by)
    db_common.insert_or_update_languages(languages)

    logging.info("---------------------------")
    logging.info("Added in total {} reviews".format(num_added_reviews))
    logging.info("---------------------------")

    return num_added_reviews


def get_steam_game_info(appid):
    """
    Query Steam api for game data.

    Args:
        appid (int): App/Game id.

    Returns:
        dict: Parsed json game data.

    Raises:
        ValueError: If the response holds no game data for the appid.
        ConnectionError: If the steam api could not be reached.
    """
    url = "http://store.steampowered.com/api/appdetails?appids={}".format(appid)
    data = _fetch_json(url)

    # The payload is keyed by the appid as a string. A missing key and a
    # missing/empty "data" entry both mean the appid is unusable.
    app_entry = data.get(str(appid)) if isinstance(data, dict) else None
    game_data = app_entry.get("data") if app_entry else None
    if not game_data:
        raise ValueError("Provided appid ({}) is not a valid steam id.".format(appid))
    return game_data


def review_parse_loop(appid, languages, sort_by):
    """
    Main parse review loop.

    Queries the Steam api for new reviews, page by page, then stores/updates
    the sqlite db with the new data. The loop stops when the api hands back a
    cursor that was already requested, or no cursor at all.

    Args:
        appid (int): App/Game id.
        languages (lst(Language)): List of language objects.
        sort_by (str): Steam review sort filter, ex 'all'.

    Returns:
        int: Number of gathered reviews.
    """
    current_cursor = "*"
    # Every cursor we have already requested, including the initial "*", so
    # an api that echoes a cursor back can never trap us in an endless loop.
    seen_cursors = set()
    language_keys = [lang.steam_key for lang in languages]
    total_reviews = None  # Unknown until the api reports it.
    num_added = 0
    percent = 0
    updated_time = int(time.time())
    show_progressbar = os.getenv("scraper_show_progressbar", "0") == "1"

    while True:
        seen_cursors.add(current_cursor)
        reviews, current_cursor, page_total = get_reviews_from_api(
            appid, language_keys, 100, sort_by, current_cursor)

        num_added += len(reviews)
        # Keep the last known total when a page does not report one.
        if page_total is not None:
            total_reviews = page_total
        if total_reviews:
            percent = int(round(float(num_added) / float(total_reviews) * 100))
        total_label = "Unknown" if total_reviews is None else total_reviews

        db_common.insert_or_update_reviews(
            reviews,
            updated_time,
            include_user_input_columns=False
        )

        if num_added % 1000 == 0:
            if show_progressbar:
                # Move off the progress bar line before logging.
                sys.stdout.write("\n")
            logging.info("{0}%: {1}/{2} reviews saved to db".format(
                percent, num_added, total_label))

        if show_progressbar:
            sys.stdout.write("\r %d%% [%-100s] %d/%s reviews saved to db" % (
                percent, "=" * percent, num_added, total_label))
            sys.stdout.flush()

        if current_cursor is None:
            logging.info("No cursor returned by steam api. No more reviews to add")
            break
        if current_cursor in seen_cursors:
            logging.info(
                "breaking on seen cursor {}. No more reviews to add".format(current_cursor))
            break

    return num_added


def get_reviews_from_api(
        steam_appid,
        languages=None,
        num_per_page=20,
        filter=STEAM_REVIEW_SORT_FILTERS[-1],
        cursor="*"):
    """
    Query the Steam api for reviews.

    Args:
        steam_appid (int): The game/app id.
        languages (lst(str)): Languages in format ex 'english'. None or an
            empty list requests all languages.
        num_per_page (int): Page count. Will be the number of returned reviews.
            Max 100 as per Steam limits.
        filter (str): Filter, ex 'all'.
        cursor (str): Current cursor. New cursor is returned after every request.

    Returns:
        (lst(SteamReview), str, int): Requested reviews, new cursor and total
        review count. Cursor and total are None when absent from the response.

    Raises:
        ConnectionError: If the steam api could not be reached.
    """
    languages = languages or []
    # Day range reaching back to 1993-01-01, as in the original request.
    delta = datetime.datetime.now().date() - datetime.date(1993, 1, 1)
    options = {
        "json": "1",
        "cursor": cursor,
        "language": ",".join(languages) if languages else "all",
        "filter": filter,
        "review_type": "all",
        "purchase_type": "all",
        "num_per_page": num_per_page,
        "day_range": delta.days
    }
    url = "http://store.steampowered.com/appreviews/{0}?{1}".format(
        steam_appid, urlencode(options))

    response_data = _fetch_json(url)

    total_reviews = None
    if "query_summary" in response_data:
        total_reviews = response_data["query_summary"].get("total_reviews", None)
    response_cursor = response_data.get("cursor")

    reviews = []
    # A payload without "reviews" is treated as an empty page.
    for review in response_data.get("reviews") or []:
        if not review:
            continue
        if languages and review["language"] not in languages:
            logging.info("Skipping review {0}, {1} not in language list".format(
                review["recommendationid"], review["language"]))
            continue
        reviews.append(construct_steamreview(steam_appid, review))

    return (reviews, response_cursor, total_reviews)


def construct_steamreview(steam_appid, review):
    """
    Construct and return a new SteamReview.

    Args:
        steam_appid (int): App/Game id.
        review (dict): Parsed json review object.

    Returns:
        SteamReview: New steam review object.
    """
    author = review["author"]
    steam_id = author["steamid"]

    review_url = "https://steamcommunity.com/profiles/{}/recommended/{}".format(
        steam_id, steam_appid)
    user_link = "http://steamcommunity.com/profiles/{}".format(steam_id)

    date_posted = datetime.datetime.fromtimestamp(
        review.get("timestamp_created", 0))
    date_updated = datetime.datetime.fromtimestamp(
        review.get("timestamp_updated", 0))

    # The developer response timestamp is only converted when present.
    responded_date = None
    responded_timestamp = review.get("timestamp_dev_responded", None)
    if responded_timestamp is not None:
        responded_date = datetime.datetime.fromtimestamp(responded_timestamp)

    return review_model.SteamReview(
        review["recommendationid"],
        review_url,
        steam_appid,
        recommended=review["voted_up"],
        hours_played=author["playtime_forever"],
        helpful_amount=review["votes_up"],
        helpful_total=review["votes_up"] + review["votes_funny"],
        games_owned=author["num_games_owned"],
        early_access_review=review["written_during_early_access"],
        lang_key=review["language"],
        received_compensation=review["received_for_free"],
        # The steam id doubles as the user name.
        user_name=steam_id,
        user_link=user_link,
        date_posted=date_posted,
        date_updated=date_updated,
        content=review.get("review", ""),
        responded_by=review.get("developer_response", None),
        responded_date=responded_date
    )


def remove_deleted_reviews(steam_appid, compare_time):
    """
    Remove reviews not updated after last sync.

    Args:
        steam_appid (int): App/Game id.
        compare_time (int): Epoch time of before last update.
    """
    languages = common.get_settings().get_tracked_languages()
    language_keys = [lang.steam_key for lang in languages]

    logging.info("Checking for deleted reviews (for {0}). Languages: {1}".format(
        steam_appid, ",".join(language_keys)))

    num_deleted = db_common.delete_all_unchanged_reviews(
        steam_appid,
        language_keys,
        compare_time
    )
    logging.info("Deleted {} reviews".format(num_deleted))