"""
Functions allowing us to interact with and gather
data from the steam servers. This data is then saved
to a db (steam.db) in the project root folder.
Attributes:
STEAM_REVIEW_SORT_FILTERS (tuple(str)): The different Steam api sort modes.
"""
import datetime
import logging
import json
import urllib
import re
import os
import time
import sys
import common
import db_common
import review_model
# Sort modes sent to the Steam review api as the "filter" request parameter.
# The order is significant: code in this module indexes into the tuple
# ([0] -> "recent", [-1] -> "all").
STEAM_REVIEW_SORT_FILTERS = ("recent", "updated", "all")
class ConnectionError(Exception):
    """Raised when a Steam api request comes back with a non-200 response code.

    NOTE: on Python 3 this name shadows the built-in ``ConnectionError``
    inside this module; it is a plain ``Exception`` subclass, not an
    ``OSError``.
    """
def parse_reviews_for_app(appid):
    """Fetch game info and reviews for an app and store them in the db.

    Looks up the game's name, registers the app, runs the review parse
    loop for the tracked languages with the first sort filter and then
    stores the tracked languages.

    Args:
        appid (int): App/Game id.

    Returns:
        int: Number of reviews gathered by the parse loop.
    """
    sort_filter = STEAM_REVIEW_SORT_FILTERS[0]
    game_name = get_steam_game_info(appid)["name"]
    logging.info(
        "Retrieving and parsing reviews for '{0}' ({1}) {2}...".format(
            game_name, appid, sort_filter))
    db_common.insert_or_update_app(appid, game_name)

    tracked_languages = common.get_settings().get_tracked_languages()
    review_count = review_parse_loop(appid, tracked_languages, sort_filter)
    db_common.insert_or_update_languages(tracked_languages)

    separator = "---------------------------"
    logging.info(separator)
    logging.info("Added in total {} reviews".format(review_count))
    logging.info(separator)
    return review_count
def get_steam_game_info(appid):
    """Query the Steam store api for game data.

    Args:
        appid (int): App/Game id.

    Returns:
        dict: Parsed json game data (the "data" object of the api answer).

    Raises:
        ConnectionError: If the api answers with a non-200 response code.
        ValueError: If the answer holds no game data for ``appid``, either
            because the appid entry is missing or its "data" is empty.
    """
    url = "http://store.steampowered.com/api/appdetails?appids={}".format(appid)
    response = urllib.urlopen(url)
    try:
        response_code = response.getcode()
        response_content = response.read()
    finally:
        # Always release the connection, even if reading fails.
        response.close()

    if response_code != 200:
        raise ConnectionError(
            "Could not contact steam api. Response code is {}".format(response_code))

    data = json.loads(response_content)
    # The answer is keyed by the appid as a string. A missing entry used to
    # fall through and return None; treat it like an entry without "data".
    app_entry = data.get(str(appid)) if isinstance(data, dict) else None
    game_data = app_entry.get("data") if isinstance(app_entry, dict) else None
    if not game_data:
        raise ValueError("Provided appid ({}) is not a valid steam id.".format(appid))
    return game_data
def review_parse_loop(appid, languages, sort_by):
    """Main parse review loop.

    Queries the Steam api page by page (100 reviews per request) and
    stores/updates the db with every page. The loop ends when the api
    hands back a cursor that was already seen, or no cursor at all.

    Progress is logged whenever the running count is a multiple of 1000.
    A progress bar is additionally written to stdout when the environment
    variable ``scraper_show_progressbar`` is set to "1".

    Args:
        appid (int): App/Game id.
        languages (lst(Language)): List of language objects.
        sort_by (str): Steam review sort filter, ex 'all'.

    Returns:
        int: Number of gathered reviews.
    """
    current_cursor = "*"
    seen_cursors = set()
    language_keys = [lang.steam_key for lang in languages]
    # None means "not reported yet". The previous "Unknown" string sentinel
    # was compared against 0 and passed to float()/%d, which fails.
    total_reviews = None
    num_added = 0
    percent = 0
    updated_time = int(time.time())
    show_progressbar = os.getenv("scraper_show_progressbar", "0") == "1"

    while True:
        reviews, current_cursor, page_total = get_reviews_from_api(
            appid, language_keys, 100, sort_by, current_cursor)
        num_added += len(reviews)

        # Keep the last reported total when a page does not carry one.
        if page_total is not None:
            total_reviews = page_total
        if total_reviews is not None and total_reviews > 0:
            percent = round((float(num_added) / float(total_reviews)) * 100)
        total_label = "Unknown" if total_reviews is None else total_reviews

        db_common.insert_or_update_reviews(
            reviews,
            updated_time,
            include_user_input_columns=False
        )

        if num_added % 1000 == 0:
            if show_progressbar:
                # Move off the progress bar line before logging.
                sys.stdout.write("\n")
            logging.info("{0}%: {1}/{2} reviews saved to db".format(
                percent, num_added, total_label))
        if show_progressbar:
            sys.stdout.write("\r %d%% [%-100s] %d/%s reviews saved to db" % (
                percent, "=" * int(percent), num_added, total_label))
            sys.stdout.flush()

        if current_cursor is None:
            # Without a cursor there is no next page to ask for.
            logging.info("breaking on missing cursor. No more reviews to add")
            break
        if current_cursor in seen_cursors:
            logging.info("breaking on seen cursor {}. No more reviews to add".format(current_cursor))
            break
        if current_cursor != "*":
            seen_cursors.add(current_cursor)
    return num_added
def get_reviews_from_api(
        steam_appid,
        languages=None,
        num_per_page=20,
        filter=STEAM_REVIEW_SORT_FILTERS[-1],
        cursor="*"):
    """Query the Steam api for one page of reviews.

    Args:
        steam_appid (int): The game/app id.
        languages (lst(str)): Languages in format ex 'english'. None or an
            empty list requests all languages and disables the filtering.
        num_per_page (int): Page count. Will be the number of returned reviews. Max 100 as per Steam limits.
        filter (str): Filter, ex 'all'. (The name shadows the builtin but is
            kept because callers may pass it by keyword.)
        cursor (str): Current cursor. New cursor is returned after every request.

    Returns:
        (lst(SteamReview), str, int): Requested reviews, new cursor and total
        review count. Cursor and total are None when the answer omits them.

    Raises:
        ConnectionError: If the api answers with a non-200 response code.
        KeyError: If the answer carries no "reviews" entry.
    """
    # None replaces the former mutable [] default; both mean "no filter".
    language_keys = list(languages) if languages else []

    # day_range spans from 1993-01-01 until today.
    days_since_1993 = (datetime.datetime.now().date() - datetime.date(1993, 1, 1)).days
    options = {
        "json": "1",
        "cursor": cursor,
        "language": ",".join(language_keys) if language_keys else "all",
        "filter": filter,
        "review_type": "all",
        "purchase_type": "all",
        "num_per_page": num_per_page,
        "day_range": days_since_1993
    }
    url = "http://store.steampowered.com/appreviews/{0}?json=1&{1}".format(
        steam_appid, urllib.urlencode(options))

    response = urllib.urlopen(url)
    try:
        response_code = response.getcode()
        response_content = response.read()
    finally:
        # Always release the connection, even if reading fails.
        response.close()

    if response_code != 200:
        raise ConnectionError("Could not contact steam api. Response code is {}".format(response_code))

    response_data = json.loads(response_content)
    # Deliberately strict: a missing "reviews" entry raises instead of
    # looking like an empty page.
    reviews_data = response_data["reviews"]
    total_reviews = None
    if "query_summary" in response_data:
        total_reviews = response_data["query_summary"].get("total_reviews", None)
    response_cursor = response_data.get("cursor")

    reviews = []
    for review in reviews_data:
        if not review:
            continue
        review_id = review["recommendationid"]
        # Drop reviews in languages that were not asked for.
        if language_keys and review["language"] not in language_keys:
            logging.info("Skipping review {0}, {1} not in language list".format(
                review_id, review["language"]))
            continue
        reviews.append(construct_steamreview(steam_appid, review))
    return (reviews, response_cursor, total_reviews)
def construct_steamreview(steam_appid, review):
    """Build a SteamReview from one parsed review entry.

    Timestamps are converted with ``datetime.datetime.fromtimestamp``;
    missing created/updated timestamps fall back to 0, and a missing
    developer response timestamp yields ``None``.

    Args:
        steam_appid (int): App/Game id.
        review (dict): Parsed json review object.

    Returns:
        SteamReview: New steam review object.
    """
    review_id = review["recommendationid"]
    voted_up = review["voted_up"]
    author = review["author"]
    playtime = author["playtime_forever"]
    votes_up = review["votes_up"]
    # "Total" here is the sum of the up votes and the funny votes.
    votes_total = votes_up + review["votes_funny"]
    owned_games = author["num_games_owned"]
    during_early_access = review["written_during_early_access"]
    language = review["language"]
    for_free = review["received_for_free"]

    # The steam id doubles as the user name and feeds both profile links.
    steam_id = author["steamid"]
    recommendation_url = "https://steamcommunity.com/profiles/{}/recommended/{}".format(
        steam_id, steam_appid)
    profile_url = "http://steamcommunity.com/profiles/{}".format(steam_id)

    from_epoch = datetime.datetime.fromtimestamp
    posted_at = from_epoch(review.get("timestamp_created", 0))
    updated_at = from_epoch(review.get("timestamp_updated", 0))
    response_timestamp = review.get("timestamp_dev_responded", None)
    if response_timestamp is None:
        responded_at = None
    else:
        responded_at = from_epoch(response_timestamp)

    return review_model.SteamReview(
        review_id,
        recommendation_url,
        steam_appid,
        recommended=voted_up,
        hours_played=playtime,
        helpful_amount=votes_up,
        helpful_total=votes_total,
        games_owned=owned_games,
        early_access_review=during_early_access,
        lang_key=language,
        received_compensation=for_free,
        user_name=steam_id,
        user_link=profile_url,
        date_posted=posted_at,
        date_updated=updated_at,
        content=review.get("review", ""),
        responded_by=review.get("developer_response", None),
        responded_date=responded_at
    )
def remove_deleted_reviews(steam_appid, compare_time):
    """Remove reviews not updated after last sync.

    Collects the steam keys of the tracked languages, hands them to
    ``db_common.delete_all_unchanged_reviews`` together with the app id
    and ``compare_time``, and logs how many reviews were deleted.

    Args:
        steam_appid (int): App/Game id.
        compare_time (int): Epoch time of before last update.
    """
    tracked = common.get_settings().get_tracked_languages()
    steam_keys = [language.steam_key for language in tracked]
    logging.info(
        "Checking for deleted reviews (for {0}). Languages: {1}".format(
            steam_appid, ",".join(steam_keys)))

    deleted_count = db_common.delete_all_unchanged_reviews(
        steam_appid, steam_keys, compare_time)
    logging.info("Deleted {} reviews".format(deleted_count))