Maps / data / __init__.py
__init__.py
Raw
import collections
import os

from abstractions import *
import data.jsonl

DATA_DIRECTORY = 'data'
USER_DIRECTORY = 'users'

def load_data(user_dataset, review_dataset, restaurant_dataset):
    with open(os.path.join(DATA_DIRECTORY, user_dataset)) as f:
        user_data = jsonl.load(f)
    with open(os.path.join(DATA_DIRECTORY, review_dataset)) as f:
        review_data = jsonl.load(f)
    with open(os.path.join(DATA_DIRECTORY, restaurant_dataset)) as f:
        restaurant_data = jsonl.load(f)

    # Load users.
    userid_to_user = {}
    for user in user_data:
        name = user['name']
        _user_id = user['user_id']
        user = make_user(name, []) # MISSING: reviews

        userid_to_user[_user_id] = user

    # Load restaurants.
    busid_to_restaurant = {}
    for restaurant in restaurant_data:
        name = restaurant['name']
        location = float(restaurant['latitude']), float(restaurant['longitude'])
        categories = restaurant['categories']
        price = restaurant['price']
        if price is not None:
            price = int(price)
        num_reviews = int(restaurant['review_count'])

        _business_id = restaurant['business_id']
        restaurant = make_restaurant(name, location, categories, price, []) # MISSING: reviews
        busid_to_restaurant[_business_id] = restaurant

    # Load reviews.
    reviews = []
    busid_to_reviews = collections.defaultdict(list)
    userid_to_reviews = collections.defaultdict(list)
    for review in review_data:
        _user_id = review['user_id']
        _business_id = review['business_id']

        restaurant = restaurant_name(busid_to_restaurant[_business_id])
        rating = float(review['stars'])

        review = make_review(restaurant, rating)
        reviews.append(review)
        busid_to_reviews[_business_id].append(review)
        userid_to_reviews[_user_id].append(review)
    # Reviews done.

    restaurants = {}
    for busid, restaurant in busid_to_restaurant.items():
        name = restaurant_name(restaurant)
        location = list(restaurant_location(restaurant))
        categories = restaurant_categories(restaurant)
        price = restaurant_price(restaurant)
        restaurant_reviews = busid_to_reviews[busid]

        restaurant = make_restaurant(name, location, categories, price, restaurant_reviews)
        restaurants[name] = restaurant
    # Restaurants done.

    users = []
    for userid, user in userid_to_user.items():
        name = user_name(user)
        user_reviews = userid_to_reviews[userid]

        user = make_user(name, user_reviews)
        users.append(user)
    # Users done.

    return users, reviews, list(restaurants.values())

USERS, REVIEWS, ALL_RESTAURANTS = load_data('users.json', 'reviews.json', 'restaurants.json')
CATEGORIES = {c for r in ALL_RESTAURANTS for c in restaurant_categories(r)}

def load_user_file(user_file):
    with open(os.path.join(USER_DIRECTORY, user_file)) as f:
        return eval(f.read())

import glob
USER_FILES = [f[6:-4] for f in glob.glob('users/*.dat')]