# ValueNet4SPARQL: src/tools/orphan_tables_check.py
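"""Flag Spider dev queries that join "orphan" tables.

For each query in dev.json, the script inspects the first join condition and
prints the query when the joined column pair is not a declared foreign key
and at least one of the joined tables does not appear in any foreign-key
pair of the database schema (tables.json).
"""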
import json

data = 'data/spider/dev.json'
table = 'spider/tables.json'

with open(data) as d, open(table) as t:
    queries = json.load(d)
    schema = json.load(t)
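# In tables.json, each schema entry provides (among other fields):
#   'column_names':  a list of [table_index, column_name] pairs, and
#   'foreign_keys':  a list of [column_index, column_index] pairs,
# where the indices refer back into 'column_names'. The checks below rely on
# exactly these two fields plus 'db_id'.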

# Leftovers from an earlier pass that wrote a de-duplicated JSON file
# (see the commented-out code below and at the end of the script):

#value_list=[]

#db_list=['flight_2', 'wta_1']
#db_list=['concert_singer', 'pets_1', 'car_1', 'flight_2', 'cre_Doc_Template_Mgt', 'course_teach', 'world_1', 'network_1', 'dog_kennels', 'battle_death',
#         'employee_hire_evaluation', 'museum_visit','orchestra','poker_player','real_estate_properties', 'singer', 'student_transcripts_tracking', 'tvshow', 'wta_1']
for query in queries:
    # Collect every table that takes part in at least one declared
    # foreign-key relationship in this query's database schema.
    pk_fk_tables = []
    db_entry = next(s for s in schema if s['db_id'] == query['db_id'])
    for pk_fk in db_entry['foreign_keys']:
        for idx, column in enumerate(db_entry['column_names']):
            if idx in pk_fk and column[0] not in pk_fk_tables:
                pk_fk_tables.append(column[0])

    # Queries without join conditions cannot reference orphan tables.
    if not query['sql']['from']['conds']:
        continue

    # First join condition, following Spider's SQL encoding:
    # cond = (not_op, op_id, val_unit, val1, val2), val_unit = (unit_op, col_unit1, col_unit2),
    # col_unit = (agg_id, col_id, is_distinct), so [2][1][1] and [3][1] are column ids.
    first_cond = query['sql']['from']['conds'][0]
    pk = first_cond[2][1][1]
    fk = first_cond[3][1]

    if [pk, fk] in db_entry['foreign_keys'] or [fk, pk] in db_entry['foreign_keys']:
        continue
    if (db_entry['column_names'][pk][0] in pk_fk_tables
            and db_entry['column_names'][fk][0] in pk_fk_tables):
        continue

    # The joined columns are neither a declared foreign key nor attached to
    # tables covered by any foreign key: report the query.
    print(db_entry['db_id'], [fk, pk], query['query'])
    # if len(value_list) == 0 :
    #     value_list.append(q)
    # elif (q["query"] not in value_list[i]['query']) and (q['db_id'] in db_list):
    #     value_list.append(q)
    #     i += 1


# with open("data/spider/dev_noDuplicates.json", 'w') as new_file:
#     json.dump(value_list, new_file, indent=4)  # indent=4 pretty-prints the JSON
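# Usage (assuming the Spider files sit at the hard-coded paths above):
#   python src/tools/orphan_tables_check.py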