# Lancelot / test / ssb / converttbltocsv.py
#run python util.py ssb 16 transform

def convert_tbl_to_csv(filename, header):
    """Convert the pipe-delimited ``<filename>.tbl`` into ``<filename>.csv``.

    The output starts with ``header`` on its own line.  Each input line is
    then rewritten as follows:

    * the trailing ``|`` field terminator (if present) is dropped;
    * every literal ``,`` becomes ``N`` (presumably so that commas embedded
      in the data are not read as CSV delimiters);
    * every remaining ``|`` becomes ``,``.

    Args:
        filename: Path of the table without its extension; ``.tbl`` is
            appended for the input and ``.csv`` for the output.
        header: Comma-separated column names written as the first CSV line.
    """
    # Open the input first so that a missing table does not leave behind a
    # CSV containing only the header.
    with open(filename + ".tbl", "r") as tbl, open(filename + ".csv", "w") as csv:
        csv.write(header + "\n")
        # Stream line by line: the tables can be far too large for readlines().
        for line in tbl:
            record = line.rstrip("\n")
            # Strip the terminator explicitly instead of slicing by position,
            # so a final line without a newline is handled correctly.
            if record.endswith("|"):
                record = record[:-1]
            csv.write(record.replace(",", "N").replace("|", ",") + "\n")

def convert_tblp_to_csv(filename, header):
    """Convert the pipe-delimited ``<filename>.tbl.p`` into ``<filename>.csv``.

    The output starts with ``header`` on its own line.  Each input line is
    then rewritten as follows:

    * the trailing ``|`` field terminator (if present) is dropped;
    * every literal ``,`` becomes ``N`` (presumably so that commas embedded
      in the data are not read as CSV delimiters);
    * every remaining ``|`` becomes ``,``.

    Args:
        filename: Path of the table without its extension; ``.tbl.p`` is
            appended for the input and ``.csv`` for the output.
        header: Comma-separated column names written as the first CSV line.
    """
    # Open the input first so that a missing table does not leave behind a
    # CSV containing only the header.
    with open(filename + ".tbl.p", "r") as tbl, open(filename + ".csv", "w") as csv:
        csv.write(header + "\n")
        # Stream line by line: the tables can be far too large for readlines().
        for line in tbl:
            record = line.rstrip("\n")
            # Strip the terminator explicitly instead of slicing by position,
            # so a final line without a newline is handled correctly.
            if record.endswith("|"):
                record = record[:-1]
            csv.write(record.replace(",", "N").replace("|", ",") + "\n")


# Directory (relative to the working directory) that holds the input tables;
# each CSV is written next to its source table.
DATA_DIR = "data/s320/"

# CSV header rows for the tables read from ``<name>.tbl``.
TBL_HEADERS = {
    "lineorder": "LO_ORDERKEY,LO_LINENUMBER,LO_CUSTKEY,LO_PARTKEY,LO_SUPPKEY,LO_ORDERDATE,LO_ORDPRIORITY,LO_SHIPPRIORITY,LO_QUANTITY,LO_EXTENDEDPRICE,LO_ORDTOTALPRICE,LO_DISCOUNT,LO_REVENUE,LO_SUPPLYCOST,LO_TAX,LO_COMMITDATE,LO_SHIPMODE",
    # The last four flag columns follow the SSB date dimension (last day in
    # week, last day in month, holiday, weekday); every name must be unique.
    "date": "D_DATEKEY,D_DATE,D_DAYOFWEEK,D_MONTH,D_YEAR,D_YEARMONTHNUM,D_YEARMONTH,D_DAYNUMINWEEK,D_DAYNUMINMONTH,D_DAYNUMINYEAR,D_MONTHNUMINYEAR,D_WEEKNUMINYEAR,D_SELLINGSEASON,D_LASTDAYINWEEKFL,D_LASTDAYINMONTHFL,D_HOLIDAYFL,D_WEEKDAYFL",
}

# CSV header rows for the tables read from ``<name>.tbl.p``.
TBLP_HEADERS = {
    "customer": "C_CUSTKEY,C_NAME,C_ADDRESS,C_CITY,C_NATION,C_REGION,C_PHONE,C_MKTSEGMENT",
    "part": "P_PARTKEY,P_NAME,P_MFGR,P_CATEGORY,P_BRAND1,P_COLOR,P_TYPE,P_SIZE,P_CONTAINER",
    "supplier": "S_SUPPKEY,S_NAME,S_ADDRESS,S_CITY,S_NATION,S_REGION,S_PHONE",
}


if __name__ == "__main__":
    # Convert every table, pairing each file with its header row.
    for filename, header in TBL_HEADERS.items():
        convert_tbl_to_csv(DATA_DIR + filename, header)

    for filename, header in TBLP_HEADERS.items():
        convert_tblp_to_csv(DATA_DIR + filename, header)