from partition_algorithm import PartitionAlgorithm
import logging
from colorlog import ColoredFormatter
handler = logging.StreamHandler()
handler.setLevel(logging.DEBUG)
formatter = ColoredFormatter(
"%(log_color)s%(levelname)-8s:%(name)s:%(message)s",
log_colors={
"DEBUG": "cyan",
"INFO": "green",
"WARNING": "yellow",
"ERROR": "red",
"CRITICAL": "bold_red",
},
)
handler.setFormatter(formatter)
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logger.handlers = []
logger.addHandler(handler)
def test_bounding_split_effect():
"""
Test the effect of bounding split when enabled and disabled
"""
benchmark = "tpch"
benchmark_dict = {
"tpch": [
"lineitem",
"orders",
"customer",
# "nation",
# "region",
# "part",
# "supplier",
# "partsupp",
],
"imdb": ["title", "movie_companies", "cast_info", "name"],
"tpcds": [
"store",
"item",
"household_demographics",
"customer",
],
}
trees = {}
pa = PartitionAlgorithm(benchmark=benchmark)
pa.load_join_query(join_indeuced="PAW")
cost_dict = dict()
table_list = set()
for join_query in pa.join_queries:
if join_query["join_relations"]:
for join_op in join_query["join_relations"]:
for join_table, join_col in join_op.items():
table_list.add(join_table)
for tablename in benchmark_dict[benchmark]:
pa.table_name = tablename
pa.load_data()
pa.load_query(join_indeuced="PAW")
for if_bounding_split in [True, False]:
pa.InitializeWithJT(
enable_bounding_split=if_bounding_split, enable_median_extend=False
)
bounding_flag = 1 if if_bounding_split else 0
cost_dict.setdefault(bounding_flag, {})
trees.setdefault(bounding_flag, {})
trees[bounding_flag][tablename] = pa.partition_tree
trees[bounding_flag][tablename].name = "PAC-Tree"
tree_depth = pa.evaluate_tree_depth(pa.partition_tree.pt_root, 0)
tot_cost = pa.evaluate_single_table_access_cost()
cost_dict[bounding_flag][tablename] = tot_cost
logging.info(
f"enable bounding: {if_bounding_split}, {cost_dict[bounding_flag][tablename]}, max_depth:{tree_depth}"
)
for bounding_flag in cost_dict.keys():
avg_cost = sum(cost_dict[bounding_flag].values()) / len(cost_dict[bounding_flag])
logger.info(
f"Average cost for bounding split {'enabled' if bounding_flag == 1 else 'disabled'}: {avg_cost}"
)
if __name__ == "__main__":
test_bounding_split_effect()
"""
bounding split test result:
_______imdb_______________
___table __enable __disable
title: 0.022 0.02495
movie_companies: 0.04585 0.069358
cast_info: 0.0083 0.0083
name: 0.0414 0.041
________tpch______________
___table __enable __disable
lineitem:0.313 0.313
orders: 0.0527 0.0687
customer:0.070 0.07009
_______tpcds_______________
store: 0.17105 0.17105
item: 0.1307 0.1307
household_demographics: 0.4026 0.4026
customer: 0.314 0.3148
"""