import numpy as np import torch import requests import time HOST = 'http://localhost:6725' def exec_analyze_query(sql: str, db: str, settings: dict): sql = "explain analyze json=1 " + sql settings['database'] = db time1 = time.time() r = requests.post(HOST, data=sql, params=settings) time2 = time.time() r.encoding = 'utf-8' resp = r.json() return resp['plan'], time2-time1 NodeTypes = [ "Projection", "MergingAggregated", "Exchange", "Aggregating", "Join", "Filter", "TableScan" ] RowsNorm = 1e7 CostNorm = 1e8 TimeNorm = 1e4 def plan_tree_to_vecs(plan): res = [] nodes= [plan] while len(nodes) > 0: node = nodes.pop() arr = [0. for _ in range(9)] stats = node['Statistic'] arr[len(NodeTypes)] = 0. if 'RowCount' not in stats else stats['RowCount'] / RowsNorm arr[len(NodeTypes) + 1] = 0. if 'Cost' not in stats else stats['Cost'] / CostNorm res.append(arr) if 'Children' in node: nodes = node['Children'] + nodes assert len(res) <= 100 if len(res) < 100: for _ in range(100-len(res)): res.append([0. for _ in range(9)]) return res def generate_plan_tree(plan): arr = [0. for _ in range(9)] # type one-hot + rows + cost if plan['NodeType'] in NodeTypes: arr[NodeTypes.index(plan['NodeType'])] = 1. stats = plan['Statistic'] arr[len(NodeTypes)] = 0. if 'RowCount' not in stats else stats['RowCount'] / RowsNorm arr[len(NodeTypes) + 1] = 0. if 'Cost' not in stats else stats['Cost'] / CostNorm items = [arr] if 'Children' in plan: for next in plan['Children']: items.append(generate_plan_tree(next)) while len(items) < 3: items.append([[0. for _ in range(9)]]) return items def generate_latency(plan): profiles = plan['Profiles'] arr = [0.] if 'WallTimeMs' not in profiles else [profiles['WallTimeMs'] / TimeNorm] items = [arr] if 'Children' in plan: for next in plan['Children']: items.append(generate_latency(next)) while len(items) < 3: items.append([[0.]]) return items class TreeConvolutionError(Exception): pass def _is_leaf(x, left_child, right_child): has_left = left_child(x) is not None has_right = right_child(x) is not None if has_left != has_right: raise TreeConvolutionError( "All nodes must have both a left and a right child or no children" ) return not has_left def _flatten(root, transformer, left_child, right_child): """ turns a tree into a flattened vector, preorder """ if not callable(transformer): raise TreeConvolutionError( "Transformer must be a function mapping a tree node to a vector" ) if not callable(left_child) or not callable(right_child): raise TreeConvolutionError( "left_child and right_child must be a function mapping a " + "tree node to its child, or None" ) accum = [] def recurse(x): if _is_leaf(x, left_child, right_child): accum.append(transformer(x)) return accum.append(transformer(x)) recurse(left_child(x)) recurse(right_child(x)) recurse(root) try: accum = [np.zeros(len(accum[0]))] + accum except: raise TreeConvolutionError( "Output of transformer must have a .shape (e.g., numpy array)" ) return np.array(accum) def _preorder_indexes(root, left_child, right_child, idx=1): """ transforms a tree into a tree of preorder indexes """ if not callable(left_child) or not callable(right_child): raise TreeConvolutionError( "left_child and right_child must be a function mapping a " + "tree node to its child, or None" ) if _is_leaf(root, left_child, right_child): # leaf return idx def rightmost(tree): if isinstance(tree, tuple): return rightmost(tree[2]) return tree left_subtree = _preorder_indexes(left_child(root), left_child, right_child, idx=idx+1) max_index_in_left = rightmost(left_subtree) right_subtree = _preorder_indexes(right_child(root), left_child, right_child, idx=max_index_in_left + 1) return (idx, left_subtree, right_subtree) def _tree_conv_indexes(root, left_child, right_child): """ Create indexes that, when used as indexes into the output of `flatten`, create an array such that a stride-3 1D convolution is the same as a tree convolution. """ if not callable(left_child) or not callable(right_child): raise TreeConvolutionError( "left_child and right_child must be a function mapping a " + "tree node to its child, or None" ) index_tree = _preorder_indexes(root, left_child, right_child) def recurse(root): if isinstance(root, tuple): my_id = root[0] left_id = root[1][0] if isinstance(root[1], tuple) else root[1] right_id = root[2][0] if isinstance(root[2], tuple) else root[2] yield [my_id, left_id, right_id] yield from recurse(root[1]) yield from recurse(root[2]) else: yield [root, 0, 0] return np.array(list(recurse(index_tree))).flatten().reshape(-1, 1) def _pad_and_combine(x): assert len(x) >= 1 assert len(x[0].shape) == 2 for itm in x: if itm.dtype == np.dtype("object"): raise TreeConvolutionError( "Transformer outputs could not be unified into an array. " + "Are they all the same size?" ) second_dim = x[0].shape[1] for itm in x[1:]: assert itm.shape[1] == second_dim max_first_dim = max(arr.shape[0] for arr in x) vecs = [] for arr in x: padded = np.zeros((max_first_dim, second_dim)) padded[0:arr.shape[0]] = arr vecs.append(padded) return np.array(vecs) def prepare_trees(trees, transformer, left_child, right_child, cuda=False): flat_trees = [_flatten(x, transformer, left_child, right_child) for x in trees] flat_trees = _pad_and_combine(flat_trees) flat_trees = torch.Tensor(flat_trees) # flat trees is now batch x max tree nodes x channels flat_trees = flat_trees.transpose(1, 2) if cuda: flat_trees = flat_trees.cuda() indexes = [_tree_conv_indexes(x, left_child, right_child) for x in trees] indexes = _pad_and_combine(indexes) indexes = torch.Tensor(indexes).long() if cuda: indexes = indexes.cuda() return (flat_trees, indexes) def flatten_tree(tree, flat_list): flatten_list(tree, flat_list) return flat_list def flatten_list(lst, flat_list): for item in lst: if isinstance(item, list) and len(item) != 9: flatten_list(item, flat_list) else: flat_list.append(item)