"""
*******************************************************
Owner: Ashutosh Jha
For: APPCAIR BITS Pilani Goa Campus, Reflexis Systems
Function Name: main()
Driver Code for running Ripple Down Rules script.
*******************************************************
"""
import argparse
import contextlib
import os
import shutil
import sys
import time

import ete3
import numpy as np
import pandas as pd

from get_target import get_target
from exec_node import exec_node
from CreateFirstRule import CreateFirstRule
from CheckIfBelongs import CheckIfBelongs
from askForRule import askForRule
from addRule import addRule
from PreOrderTraversal import PreOrderTraversal
from Tree import Tree
from newick_format import newick_format

# Directory (relative to the current working directory) that receives all
# output artefacts of a run.
RESULTS_DIR = "Results"


def _find_inp_cond(name, node):
    """Return the ``inp_cond`` of the first node called *name*, searched pre-order.

    Falls through to ``None`` when no node matches. A matching node whose
    ``inp_cond`` is falsy in the left subtree does not stop the search of
    the right subtree (truthiness is what the recursion tests).
    """
    if not node:
        return None
    if node.name == name:
        return node.inp_cond
    return _find_inp_cond(name, node.left) or _find_inp_cond(name, node.right) or None


def _reset_results_dir():
    """Remove any existing results directory and create a fresh, empty one."""
    if os.path.isdir(RESULTS_DIR) and not os.path.islink(RESULTS_DIR):
        shutil.rmtree(RESULTS_DIR)
    elif os.path.lexists(RESULTS_DIR):
        # A plain file or symlink with the same name would block makedirs.
        os.remove(RESULTS_DIR)
    os.makedirs(RESULTS_DIR)


def _render_tree(t, root):
    """Render the ete3 tree *t* to a PNG, labelling nodes with their rule condition."""
    ts = ete3.TreeStyle()
    ts.show_branch_length = False
    ts.show_leaf_name = False
    ts.show_branch_support = False
    ts.branch_vertical_margin = 20
    ts.scale = 120
    ts.optimal_scale_level = "full"

    def my_layout(node):
        # Look the condition up in the RDR tree by the ete3 node's name.
        inp_condition = _find_inp_cond(node.name, root)
        face = ete3.TextFace(inp_condition, tight_text=True)
        ete3.add_face_to_node(face, node, column=0, position="branch-right")

    ts.layout_fn = my_layout
    t.render("Results/RDR_tree.png", tree_style=ts)


def _place_row(root, cur_row, z, target_col, conclusions, last_true, stop_asking):
    """Walk the rule tree for one row and decide where (if anywhere) a rule goes.

    The walk is steered by the code returned from ``CheckIfBelongs``:
    ``1`` records the node's label as the conclusion; ``2`` moves to (or
    offers to create) the right child; ``4`` is the branch taken when no
    target column is available; any other value moves to (or offers to
    create) the left child.

    Returns a ``(head, last_true, stop_asking)`` tuple. ``head`` is the newly
    created empty node awaiting ``addRule``, or ``None`` when no rule is to
    be added for this row.
    """
    head = root
    while head is not None:
        belongs = CheckIfBelongs(head, cur_row, z, target_col)

        if belongs == 1:
            conclusions[z] = head.pred_label
            head = None

        elif belongs == 2:
            if head.right is None:
                if askForRule(cur_row):
                    head.right = Tree()
                    head = head.right
                else:
                    head = None
                break
            last_true = head
            head = head.right

        elif belongs == 4:
            if head.right is not None:
                last_true = head
                head = head.right
                continue

            print("No Target column, so Predicted Label=", head.pred_label)
            print(stop_asking)
            if stop_asking not in ("y", "Y"):
                print("The program will ask if you are satisfied with the predicted label everytime as target column is not available,"
                      + " If you trust the classification of the rules if an example belongs to then then:")
                stop_asking = input("\nDo you want to classify the remaining instances with current set of rules,"
                                    + " if they belong to one of them without checking the predicted label?(y or n)")
            if stop_asking in ("y", "Y"):
                conclusions[z] = head.pred_label
                head = None
                continue
            if askForRule(cur_row):
                head.right = Tree()
                head = head.right
            else:
                conclusions[z] = head.pred_label
                head = None
            break

        else:
            if head.left is None:
                if askForRule(cur_row):
                    head.left = Tree()
                    head = head.left
                else:
                    head = None
                break
            head = head.left

    return head, last_true, stop_asking


def main():
    """Interactively build a Ripple Down Rules tree over a dataset and save the results.

    Reads the dataset named on the command line (excel or csv), creates the
    first rule from row 0, then walks every remaining row through the tree,
    asking the user for new rules where needed. Finally it prints summary
    information and writes the node table, the per-row conclusions, a text
    dump of the tree and (when the Newick string parses) a PNG rendering
    into the ``Results`` directory.
    """
    # Parser Function
    parser = argparse.ArgumentParser(description='Ripple Down Rules - Single Class.')
    parser.add_argument('DataSet', metavar='DS', type=str,
                        help='DataSet: complete directory address of the dataset')
    parser.add_argument('typeOfFile', metavar='T', type=str, choices=["excel", "csv"],
                        help='Type: specify the type of file (excel or csv)')
    args = parser.parse_args()

    # argparse `choices` guarantees the value is one of the two.
    if args.typeOfFile == "excel":
        df = pd.read_excel(args.DataSet)
    else:
        df = pd.read_csv(args.DataSet)

    col_list = df.columns
    print("Column names from the given Dataset:")
    for col in col_list:
        print(col, end=" ")
    print("\n")

    # Convert once; the original rebuilt this list for every row (O(n^2)).
    records = df.to_dict('records')
    if not records:
        sys.exit("The dataset contains no rows; nothing to classify.")

    target_col = get_target(col_list)
    # NOTE(review): last_true is carried over from row to row and never
    # reset, exactly as before — confirm against addRule that this is intended.
    last_true = None
    maximal_set = {}
    noOfNodes = 0
    conclusion_dict = {'Conclusion': ['None'] * df.shape[0]}
    conclusions = conclusion_dict['Conclusion']
    wantToStopAddingRulesUnsup = "n"

    first_row = records[0]
    print("Add First Rule.")
    print("\nFirst Row:\n", first_row, "\n")
    root = CreateFirstRule(first_row, target_col, col_list, maximal_set, 0)
    conclusions[0] = root.pred_label
    noOfNodes = noOfNodes + 1

    for z in range(1, len(records)):
        cur_row = records[z]
        head, last_true, wantToStopAddingRulesUnsup = _place_row(
            root, cur_row, z, target_col, conclusions,
            last_true, wantToStopAddingRulesUnsup)

        if head is not None:
            noOfNodes = noOfNodes + 1
            addRule(head, last_true, noOfNodes, cur_row, target_col, col_list, maximal_set, z)
            if (head.inp_cond is None) and (head.data is None):
                # addRule left the node empty: drop it again.
                head.delete(root)
                noOfNodes = noOfNodes - 1
            else:
                conclusions[z] = head.pred_label
            print("\nCurrent Tree:")
            root.display()
            time.sleep(0.8)

    print("\n\n")
    NodeInfo = PreOrderTraversal(root)
    print("RDR Information:")
    print(NodeInfo)
    if target_col is None:
        print("\nSince There was no target column.")
        print("Unique Class Value assigned by you:", NodeInfo['Node Predicted Label'].unique())

    conclusion_df = pd.DataFrame(data=conclusion_dict)
    # Rows still holding the placeholder 'None' never received a conclusion.
    try:
        unclassified = conclusion_df.Conclusion.value_counts()['None']
    except KeyError:
        correct = df.shape[0]
        print("\nAll examples classified correctly, Accuracy: 100 %")
    else:
        correct = conclusion_df.shape[0] - unclassified
    if correct != df.shape[0]:
        print("\nOut of", df.shape[0], "examples, the RDR was able to correctly classify", correct, "examples")
        print("Accuracy:", ((correct / conclusion_df.shape[0]) * 100), "%")

    print("\n\n")
    print("Final Tree:")
    root.display()

    # Ask BEFORE wiping the directory, so the user can actually rescue the
    # previous results the prompt talks about.
    input("If you wish to save the results of any previous runs in the Results directory in"
          + " present working directory, please do so they will be replaced with current reseults. Then press ENTER.")
    _reset_results_dir()

    NodeInfo.to_csv("Results/RDR_Tree_Data.csv")
    conclusion_df.to_csv("Results/Conclusion.csv")

    # newick_format appends the pieces in reverse order, starting after the
    # terminating ';', so the final string is the list read backwards.
    newick = [";"]
    newick_format(root, newick)
    newick_str = "".join(reversed(newick))

    t = None
    try:
        t = ete3.Tree(newick_str, format=8)
    except ete3.parser.newick.NewickError:
        print("Not a well structured tree. Tree rendering with Conditions not available. Check Results Folder.")

    # Dump the textual tree to a file; stdout is restored and the file is
    # closed when the block exits.
    with open("Results/RDR_tree_name_structure.txt", 'w') as tree_file, \
            contextlib.redirect_stdout(tree_file):
        root.display()

    if t:
        _render_tree(t, root)


if __name__ == "__main__":
    main()