"""
*******************************************************
Owner: Ashutosh Jha
For: APPCAIR BITS Pilani Goa Campus, Reflexis Systems
Function Name: main()
Driver Code for running Ripple Down Rules script.
*******************************************************
"""
import argparse
import contextlib
import os
import shutil
import sys
import time

import pandas as pd
import numpy as np
import ete3

from get_target import get_target
from exec_node import exec_node
from CreateFirstRule import CreateFirstRule
from CheckIfBelongs import CheckIfBelongs
from askForRule import askForRule
from addRule import addRule
from PreOrderTraversal import PreOrderTraversal
from Tree import Tree
from newick_format import newick_format
# Directory (relative to the current working directory) that receives all output files.
_RESULTS_DIR = "Results"


def _load_dataset(path, file_type):
    """Read the dataset at *path* into a pandas DataFrame.

    *file_type* is ``"excel"`` or ``"csv"``; argparse restricts the command
    line to these two choices, so anything that is not ``"excel"`` is read
    as CSV.
    """
    if file_type == "excel":
        return pd.read_excel(path)
    return pd.read_csv(path)


def _classify_row(root, row, row_idx, target_col, conclusions, last_true, skip_checks):
    """Walk the rule tree for one dataset row.

    Starting at *root*, ``CheckIfBelongs`` is called on each visited node and
    its return code decides what happens next.  The meaning of the codes is
    inferred from how this driver reacts to them -- confirm against
    ``CheckIfBelongs``:

    * ``1``  -- the node's predicted label is recorded as the conclusion.
    * ``2``  -- follow the right child; if there is none, ask the user
      whether to add a rule there.
    * ``4``  -- same as ``2``, but used when no target column exists: the
      user may accept the predicted label instead of adding a rule, and may
      choose to stop being asked for the rest of the run.
    * other -- follow the left child; if there is none, ask the user whether
      to add a rule there.

    Parameters:
        root: root node of the rule tree.
        row: the current record as a ``{column: value}`` dict.
        row_idx: position of *row* in the dataset (index into *conclusions*).
        target_col: value returned by ``get_target`` (``None`` when absent).
        conclusions: list of per-row conclusions, updated in place.
        last_true: last node whose right branch was followed; carried over
            between rows exactly as the caller passes it in.
        skip_checks: the user's latest answer ("y"/"Y" means: trust the
            predicted label without asking again).

    Returns:
        ``(new_node, last_true, skip_checks)`` where *new_node* is a freshly
        attached, still empty ``Tree`` that the caller must fill through
        ``addRule``, or ``None`` when no rule has to be added for this row.
    """
    node = root
    while node is not None:
        verdict = CheckIfBelongs(node, row, row_idx, target_col)

        if verdict == 1:
            conclusions[row_idx] = node.pred_label
            return None, last_true, skip_checks

        if verdict == 2:
            if node.right is not None:
                last_true = node
                node = node.right
                continue
            if askForRule(row):
                node.right = Tree()
                return node.right, last_true, skip_checks
            return None, last_true, skip_checks

        if verdict == 4:
            if node.right is not None:
                last_true = node
                node = node.right
                continue
            print("No Target column, so Predicted Label=", node.pred_label)
            print(skip_checks)
            if skip_checks not in ("y", "Y"):
                print("The program will ask if you are satisfied with the predicted label everytime as target column is not available,"
                      " If you trust the classification of the rules if an example belongs to then then:")
                skip_checks = input("\nDo you want to classify the remaining instances with current set of rules,"
                                    " if they belong to one of them without checking the predicted label?(y or n)")
            # Either the user trusts the rules from now on, or declines to add
            # a rule for this row: in both cases keep the predicted label.
            if skip_checks in ("y", "Y") or not askForRule(row):
                conclusions[row_idx] = node.pred_label
                return None, last_true, skip_checks
            node.right = Tree()
            return node.right, last_true, skip_checks

        # Any other code: continue down (or extend) the left branch.
        if node.left is not None:
            node = node.left
            continue
        if askForRule(row):
            node.left = Tree()
            return node.left, last_true, skip_checks
        return None, last_true, skip_checks

    return None, last_true, skip_checks


def _report_accuracy(conclusion_df):
    """Print how many rows ended up with a conclusion other than 'None'."""
    total = conclusion_df.shape[0]
    # Rows that never received a conclusion still hold the placeholder string 'None'.
    unresolved = conclusion_df["Conclusion"].value_counts().get("None", 0)
    correct = total - unresolved
    if unresolved == 0:
        print("\nAll examples classified correctly, Accuracy: 100 %")
    else:
        print("\nOut of", total, "examples, the RDR was able to correctly classify", correct, "examples")
        print("Accuracy:", ((correct / total) * 100), "%")


def _find_inp_cond(name, node):
    """Return ``inp_cond`` of the first node (pre-order) whose ``name`` equals *name*.

    Returns ``None`` when no node matches or the match has no truthy
    condition further down either branch.
    """
    if not node:
        return None
    if node.name == name:
        return node.inp_cond
    return _find_inp_cond(name, node.left) or _find_inp_cond(name, node.right) or None


def _render_tree(ete_tree, root, png_path):
    """Render *ete_tree* to *png_path*, labelling each node with its rule condition."""
    style = ete3.TreeStyle()
    style.show_branch_length = False
    style.show_leaf_name = False
    style.show_branch_support = False
    style.branch_vertical_margin = 20
    style.scale = 120
    style.optimal_scale_level = "full"

    def layout(node):
        # Look the condition up in the RDR tree by the node name shared with the ete3 tree.
        face = ete3.TextFace(_find_inp_cond(node.name, root), tight_text=True)
        ete3.add_face_to_node(face, node, column=0, position="branch-right")

    style.layout_fn = layout
    ete_tree.render(png_path, tree_style=style)


def _save_results(root, node_info, conclusion_df):
    """Write the CSV, text and (when possible) PNG outputs into ``Results``.

    The user is prompted *before* an existing ``Results`` directory is
    removed, so results of a previous run can still be copied away.
    """
    input("If you wish to save the results of any previous runs in the Results directory in"
          " present working directory, please do so they will be replaced with current reseults. Then press ENTER.")

    # Start from an empty output directory (portable replacement for `rm -rf` / `mkdir`).
    if os.path.isdir(_RESULTS_DIR):
        shutil.rmtree(_RESULTS_DIR)
    elif os.path.exists(_RESULTS_DIR):
        os.remove(_RESULTS_DIR)
    os.makedirs(_RESULTS_DIR)

    node_info.to_csv(os.path.join(_RESULTS_DIR, "RDR_Tree_Data.csv"))
    conclusion_df.to_csv(os.path.join(_RESULTS_DIR, "Conclusion.csv"))

    # newick_format appends tokens to the list; the final string is the
    # tokens in reverse order, which is why the list starts with the ";".
    newick = [";"]
    newick_format(root, newick)
    newick_str = "".join(reversed(newick))

    try:
        ete_tree = ete3.Tree(newick_str, format=8)
    except ete3.parser.newick.NewickError:
        ete_tree = None
        print("Not a well structured tree. Tree rendering with Conditions not available. Check Results Folder.")

    # Plain-text dump of the tree; stdout is redirected only for the duration
    # of display() and restored (and the file closed) afterwards.
    structure_path = os.path.join(_RESULTS_DIR, "RDR_tree_name_structure.txt")
    with open(structure_path, "w") as structure_file, contextlib.redirect_stdout(structure_file):
        root.display()

    if ete_tree is not None:
        _render_tree(ete_tree, root, os.path.join(_RESULTS_DIR, "RDR_tree.png"))


def main():
    """Run an interactive Ripple Down Rules session over a dataset.

    Command line: ``DataSet`` (path to the file) and ``typeOfFile``
    (``excel`` or ``csv``).  The first row seeds the rule tree; every
    following row is pushed through the tree and the user is asked for a new
    rule whenever the traversal ends on a missing branch.  At the end the
    tree information, the per-row conclusions and a rendering of the tree are
    written to the ``Results`` directory.
    """
    parser = argparse.ArgumentParser(description='Ripple Down Rules - Single Class.')
    parser.add_argument('DataSet', metavar='DS', type=str,
                        help='DataSet: complete directory address of the dataset')
    parser.add_argument('typeOfFile', metavar='T', type=str, choices=["excel", "csv"],
                        help='Type: specify the type of file (excel or csv)')
    args = parser.parse_args()

    df = _load_dataset(args.DataSet, args.typeOfFile)
    if df.shape[0] == 0:
        # The first row is needed to create the first rule.
        parser.error("the dataset contains no rows")

    col_list = df.columns
    print("Column names from the given Dataset:")
    for column in col_list:
        print(column, end=" ")
    print("\n")

    target_col = get_target(col_list)
    maximal_set = {}

    # Convert once; rebuilding the record list for every row is quadratic.
    records = df.to_dict('records')
    conclusions = ['None'] * len(records)

    first_row = records[0]
    print("Add First Rule.")
    print("\nFirst Row:\n", first_row, "\n")
    root = CreateFirstRule(first_row, target_col, col_list, maximal_set, 0)
    conclusions[0] = root.pred_label
    node_count = 1

    last_true = None
    skip_checks = "n"
    for row_idx, row in enumerate(records[1:], start=1):
        new_node, last_true, skip_checks = _classify_row(
            root, row, row_idx, target_col, conclusions, last_true, skip_checks)
        if new_node is None:
            continue

        node_count += 1
        addRule(new_node, last_true, node_count, row, target_col, col_list, maximal_set, row_idx)
        if new_node.inp_cond is None and new_node.data is None:
            # addRule left the node empty: drop it again.
            new_node.delete(root)
            node_count -= 1
        else:
            conclusions[row_idx] = new_node.pred_label
        print("\nCurrent Tree:")
        root.display()
        # Presumably a short pause so the tree can be read before the next prompt.
        time.sleep(0.8)

    print("\n\n")
    node_info = PreOrderTraversal(root)
    print("RDR Information:")
    print(node_info)
    if target_col is None:
        print("\nSince There was no target column.")
        print("Unique Class Value assigned by you:", node_info['Node Predicted Label'].unique())

    conclusion_df = pd.DataFrame(data={'Conclusion': conclusions})
    _report_accuracy(conclusion_df)

    print("\n\n")
    print("Final Tree:")
    root.display()

    _save_results(root, node_info, conclusion_df)
# Start the interactive session only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()