code/rdr_driver_code.py · rdr-py

"""
*******************************************************
Owner: Ashutosh Jha
For: APPCAIR BITS Pilani Goa Campus, Reflexis Systems
Function Name: main()
Driver Code for running Ripple Down Rules script.
*******************************************************
"""
import argparse
import contextlib
import os
import shutil
import sys
import time

import ete3
import numpy as np
import pandas as pd

from get_target import get_target
from exec_node import exec_node
from CreateFirstRule import CreateFirstRule
from CheckIfBelongs import CheckIfBelongs
from askForRule import askForRule
from addRule import addRule
from PreOrderTraversal import PreOrderTraversal
from Tree import Tree
from newick_format import newick_format

def _load_dataset(path, file_type):
    """Read the dataset at *path* into a DataFrame.

    *file_type* is "excel" or "csv"; argparse restricts it to those two
    values, so anything that is not "excel" is read as CSV.
    """
    if file_type == "excel":
        return pd.read_excel(path)
    return pd.read_csv(path)


def _report_accuracy(conclusion_df, total_rows):
    """Print how many rows ended up with a conclusion other than 'None'."""
    # Rows never given a conclusion still hold the 'None' placeholder string.
    unclassified = conclusion_df.Conclusion.value_counts().get('None', 0)
    if unclassified == 0:
        print("\nAll examples classified correctly, Accuracy: 100 %")
        return
    correct = total_rows - unclassified
    print("\nOut of", total_rows, "examples, the RDR was able to correctly classify", correct, "examples")
    print("Accuracy:", ((correct / total_rows) * 100), "%")


def _reset_results_dir(results_dir):
    """Remove any existing *results_dir* and recreate it empty."""
    if os.path.isdir(results_dir) and not os.path.islink(results_dir):
        shutil.rmtree(results_dir)
    elif os.path.lexists(results_dir):
        # A plain file or symlink with that name would block makedirs.
        os.remove(results_dir)
    os.makedirs(results_dir)


def _find_inp_cond(name, node):
    """Return the inp_cond of the node called *name* in the subtree at *node*.

    Pre-order search (node, left, right). Returns None when nothing truthy
    is found below *node*.
    """
    if not node:
        return None
    if node.name == name:
        return node.inp_cond
    found = _find_inp_cond(name, node.left)
    if found:
        return found
    found = _find_inp_cond(name, node.right)
    if found:
        return found
    return None


def _render_tree(root, newick_str, results_dir):
    """Write the text structure of the tree and, if possible, a PNG rendering.

    The PNG is skipped when ete3 cannot parse *newick_str*.
    """
    try:
        ete_tree = ete3.Tree(newick_str, format=8)
    except ete3.parser.newick.NewickError:
        ete_tree = None
        print("Not a well structured tree. Tree rendering with Conditions not available. Check Results Folder.")

    # Capture display() output in a file, then restore stdout and close the
    # file so later output is not silently swallowed.
    structure_path = os.path.join(results_dir, "RDR_tree_name_structure.txt")
    with open(structure_path, 'w') as structure_file:
        with contextlib.redirect_stdout(structure_file):
            root.display()

    if ete_tree is None:
        return

    ts = ete3.TreeStyle()
    ts.show_branch_length = False
    ts.show_leaf_name = False
    ts.show_branch_support = False
    ts.branch_vertical_margin = 20
    ts.scale = 120
    ts.optimal_scale_level = "full"

    def my_layout(node):
        # Label every rendered node with the condition of the RDR node that
        # carries the same name.
        inp_condition = _find_inp_cond(node.name, root)
        face = ete3.TextFace(inp_condition, tight_text=True)
        ete3.add_face_to_node(face, node, column=0, position="branch-right")

    ts.layout_fn = my_layout
    ete_tree.render(os.path.join(results_dir, "RDR_tree.png"), tree_style=ts)


def main():
    """Driver for the interactive Ripple Down Rules script.

    Parses the command line (dataset path and file type), loads the dataset,
    builds the first rule from the first row, then walks every remaining row
    through the rule tree, asking the user for new rules where the tree has
    no branch to follow. Finally prints the rule information and accuracy and
    stores the results under ``Results`` in the current working directory.

    Exits with an error message when the dataset has no rows.
    """
    parser = argparse.ArgumentParser(description='Ripple Down Rules - Single Class.')
    parser.add_argument('DataSet', metavar='DS', type=str,
            help='DataSet: complete directory address of the dataset')
    parser.add_argument('typeOfFile', metavar='T', type=str, choices=["excel", "csv"],
            help='Type: specify the type of file (excel or csv)')
    args = parser.parse_args()

    df = _load_dataset(args.DataSet, args.typeOfFile)
    total_rows = df.shape[0]
    if total_rows == 0:
        sys.exit("The dataset contains no rows; there is nothing to classify.")

    col_list = df.columns
    print("Column names from the given Dataset:")
    for col_name in col_list:
        print(col_name, end="  ")
    print("\n")

    target_col = get_target(col_list)
    # NOTE(review): last_true is carried over from one row to the next and is
    # only updated when the walk follows an existing right child; kept as in
    # the original -- confirm against addRule that this is intended.
    last_true = None
    maximal_set = {}
    noOfNodes = 0
    conclusion_dict = {'Conclusion': ['None'] * total_rows}
    conclusions = conclusion_dict['Conclusion']
    wantToStopAddingRulesUnsup = "n"

    # Convert the frame once; rebuilding the record list per row is quadratic.
    rows = df.to_dict('records')

    first_row = rows[0]
    print("Add First Rule.")
    print("\nFirst Row:\n", first_row, "\n")
    root = CreateFirstRule(first_row, target_col, col_list, maximal_set, 0)
    conclusions[0] = root.pred_label
    noOfNodes = noOfNodes + 1

    for z in range(1, total_rows):
        cur_row = rows[z]
        # Walk down from the root. When the loop ends, head is either None
        # (nothing to add) or a freshly created, still empty node that
        # addRule must fill in.
        head = root
        while head is not None:
            belongsToRule = CheckIfBelongs(head, cur_row, z, target_col)

            if belongsToRule == 1:
                # The node's label is accepted as the conclusion for this row.
                conclusions[z] = head.pred_label
                head = None
                break

            if belongsToRule == 2:
                if head.right is not None:
                    last_true = head
                    head = head.right
                    continue
                if askForRule(cur_row):
                    head.right = Tree()
                    head = head.right
                else:
                    head = None
                break

            if belongsToRule == 4:
                # Branch used when there is no target column to check against.
                if head.right is not None:
                    last_true = head
                    head = head.right
                    continue
                print("No Target column, so Predicted Label=", head.pred_label)
                print(wantToStopAddingRulesUnsup)
                if wantToStopAddingRulesUnsup not in ("y", "Y"):
                    print("The program will ask if you are satisfied with the predicted label everytime as target column is not available," +
                            " If you trust the classification of the rules if an example belongs to then then:")
                    wantToStopAddingRulesUnsup = input("\nDo you want to classify the remaining instances with current set of rules," +
                                                    " if they belong to one of them without checking the predicted label?(y or n)")
                if wantToStopAddingRulesUnsup in ("y", "Y"):
                    conclusions[z] = head.pred_label
                    head = None
                elif askForRule(cur_row):
                    head.right = Tree()
                    head = head.right
                else:
                    conclusions[z] = head.pred_label
                    head = None
                break

            # Any other code: follow (or create) the left branch.
            if head.left is not None:
                head = head.left
                continue
            if askForRule(cur_row):
                head.left = Tree()
                head = head.left
            else:
                head = None
            break

        if head is not None:
            noOfNodes = noOfNodes + 1
            addRule(head, last_true, noOfNodes, cur_row, target_col, col_list, maximal_set, z)
            if (head.inp_cond is None) and (head.data is None):
                # addRule left the node empty, so drop it again.
                head.delete(root)
                noOfNodes = noOfNodes - 1
            else:
                conclusions[z] = head.pred_label
                print("\nCurrent Tree:")
                root.display()
                time.sleep(0.8)

    print("\n\n")
    NodeInfo = PreOrderTraversal(root)
    print("RDR Information:")
    print(NodeInfo)
    if target_col is None:
        print("\nSince There was no target column.")
        print("Unique Class Value assigned by you:", NodeInfo['Node Predicted Label'].unique())
    conclusion_df = pd.DataFrame(data=conclusion_dict)
    _report_accuracy(conclusion_df, total_rows)
    print("\n\n")
    print("Final Tree:")
    root.display()

    # Ask BEFORE wiping the directory: the prompt tells the user to back up
    # earlier results, which is only possible while they still exist.
    input("If you wish to save the results of any previous runs in the Results directory in" +
    " present working directory, please do so they will be replaced with current reseults. Then press ENTER.")
    results_dir = "Results"
    _reset_results_dir(results_dir)
    NodeInfo.to_csv(os.path.join(results_dir, "RDR_Tree_Data.csv"))
    conclusion_df.to_csv(os.path.join(results_dir, "Conclusion.csv"))

    # newick_format appends the pieces to the list; the final string is the
    # pieces joined in reverse order, so the leading ";" ends up last.
    newick = [";"]
    newick_format(root, newick)
    newick_str = "".join(reversed(newick))

    _render_tree(root, newick_str, results_dir)


    
# Run the driver only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()