# CIS 430 HW/Hw8 BAGAN CIS 430/HW8 r file BAGAN.R · Data-Mining-using-R

# Load the data set from ToyotaCorolla.csv (path is relative to the working
# directory).
car.df <- read.csv("ToyotaCorolla.csv")

# Fix the RNG seed so the random partition below is reproducible.
set.seed(1)

# Draw 60% of the row indices for training; every row not drawn goes to the
# validation partition.
train.index <- sample(seq_len(nrow(car.df)), nrow(car.df) * 0.6)
train.df <- car.df[train.index, ]
valid.df <- car.df[-train.index, ]

#install.packages('rpart')
#install.packages('rpart.plot')

library(rpart)
library(rpart.plot)
# Regression tree ("anova") for Price, fitted on the training partition.
# The control settings (minbucket = 1, maxdepth = 30, cp = 0.001) are passed
# through rpart.control() instead of as loose arguments; the values are the
# same.
tr <- rpart(
  Price ~ Age_08_04 + KM + Fuel_Type + HP + Automatic + Doors +
    Quarterly_Tax + Mfr_Guarantee + Guarantee_Period + Airco +
    Automatic_airco + CD_Player + Powered_Windows + Sport_Model + Tow_Bar,
  data = train.df,
  method = "anova",
  control = rpart.control(minbucket = 1, maxdepth = 30, cp = 0.001)
)

# Plot the fitted tree.
prp(tr)

# Based on the plotted tree, age (Age_08_04), automatic air conditioning
# (Automatic_airco), mileage (KM), and Quarterly_Tax appear to be the four
# most important specifications for predicting price.


# Break points for 20 equal-width price bins spanning the observed price range.
# length.out = 21 (instead of a computed `by` step) makes seq() return exactly
# 21 breaks whose first and last values are min(Price) and max(Price)
# themselves, so floating-point rounding of the step can never leave the most
# expensive car just outside the last bin.
bins <- seq(min(car.df$Price), max(car.df$Price), length.out = 21)
bins


# Give every car the index of the price bin it falls in (include.lowest = TRUE
# puts the minimum price in the first bin) and store the result as a factor.
Binned_Price <- as.factor(
  .bincode(car.df$Price, breaks = bins, include.lowest = TRUE)
)
Binned_Price

# Attach the binned response to both partitions, reusing the row split held in
# train.index.
train.df[["Binned_Price"]] <- Binned_Price[train.index]
valid.df[["Binned_Price"]] <- Binned_Price[-train.index]

# Tree for the binned price, fitted on the training partition with rpart's
# default settings. No `method` is given, so rpart chooses it from the type of
# the response column.
tr.binned <- rpart(
  Binned_Price ~ Age_08_04 + KM + Fuel_Type + HP + Automatic + Doors +
    Quarterly_Tax + Mfr_Guarantee + Guarantee_Period + Airco +
    Automatic_airco + CD_Player + Powered_Windows + Sport_Model + Tow_Bar,
  data = train.df
)

# Plot the fitted tree.
prp(tr.binned)

# Print the variable-importance scores as a one-column matrix, one row per
# variable.
as.matrix(tr.binned$variable.importance)

# Single new car to be priced by both trees. The column names are the same
# predictors used in the two rpart formulas.
new.record <- data.frame(
  Age_08_04 = 77, KM = 117000, Fuel_Type = "Petrol", HP = 110,
  Automatic = 0, Doors = 5, Quarterly_Tax = 100,
  Mfr_Guarantee = 0, Guarantee_Period = 3,
  Airco = 1, Automatic_airco = 0, CD_Player = 0,
  Powered_Windows = 0, Sport_Model = 0, Tow_Bar = 1
)


# Regression-tree estimate: the predicted Price for the new record.
price.tr <- predict(tr, newdata = new.record)

# Classification-tree estimate: the lower break of the predicted price bin.
# predict(type = "class") returns a factor whose level labels are bin numbers.
# Binned_Price only has levels for bins that actually contain cars, so indexing
# `bins` with the factor itself would use its internal integer codes, which
# stop matching the bin numbers as soon as any bin is empty. Convert through
# the level labels to recover the real bin number before indexing.
price.tr.bin <- bins[as.integer(as.character(
  predict(tr.binned, newdata = new.record, type = "class")
))]

# Print both price estimates, one per line, formatted as dollars to the cent.
cat(
  paste(
    "Regression Price Estimate: ",
    scales::dollar(price.tr, accuracy = 0.01)
  ),
  paste(
    "Classification Price Estimate: ",
    scales::dollar(price.tr.bin, accuracy = 0.01)
  ),
  sep = "\n"
)