#Chay Bagan Homework 6

library(caret)
library(FNN)
# Load the data set from the working directory.
housing.df <- read.csv(file = "BostonHousing.csv")

# Reproducible 60/40 split: draw 60% of the row names for training; every
# row name that was not drawn goes to validation.
set.seed(123)
train.index <- sample(rownames(housing.df), 0.6 * nrow(housing.df))
valid.index <- setdiff(rownames(housing.df), train.index)

# Column 14 is dropped from both partitions
# (presumably the categorical CAT.MEDV column -- confirm against the CSV).
train.df <- housing.df[train.index, -14]
valid.df <- housing.df[valid.index, -14]

# Learn the centering/scaling parameters from the training partition only,
# then apply that same transformation to the training data, the validation
# data and the full data set.
norm.values <- preProcess(train.df, method = c("center", "scale"))

train.norm.df <- as.data.frame(predict(norm.values, newdata = train.df))
valid.norm.df <- as.data.frame(predict(norm.values, newdata = valid.df))
housing.norm.df <- as.data.frame(predict(norm.values, newdata = housing.df))

# One row per candidate k; the RMSE column is filled in by the loop below.
accuracy.df <- data.frame(k = seq(1, 5, by = 1), RMSE = numeric(5))

# For each k, predict the validation rows from the normalized training
# predictors (column 13 excluded) and record the validation RMSE.
# NOTE(review): class::knn is a classifier, so every distinct value in
# column 13 is treated as a class label -- confirm this is intended rather
# than a k-NN regression.
for (i in seq_len(nrow(accuracy.df))) {
  knn.pred <- class::knn(
    train = train.norm.df[, -13],
    test = valid.norm.df[, -13],
    cl = train.df[, 13],
    k = i
  )
  # The prediction is a factor; map its level labels back to numbers.
  accuracy.df$RMSE[i] <- RMSE(as.numeric(levels(knn.pred))[knn.pred],
                              valid.df[, 13])
}

accuracy.df

# B. Predict the response for one new record, given its 12 predictor values
# in the same column order as the training predictors.
new.df <- data.frame(0.2, 0, 7, 0, 0.538, 6, 62, 4.7, 4, 307, 21, 10)
names(new.df) <- names(train.norm.df)[-13]

# The new record must be placed on the same scale as train.norm.df, so the
# centering/scaling parameters are learned from the training predictors.
# Fitting them on the single new row itself cannot work: one observation has
# no spread to scale by.
new.norm.values <- preProcess(train.df[, -13], method = c("center", "scale"))
new.norm.df <- predict(new.norm.values, newdata = new.df)

# Number of neighbours used for both the new-record prediction and the
# training-set scoring below.
chosen.k <- 2

new.knn.pred <- class::knn(
  train = train.norm.df[, -13],
  test = new.norm.df,
  cl = train.df$MEDV,
  k = chosen.k
)
new.knn.pred

# C. Error on the training set: score the training rows with the same model
# and compare the predictions with the training responses.
train.knn.pred <- class::knn(
  train = train.norm.df[, -13],
  test = train.norm.df[, -13],
  cl = train.df$MEDV,
  k = chosen.k
)
# The prediction is a factor; convert its labels back to numbers for RMSE.
new.accuracy.df <- RMSE(as.numeric(as.character(train.knn.pred)),
                        train.df[, 13])
new.accuracy.df