# Chay Bagan Homework 6
#
# k-nearest-neighbour prediction of MEDV on the Boston Housing data:
#   A. validation RMSE for k = 1..5
#   B. prediction for one new record using the best k
#   C. error obtained when the training set itself is scored
#
# class::knn is called with its namespace prefix because FNN, which is also
# attached, exports a function with the same name.

library(caret)
library(FNN)

housing.df <- read.csv("BostonHousing.csv")

# Partition the rows 60% / 40% into training and validation sets.
# Column 14 is dropped from both partitions; column 13 (MEDV) is the outcome.
set.seed(123)
train.index <- sample(row.names(housing.df), floor(0.6 * nrow(housing.df)))
valid.index <- setdiff(row.names(housing.df), train.index)
train.df <- housing.df[train.index, -14]
valid.df <- housing.df[valid.index, -14]

# Fit the centering/scaling parameters on the training predictors only.
# Leaving MEDV out of the fit keeps the outcome in its original units and lets
# the same normalizer be applied to a new record that has no MEDV value.
predictor.names <- names(train.df)[-13]
norm.values <- preProcess(train.df[, predictor.names],
                          method = c("center", "scale"))
train.norm.df <- as.data.frame(predict(norm.values, train.df))
valid.norm.df <- as.data.frame(predict(norm.values, valid.df))
housing.norm.df <- as.data.frame(predict(norm.values, housing.df))

train.x <- train.norm.df[, predictor.names]
valid.x <- valid.norm.df[, predictor.names]

# A. Validation RMSE for each k.
# class::knn returns a factor, so the predicted labels are converted back to
# numbers before they are compared with the observed MEDV values.
accuracy.df <- data.frame(k = seq(1, 5, 1), RMSE = rep(0, 5))
for (i in seq_len(nrow(accuracy.df))) {
  knn.pred <- class::knn(train = train.x, test = valid.x,
                         cl = train.df$MEDV, k = accuracy.df$k[i])
  accuracy.df[i, 2] <- RMSE(as.numeric(as.character(knn.pred)), valid.df$MEDV)
}
accuracy.df

# Best k = lowest validation RMSE (which.min keeps the smallest k on ties).
best.k <- accuracy.df$k[which.min(accuracy.df$RMSE)]

# B. Predict MEDV for a new record.
# The record is normalized with the training-set parameters: a single row
# cannot supply a meaningful mean or standard deviation of its own.
new.df <- data.frame(0.2, 0, 7, 0, 0.538, 6, 62, 4.7, 4, 307, 21, 10)
names(new.df) <- predictor.names
new.norm.df <- predict(norm.values, newdata = new.df)
new.knn.pred <- class::knn(train = train.x, test = new.norm.df,
                           cl = train.df$MEDV, k = best.k)
new.knn.pred

# C. Error of the training set: score the training rows with the model built
# from those same rows and compare against their own MEDV values.
train.knn.pred <- class::knn(train = train.x, test = train.x,
                             cl = train.df$MEDV, k = best.k)
new.accuracy.df <- RMSE(as.numeric(as.character(train.knn.pred)),
                        train.df$MEDV)
new.accuracy.df