# Chay Bagan CIS 430 HW1
#
# Explores the West Roxbury housing data, partitions it into
# training / validation / test sets, and fits a linear regression of
# TOTAL.VALUE on the remaining columns using the training rows.
# Expects "West Roxbury.csv" in the current working directory.

# Table 2.3 ----
# stringsAsFactors = TRUE: since R 4.0 read.csv() keeps text columns as
# character, which makes levels() return NULL further down. Reading them
# as factors lets class()/levels() report the categories.
housing.df <- read.csv("West Roxbury.csv", header = TRUE,
                       stringsAsFactors = TRUE)
dim(housing.df)   # number of rows and columns
head(housing.df)  # first six rows
# View() opens a data viewer; only attempt it in an interactive session.
if (interactive()) {
  View(housing.df)
}

# Practice different ways of subsetting rows and columns.
housing.df[1:10, 1]                # first 10 rows of the first column
housing.df[1:10, ]                 # first 10 rows, all columns
housing.df[5, 1:10]                # fifth row, first 10 columns
housing.df[5, c(1:2, 4, 8:10)]     # fifth row, selected columns
housing.df[, 1]                    # whole first column
housing.df$TOTAL.VALUE             # same kind of access, by name
housing.df$TOTAL.VALUE[1:10]       # first 10 values of that column
length(housing.df$TOTAL.VALUE)
mean(housing.df$TOTAL.VALUE)
summary(housing.df)                # per-column summary

# Table 2.5 ----
names(housing.df)
t(t(names(housing.df)))            # column names printed as a one-column matrix
colnames(housing.df)[1] <- "TOTAL.VALUE"  # make sure column 1 has this name
class(housing.df$REMODEL)
class(housing.df[, 14])            # presumably the same column as REMODEL - confirm
levels(housing.df[, 14])           # works now that text columns are factors
class(housing.df$BEDROOMS)
class(housing.df[, 1])

# Table 2.9 ----
set.seed(1)  # fix the random draws below so the partitions are reproducible

# Partition 1: 60% training, remaining 40% validation.
# floor() makes the truncation of the fractional sample size explicit.
train.rows <- sample(rownames(housing.df), floor(nrow(housing.df) * 0.6))
train.data <- housing.df[train.rows, ]
valid.rows <- setdiff(rownames(housing.df), train.rows)
valid.data <- housing.df[valid.rows, ]

# Partition 2: 50% training, 30% validation, remaining 20% test.
# This overwrites train.rows/valid.rows/train.data/valid.data from
# partition 1; everything below uses this second partition.
train.rows <- sample(rownames(housing.df), floor(nrow(housing.df) * 0.5))
valid.rows <- sample(setdiff(rownames(housing.df), train.rows),
                     floor(nrow(housing.df) * 0.3))
test.rows <- setdiff(rownames(housing.df), union(train.rows, valid.rows))
train.data <- housing.df[train.rows, ]
valid.data <- housing.df[valid.rows, ]
test.data <- housing.df[test.rows, ]

# Table 2.11 ----
# Regress TOTAL.VALUE on all other columns, fitting on the training rows only.
# NOTE(review): "~ ." uses every other column as a predictor. If one of them
# is derived directly from TOTAL.VALUE (e.g. a tax amount), it should
# probably be excluded from the formula - confirm against the data.
reg <- lm(TOTAL.VALUE ~ ., data = housing.df, subset = train.rows)

# Actual value, fitted value and residual for each training record.
tr.res <- data.frame(train.data$TOTAL.VALUE, reg$fitted.values, reg$residuals)
head(tr.res)