# Data-Mining-using-R / CIS 430 HW / Hw1 BAGAN / Hw1 BAGAN CIS 430.R
# Hw1 BAGAN CIS 430.R
# Raw
# Chay Bagan CIS 430 HW1
# Table 2.3 ----
# Load the housing data from "West Roxbury.csv" (resolved relative to the
# current working directory) and explore it with basic indexing.
housing.df <- read.csv("West Roxbury.csv", header = TRUE)
dim(housing.df)   # number of rows and columns
head(housing.df)  # first rows of the data frame
# View() opens a spreadsheet-style viewer and needs an interactive session;
# skip it when the script is run in batch mode so the run does not fail.
if (interactive()) {
  View(housing.df)
}

# Data frames are indexed as [rows, columns]; an empty index selects all.
housing.df[1:10, 1]               # first 10 rows of the first column
housing.df[1:10, ]                # first 10 rows of every column
housing.df[5, 1:10]               # fifth row, first 10 columns
housing.df[5, c(1:2, 4, 8:10)]    # fifth row, a selection of columns
housing.df[, 1]                   # the whole first column
# NOTE(review): column 1 is only renamed to TOTAL.VALUE further down the
# script; presumably `$` resolves the name here via partial matching --
# confirm against the CSV header.
housing.df$TOTAL.VALUE            # a column selected by name
housing.df$TOTAL.VALUE[1:10]      # first 10 values of that column
length(housing.df$TOTAL.VALUE)    # number of values in the column
mean(housing.df$TOTAL.VALUE)      # mean of the column
summary(housing.df)               # summary statistics for every column

# Table 2.5 ----
# Inspect the variable names and the class of individual columns.
names(housing.df)
t(t(names(housing.df)))  # double transpose prints the names as one column
colnames(housing.df)[1] <- "TOTAL.VALUE"  # set the name of the first column
class(housing.df$REMODEL)
class(housing.df[, 14])
# levels() returns NULL for a character vector, and since R 4.0.0 read.csv()
# keeps strings as character by default, so coerce to a factor first.
# as.factor() leaves the column unchanged if it is already a factor.
levels(as.factor(housing.df[, 14]))
class(housing.df$BEDROOMS)
class(housing.df[, 1])

# Table 2.9 ----
# Randomly partition the rows of housing.df by row name. Two partitions are
# drawn in sequence; the second overwrites train.rows, valid.rows,
# train.data and valid.data produced by the first.
set.seed(1)  # fixed seed so the random draws are reproducible

# Partition 1: sample 60% of the rows for training; the rest is validation.
train.rows <- sample(rownames(housing.df), nrow(housing.df) * 0.6)
valid.rows <- setdiff(rownames(housing.df), train.rows)
train.data <- housing.df[train.rows, ]
valid.data <- housing.df[valid.rows, ]

# Partition 2: sample 50% for training, then 30% of all rows (drawn from the
# rows not used for training) for validation; whatever is left is the test
# set.
train.rows <- sample(rownames(housing.df), nrow(housing.df) * 0.5)
valid.rows <- sample(
  setdiff(rownames(housing.df), train.rows),
  nrow(housing.df) * 0.3
)
test.rows <- setdiff(rownames(housing.df), c(train.rows, valid.rows))
train.data <- housing.df[train.rows, ]
valid.data <- housing.df[valid.rows, ]
test.data <- housing.df[test.rows, ]

# Table 2.11 ----
# Fit a linear regression of TOTAL.VALUE on all other columns of housing.df,
# restricted to the training rows, then tabulate the training response next
# to the model's fitted values and residuals.
# NOTE(review): `~ .` uses every other column as a predictor; if any column
# is derived from TOTAL.VALUE it would leak the target -- confirm.
reg <- lm(TOTAL.VALUE ~ ., data = housing.df, subset = train.rows)
tr.res <- data.frame(
  train.data.TOTAL.VALUE = train.data$TOTAL.VALUE,
  reg.fitted.values = reg$fitted.values,
  reg.residuals = reg$residuals
)
head(tr.res)