#################
# function form #
#################
# Regression exercises on functional form (levels, logs, quadratics,
# interactions), in-sample residuals and out-of-sample forecasts.
#
# Every model receives its data frame through `data =` instead of relying on
# attach(), and derived regressors are stored as columns of that data frame.
# The script no longer clears the caller's workspace.

# part 1: level, log-level and log-log specifications ----
ad <- "https://www.fsb.miamioh.edu/lij14/400_house.txt"
data <- read.table(ad, header = TRUE)

mean(data$rprice)
coef(summary(lm(rprice ~ age, data = data)))

data$logrprice <- log(data$rprice)
coef(summary(lm(logrprice ~ age, data = data)))

data$logarea <- log(data$area)
coef(summary(lm(logrprice ~ logarea, data = data)))

# part 2: linear versus quadratic fit in age ----
# Points are blue when age >= 100 and red otherwise.
age_col <- ifelse(data$age >= 100, "blue", "red")

plot(rprice ~ age, data = data, col = age_col)
abline(lm(rprice ~ age, data = data))

qm <- lm(rprice ~ poly(age, 2, raw = TRUE), data = data)
plot(rprice ~ age, data = data, col = age_col)
# `x` is the placeholder that curve() substitutes along the horizontal axis.
curve(predict(qm, newdata = data.frame(age = x)), add = TRUE)

# Same quadratic, written with an explicit squared term.
data$agesq <- data$age^2
coef(summary(lm(rprice ~ age + agesq, data = data)))

# part 3: subsample regressions versus an interaction term ----
# NOTE(review): houses with age exactly 18 belong to neither subsample
# (`< 18` and `> 18`). The original comparisons are kept; confirm whether one
# of them should be inclusive.
coef(summary(lm(rprice ~ baths, data = data, subset = age < 18)))
coef(summary(lm(rprice ~ baths, data = data, subset = age > 18)))

data$inta <- data$age * data$baths
coef(summary(lm(rprice ~ baths + inta, data = data)))

# part 4: observation with the most negative residual ----
m <- lm(rprice ~ baths + age + area, data = data)
yhat <- fitted(m)
uhat <- resid(m)

# which.min() picks the position directly instead of testing floating-point
# equality against min(uhat). The response is read from the model frame so
# the index lines up with the rows lm() actually used.
worst <- which.min(uhat)
model.frame(m)$rprice[worst]
yhat[worst]
uhat[worst]

############
# optional #
############
# out-of-sample forecast ----
# The data are read again so this section can be run on its own.
ad <- "https://www.fsb.miamioh.edu/lij14/400_house.txt"
da <- read.table(ad, header = TRUE)

m <- lm(rprice ~ baths + age + area, data = da)
summary(m)

# House to forecast: 2 baths, 40 years old, area of 1000.
target <- data.frame(baths = 2, age = 40, area = 1000)

# Point forecast by hand, then the same number from predict().
b <- coef(m)
b[["(Intercept)"]] +
  b[["baths"]] * target$baths +
  b[["age"]] * target$age +
  b[["area"]] * target$area
predict(m, newdata = target)

# interval forecast ----
predict(m, newdata = target, interval = "confidence", level = 0.95)

# Centre each regressor at the target value: the intercept of the centred
# regression is the forecast, and its confidence interval is the confidence
# interval for the conditional mean.
da$z1 <- da$baths - target$baths
da$z2 <- da$age - target$age
da$z3 <- da$area - target$area
mc <- lm(rprice ~ z1 + z2 + z3, data = da)
coef(mc)[1]
confint(mc)[1, ]

predict(m, newdata = target, interval = "prediction", level = 0.95)
summary(mc)

# Prediction interval by hand (upper bound, then lower bound). All inputs are
# taken from the fitted model rather than typed in as rounded constants, so
# the bounds follow the data and agree with the predict() call above.
mc_summary <- summary(mc)
fit0 <- coef(mc)[["(Intercept)"]]
se_fit0 <- mc_summary$coefficients["(Intercept)", "Std. Error"]
half_width <- qt(0.975, df.residual(mc)) *
  sqrt(se_fit0^2 + mc_summary$sigma^2)
fit0 + half_width
fit0 - half_width

################
# mini project #
################
# function form ----
# Install ISLR only when it is missing, instead of on every run.
if (!requireNamespace("ISLR", quietly = TRUE)) {
  install.packages("ISLR")
}
library(ISLR)
data("Auto", package = "ISLR")
head(Auto)

plot(mpg ~ weight, data = Auto)
abline(lm(mpg ~ weight, data = Auto))

# Quadratic in weight: explicit squared term, then poly() for the fitted curve.
Auto$weightsq <- Auto$weight^2
coef(summary(lm(mpg ~ weight + weightsq, data = Auto)))

qm <- lm(mpg ~ poly(weight, 2, raw = TRUE), data = Auto)
plot(mpg ~ weight, data = Auto)
curve(predict(qm, newdata = data.frame(weight = x)), add = TRUE)

# Log-level specification.
Auto$logmpg <- log(Auto$mpg)
plot(logmpg ~ weight, data = Auto)
abline(lm(logmpg ~ weight, data = Auto))

# Interaction between weight and horsepower.
Auto$inta <- Auto$weight * Auto$horsepower
coef(summary(lm(mpg ~ weight + inta, data = Auto)))

# Log-log specification.
Auto$logweight <- log(Auto$weight)
coef(summary(lm(logmpg ~ logweight, data = Auto)))

# Reciprocal specification: mpg regressed on 1 / weight.
Auto$inv_weight <- 1 / Auto$weight
recip <- lm(mpg ~ inv_weight, data = Auto)
summary(recip)
plot(mpg ~ inv_weight, data = Auto)
abline(recip)