#####################
# simple regression #
#####################

# Every column below is referenced through its data frame (`data = ...`,
# `with()`, or `$`) rather than through attach(), so this script leaves
# nothing on the search path. If an earlier session attached a data frame,
# run search(), then detach(data_that_you_attached_before) before starting.

# part 1 ----
# Download the house-price data and look at price by number of baths.
ad <- "https://www.fsb.miamioh.edu/lij14/400_house.txt"
data <- read.table(url(ad), header = TRUE)
head(data)

with(data, plot(baths, rprice))

# Conditional sample means of price for 1, 2 and 3 baths.
with(data, mean(rprice[baths == 1]))
with(data, mean(rprice[baths == 2]))
with(data, mean(rprice[baths == 3]))

# Same scatter plot with the fitted regression line drawn on top.
with(data, plot(baths, rprice))
abline(lm(rprice ~ baths, data = data))

# part 2 ----
# Reproduce the lm() output by hand: slope, intercept, residual standard
# error and R-squared. The model is fitted once and reused for the
# summary and the confidence intervals.
house_fit <- lm(rprice ~ baths, data = data)
summary(house_fit)

# OLS slope = cov(y, x) / var(x); intercept = mean(y) - slope * mean(x).
beta1hat <- with(data, cov(rprice, baths) / var(baths))
beta1hat
beta0hat <- with(data, mean(rprice) - beta1hat * mean(baths))
beta0hat

# Residuals from the hand-computed coefficients.
uhat <- data$rprice - beta0hat - beta1hat * data$baths

# Residual standard error: two parameters estimated, so n - 2 in the
# denominator.
RSE <- sqrt(sum(uhat^2) / (length(data$rprice) - 2))
RSE

# R-squared as 1 - SSR / SST.
R_squared <- 1 - sum(uhat^2) / sum((data$rprice - mean(data$rprice))^2)
R_squared

confint(house_fit)

# part 3 ----
# Refit the regression n times, each time dropping one observation, and
# collect the slope estimates.
n <- nrow(data)
betahat1.v <- numeric(n)
for (j in seq_len(n)) { # seq_len() is safe when n == 0, unlike 1:n
  subsample <- data[-j, ]
  # Select the slope by name rather than by position.
  betahat1.v[j] <- coef(lm(rprice ~ baths, data = subsample))[["baths"]]
}
betahat1.v[1:5]
hist(betahat1.v, main="Sampling Distribution of Beta1hat", xlab="Beta1hat")

# part 4 ----
# Fitted values and residuals taken from the model object.
my_mod <- lm(rprice ~ baths, data = data)
yhat <- fitted(my_mod)
head(yhat)
uhat <- resid(my_mod)
head(uhat)
mean(uhat)

# R-squared as the squared correlation between y and the fitted values.
R_squared <- cor(data$rprice, yhat)^2
R_squared

################
# mini project #
################

# check out https://fsb.miamioh.edu/lij14/311r_BasicR.pdf
# Learn R at https://stats.oarc.ucla.edu/r/
# google "R, HistData" to learn more about the HistData package

# Install HistData only when it is missing, so re-running the script does
# not download the package again.
if (!requireNamespace("HistData", quietly = TRUE)) {
  install.packages("HistData", repos = "https://cloud.r-project.org/")
}
library(HistData)
ls("package:HistData")

# `data` is also the name of the data frame created in part 1; the call
# still resolves to the function utils::data() because R skips
# non-function objects when looking up a function name.
data(GaltonFamilies)
head(GaltonFamilies)

# Regress child height on father height; fit once and reuse the model.
m <- lm(childHeight ~ father, data = GaltonFamilies)
with(GaltonFamilies, plot(father, childHeight))
abline(m)
summary(m)

# Prediction by hand for father = 70. The intercept and slope appear to be
# copied from the summary() output above -- re-check them if the data
# change.
yhat <- 40.13929 + 0.38451 * 70
yhat

# The same prediction obtained from the model object.
newdata <- data.frame(father = 70)
predict(m, newdata)

# Components stored in the lm object and in its summary.
names(m)
head(m$residuals)
names(summary(m))
summary(m)$r.squared
summary(m)$coefficients

# Same regression with mother height as the regressor, for comparison.
mother_summary <- summary(lm(childHeight ~ mother, data = GaltonFamilies))
mother_summary$r.squared
mother_summary$coefficients