#######################
# multiple regression #
#######################
#
# Teaching script: omitted-variable bias, partialling out, joint F-tests and
# multicollinearity. Results are printed by the bare top-level expressions,
# so run it interactively or with source(..., echo = TRUE).

# Third-party packages used below:
#   car      - linearHypothesis() (part 4)
#   HistData - GaltonFamilies data set (mini project)
library(car)
library(HistData)

# part 1 ----
# Simulated data with an omitted variable.
set.seed(12345)
n <- 100

# w is the omitted variable. By construction, x does not cause y; w does.
w <- rnorm(n)
x <- w + rnorm(n)
y <- 4 * w + rnorm(n)

# The scatterplot just shows correlation, not causality.
m_simple <- lm(y ~ x)
plot(y ~ x)
abline(m_simple)

# Simple regression is misleading: it indicates spurious causality.
summary(m_simple)$coefficients
summary(m_simple)$r.squared

# The spurious causality is gone in a multiple regression that controls for w.
summary(lm(y ~ x + w))$coefficients

# part 2 ----
# House-price data. The columns used below are rprice, baths, area and age.
# The data frame is passed explicitly to each model instead of being attached,
# so nothing is placed on the search path and the workspace is left untouched.
ad <- "https://www.fsb.miamioh.edu/lij14/400_house.txt"
house <- read.table(url(ad), header = TRUE)

m_baths <- lm(rprice ~ baths, data = house)
m_baths_area <- lm(rprice ~ baths + area, data = house)

summary(m_baths)$coefficients
summary(m_baths_area)$coefficients
with(house, cor(baths, area))

mean(house$age)
mean(house$baths)
# Note: houses with age exactly 18 fall into neither of the two groups below.
mean(house$baths[house$age > 18])
mean(house$baths[house$age < 18])

# part 3 ----
# Partialling out: regress baths on area, then regress rprice on the
# residuals. Compare the slope on rhat with the baths coefficient from the
# multiple regression printed right after it.
m_first <- lm(baths ~ area, data = house)
rhat <- resid(m_first)
# rhat is not a column of house, so lm() picks it up from the workspace.
m_second <- lm(rprice ~ rhat, data = house)
summary(m_second)$coefficients
summary(m_baths_area)$coefficients

# part 4 ----
# Joint test of baths = 0 and area = 0, first with car::linearHypothesis(),
# then by hand from the R-squared of the unrestricted and restricted models.
m <- lm(rprice ~ baths + area + age, data = house)
linearHypothesis(m, c("baths=0", "area=0"))

m_u <- summary(m)
m_r <- summary(lm(rprice ~ age, data = house))

# Two restrictions in the numerator; the denominator uses the residual
# degrees of freedom of the unrestricted fit (n - k - 1) rather than a
# hard-coded sample size.
n_restrictions <- 2
f_test <- ((m_u$r.squared - m_r$r.squared) / n_restrictions) /
  ((1 - m_u$r.squared) / m$df.residual)
f_test

# Overall F statistic of the unrestricted model, as reported by summary.lm().
m_u$fstatistic
summary(m)

# part 5 ----
# Multicollinearity: bedroom is baths plus a small amount of noise.
summary(m_baths)$coefficients

set.seed(12345)
bedroom <- house$baths + 0.1 * rnorm(nrow(house))

# bedroom is not a column of house, so lm() picks it up from the workspace.
m_collinear <- lm(rprice ~ baths + bedroom, data = house)
summary(m_collinear)$coefficients
summary(m_collinear)$fstatistic
summary(lm(baths ~ bedroom, data = house))$r.squared

################
# mini project #
################
data(GaltonFamilies)

# multiple regression
m_father <- lm(childHeight ~ father, data = GaltonFamilies)
m_parents <- lm(childHeight ~ father + mother, data = GaltonFamilies)
summary(m_father)$coefficients
summary(m_parents)$coefficients

# Change in the father coefficient once mother is added to the model,
# taken from the fitted models instead of hand-copied rounded values.
coef(m_father)[["father"]] - coef(m_parents)[["father"]]

# Omitted-variable bias: the mother coefficient times the slope from
# regressing mother on father.
ovb <- coef(m_parents)[["mother"]] *
  with(GaltonFamilies, cov(father, mother) / var(father))
ovb

with(GaltonFamilies, cor(childHeight, father))
with(GaltonFamilies, cor(childHeight, mother))