#######
# OLS #
#######

# Simulated data ----
# Draw n observations from y = b1 * x1 + b2 * x2 + u, where x1 is a column of
# ones (the intercept regressor) and x2, u are independent standard normal
# draws. The seed is fixed so every run reproduces the same sample.
set.seed(12345)
n <- 100
b1 <- 1
b2 <- -1
x1 <- rep(1, n)
x2 <- rnorm(n)
y <- b1 * x1 + b2 * x2 + rnorm(n)
x <- cbind(x1, x2)

# Scatter plot of the sample with the fitted OLS line overlaid (red, dashed).
plot(x2, y)
abline(lm(y ~ x2), col = "red", lty = 2)

# Numerical Method ----

# Residual sum of squares of a linear model at a candidate coefficient vector.
#
# co    Numeric vector of candidate coefficients; the first ncol(xmat)
#       elements are used.
# xmat  Regressor matrix. Defaults to the global `x` built above.
# yvec  Response vector. Defaults to the global `y` built above.
#
# Returns the RSS as a plain numeric scalar (not a 1 x 1 matrix), which is the
# form optim() expects from its objective function.
ols.rss <- function(co, xmat = x, yvec = y) {
  beta <- co[seq_len(ncol(xmat))]
  uhat <- yvec - xmat %*% beta
  sum(uhat^2)
}

# Minimise the RSS numerically, starting the search from (2, 2).
ou <- optim(c(2, 2), ols.rss)
cat("OLS Estimated Coefficients are ", c(ou$par[1], ou$par[2]), "\n")
cat("RSS is ", ou$value, "\n")

# Matrix Algebra ----
# Solve the normal equations (X'X) b = X'y directly instead of forming the
# explicit inverse of X'X.
betahat <- solve(crossprod(x), crossprod(x, y))
betahat
uha <- y - x %*% betahat
# RSS implied by the closed-form estimate (printed as a 1 x 1 matrix).
crossprod(uha)

# Canned Command ----
ou.ols <- lm(y ~ x2)
summary(ou.ols)
sum(resid(ou.ols)^2)
anova(ou.ols)

# Real Example ----
# multiple regression using house.txt
# read data from my webpage (requires network access)
ad <- "https://www.fsb.miamioh.edu/lij14/400_house.txt"
data <- read.table(url(ad), header = TRUE)
data

# display independent variables (the first three columns)
data[, c(1, 2, 3)]

# define variables; columns are picked by position, so this depends on the
# column order of the downloaded file
rprice <- data[, 4]
age <- data[, 1]
area <- data[, 2]
baths <- data[, 3]

# run multiple regression and save results in the object called m1
m1 <- lm(rprice ~ age + area + baths)

# show coefficients, standard errors, t values and p values
summary(m1)

# confidence interval (default level, then 90%)
confint(m1)
confint(m1, level = 0.90)

# obtain RSS
anova(m1)

# obtain residual
re <- residuals(m1)

# check the first order conditions of OLS: the residuals, and the residuals
# multiplied by a regressor, should both average to zero
summary(re)
summary(re * age)

# obtain RSS
sum(re * re)

# find the most undervalued houses (residual of -80000 or lower, i.e. observed
# price at least 80000 below the fitted value)
which(re <= -80000)

# test coefficient of age = coefficient of area
# Under the null both regressors share one coefficient, so the restricted
# model regresses on their sum; anova() compares it with the unrestricted m1.
newx <- age + area
m2 <- lm(rprice ~ newx + baths)
anova(m1, m2)