#######
# OLS #
#######
# Simulate a sample from y = b1 * x1 + b2 * x2 + error, where x1 is a constant
# column of ones and both x2 and the error are drawn with rnorm().
set.seed(12345)  # fix the RNG state so every run produces the same draws
n <- 100         # sample size
b1 <- 1          # true coefficient on the constant
b2 <- -1         # true coefficient on x2
x1 <- rep(1, times = n)
x2 <- rnorm(n)
# The error term is drawn after x2, so the order of the random draws matters.
y <- b1 * x1 + b2 * x2 + rnorm(n)
# n x 2 regressor matrix with columns named x1 and x2
x <- cbind(x1, x2)

# Scatter plot of the sample with the fitted least-squares line on top
plot(x2, y)
abline(lm(y ~ x2), col = "red", lty = 2)


# Numerical Method
# Residual sum of squares of a linear model, used as the objective for optim().
#
# Arguments:
#   co      numeric vector of candidate coefficients, one per column of `design`
#   resp    response vector; defaults to the global `y`
#   design  regressor matrix; defaults to the global `x`
#
# Returns the RSS as a plain numeric scalar (not a 1 x 1 matrix), which is the
# form optim() documents for its objective function. Stops with an error when
# the number of coefficients does not match the number of regressors.
ols.rss <- function(co, resp = y, design = x) {
  design <- as.matrix(design)
  beta <- as.numeric(co)
  # Fail early on a length mismatch instead of silently dropping coefficients
  stopifnot(length(beta) == ncol(design))
  # drop() turns the n x 1 product into a vector so the residuals are a vector
  uhat <- as.numeric(resp) - drop(design %*% beta)
  sum(uhat^2)
}
# Minimise the RSS numerically with optim(), starting the search at (2, 2),
# then report the minimiser and the objective value reached.
ou <- optim(par = c(2, 2), fn = ols.rss)
cat("OLS Estimated Coefficients are ", ou$par[1:2], "\n")
cat("RSS is ", ou$value, "\n")


# Matrix Algebra
# Solve the normal equations (X'X) b = X'y for b. solve(A, b) solves the
# system directly rather than forming the inverse of X'X, and crossprod()
# builds X'X and X'y without an explicit transpose.
betahat <- solve(crossprod(x), crossprod(x, y))
betahat
# Residuals at the estimate and their sum of squares (a 1 x 1 matrix)
uha <- y - x %*% betahat
crossprod(uha)

# Canned Command
# Fit the same regression with lm(); the formula adds the intercept itself,
# so only x2 is listed on the right-hand side.
ou.ols <- lm(formula = y ~ x2)
summary(ou.ols)
# Residual sum of squares computed from the fitted model's residuals
sum(residuals(ou.ols)^2)
# ANOVA table for the fit
anova(ou.ols)

# Real Example
# multiple regression using house.txt
# read data from my webpage

# Location of the housing data set
ad <- "https://www.fsb.miamioh.edu/lij14/400_house.txt"
# Read the table over the network, taking column names from the first line.
# TRUE is spelled out because the shorthand T is an ordinary variable that
# can be reassigned, which would silently change how the file is parsed.
data <- read.table(url(ad), header = TRUE)
# Print the whole data set for inspection
data

# Show the first three columns, which are used as the regressors below
data[, 1:3]

# Extract the regression variables by column position:
# columns 1 to 3 become the regressors and column 4 the dependent variable
age <- data[[1]]
area <- data[[2]]
baths <- data[[3]]
rprice <- data[[4]]

# Regress price on age, area and baths; keep the fitted model in m1
m1 <- lm(formula = rprice ~ age + area + baths)

# Coefficient table: estimates, standard errors, t values and p values
summary(m1)

# Confidence intervals for the coefficients: default level, then 90 percent
confint(m1)
confint(m1, level = 0.9)

# ANOVA table; the residual sum of squares is in its Residuals row
anova(m1)

# Residuals of the fitted model m1
re <- resid(m1)

# First-order conditions of OLS: the residuals, and the residuals multiplied
# by a regressor, should each average to (numerically) zero
summary(re)
summary(age * re)

# Residual sum of squares
sum(re^2)

# Most undervalued houses: actual price at least 80000 below the fitted value
which(re <= -8e4)

# Test H0: coefficient of age = coefficient of area.
# Under H0 the two regressors enter only through their sum, so the restricted
# model replaces them with a single combined regressor.
newx <- age + area
m2 <- lm(formula = rprice ~ newx + baths)
# Compare the unrestricted fit m1 with the restricted fit m2
anova(m1, m2)