# Dummy-variable regression examples.
#
# Three independent sections:
#   1. Wage data  - group means, t-test, interactions, joint F test, DID.
#   2. NBA data   - conditional means by position, dummy-variable trap.
#   3. Mini project - child height on father height by gender (GaltonFamilies).
#
# Every model is fit with an explicit `data =` argument instead of attach(),
# so columns from one dataset can never leak into another section, and the
# script does not clear the caller's workspace.

library(car)       # linearHypothesis()
library(HistData)  # GaltonFamilies

# Dummy variable ----

## Part 1: sample shares and group means ----

wage_url <- "https://www.fsb.miamioh.edu/lij14/411_wage.txt"
wage_data <- read.table(wage_url, header = TRUE)

# NOTE(review): `female` and `married` are compared against 0 and 1 throughout,
# so they are assumed to be 0/1 indicators - confirm against the data file.
prop.table(xtabs(~ female, data = wage_data))
mean(wage_data$female)

ybar_m <- mean(wage_data$wage[wage_data$female == 0])
ybar_f <- mean(wage_data$wage[wage_data$female == 1])
barplot(
  c(ybar_m, ybar_f),
  col = c("red", "blue"),
  names.arg = c("Male", "Female")
)

## Part 2: difference in means, t-test, and the equivalent regression ----

ybar_m
ybar_f
t.test(wage ~ female, data = wage_data, var.equal = TRUE)
summary(lm(wage ~ female, data = wage_data))$coefficients

## Part 3: separate regressions by gender vs. an interaction term ----

with(wage_data, plot(educ, wage, pch = 16, col = ifelse(female == 0, 2, 4)))

# Subgroup fits via lm()'s `subset` argument.
fit_educ_m <- lm(wage ~ educ, data = wage_data, subset = female == 0)
fit_educ_f <- lm(wage ~ educ, data = wage_data, subset = female == 1)
abline(fit_educ_m, col = "red")
abline(fit_educ_f, col = "blue")
summary(fit_educ_m)$coefficients
summary(fit_educ_f)$coefficients

# The same subgroup fits via explicit subset() data frames.
wage_m <- subset(wage_data, female == 0)
summary(lm(wage ~ educ, data = wage_m))$coefficients
wage_f <- subset(wage_data, female == 1)
summary(lm(wage ~ educ, data = wage_f))$coefficients

# Pooled model with an education-by-female interaction. Each interaction gets
# its own column name so the three interactions in this script stay distinct.
wage_data$female_educ <- wage_data$educ * wage_data$female
summary(lm(wage ~ female + educ + female_educ, data = wage_data))$coefficients

## Part 4: experience interaction and joint F test ----

wage_data$female_exper <- wage_data$exper * wage_data$female
fit_exper <- lm(wage ~ female + exper + female_exper, data = wage_data)
summary(fit_exper)$coefficients

# Joint null: the female intercept shift and the female slope shift are both 0.
linearHypothesis(fit_exper, c("female = 0", "female_exper = 0"))

## Part 5: difference in differences (gender x marital status) ----

sm <- with(wage_data, mean(wage[female == 0 & married == 0]))
sf <- with(wage_data, mean(wage[female == 1 & married == 0]))
mm <- with(wage_data, mean(wage[female == 0 & married == 1]))
mf <- with(wage_data, mean(wage[female == 1 & married == 1]))

DID <- (mf - mm) - (sf - sm)
DID

wage_data$female_married <- wage_data$female * wage_data$married
summary(
  lm(wage ~ female + married + female_married, data = wage_data)
)$coefficients

# Cell codes: 1 = sm, 2 = sf, 3 = mm, 4 = mf. A labelled factor keeps each bar
# tied to its own cell; rows with a missing indicator become NA and are left
# out by tapply() instead of forming an extra, mislabelled group.
cell <- with(
  wage_data,
  factor(1 + female + 2 * married, levels = 1:4,
         labels = c("sm", "sf", "mm", "mf"))
)
barplot(tapply(wage_data$wage, cell, mean))

# NBA data ----

nba_url <- "https://www.fsb.miamioh.edu/lij14/311_nba.txt"
nba <- read.table(nba_url, sep = "\t", header = TRUE)

nba$position[1:5]
prop.table(xtabs(~ position, data = nba))

## Conditional means ----

barplot(with(nba, tapply(points, position, mean, na.rm = TRUE)))
mean(nba$points[nba$position == "center"], na.rm = TRUE)
mean(nba$points[nba$position == "forward"], na.rm = TRUE)
mean(nba$points[nba$position == "guard"], na.rm = TRUE)

## Dummy variables for each position ----

nba$center <- as.integer(nba$position == "center")
nba$forward <- as.integer(nba$position == "forward")
nba$guard <- as.integer(nba$position == "guard")

## Dummy-variable regression and ANOVA (F test) ----

fit_position <- lm(points ~ forward + guard, data = nba)
summary(fit_position)$coefficients
linearHypothesis(fit_position, c("forward = 0", "guard = 0"))

## Dummy variable trap ----

# All three dummies plus an intercept, then the same model without intercept.
summary(lm(points ~ center + forward + guard, data = nba))$coefficients
summary(lm(points ~ center + forward + guard - 1, data = nba))$coefficients

## A shortcut: let factor() build the dummies ----

summary(lm(points ~ factor(position), data = nba))$coefficients

# Mini project ----

data(GaltonFamilies, package = "HistData")

with(
  GaltonFamilies,
  plot(father, childHeight, col = ifelse(gender == "male", 1, 2))
)

# Fit each gender once and reuse the summaries below.
fit_sons <- lm(childHeight ~ father, data = GaltonFamilies,
               subset = gender == "male")
fit_daughters <- lm(childHeight ~ father, data = GaltonFamilies,
                    subset = gender == "female")
abline(fit_sons, col = 1)
abline(fit_daughters, col = 2)

summary(lm(childHeight ~ father + gender, data = GaltonFamilies))$coefficients

sum_sons <- summary(fit_sons)
sum_sons$coefficients
sum_sons$r.squared

sum_daughters <- summary(fit_daughters)
sum_daughters$coefficients
sum_daughters$r.squared