rm(list=ls())
source("http://klein.uk/R/myfunctions.R")
ls()
library(AER)
data("CASchools")
CASchools$stratio <- with(CASchools, students/teachers)
CASchools$score <- with(CASchools, (math + read)/2)
attach(CASchools)
est1 <- lm(score ~ stratio)
summary(est1)
lowVar <- stratio > quantile(stratio, 0.25) & stratio < quantile(stratio, 0.75)
est2 <- lm(score ~ stratio, subset = lowVar)
summary(est2)
est3 <- lm(score ~ stratio, subset = lowVar==FALSE)
summary(est3)
plot(score ~ stratio)
points(score ~ stratio, col = "red", subset = lowVar)
abline(est1); abline(est2, col="red"); abline(est3, col = "blue")
legend("topright",legend=c("Pooled","lowVar","highVar"),lty=1,col=c("black","red","blue"))
set.seed(123)
x <- runif(1000)
e <- rnorm(1000)
par(mfrow=c(1,2))
y.hom <- x + e
lm.hom <- lm(y.hom ~ x)
plot(y.hom ~ x, ylim=c(-2,3)); abline(lm.hom, col="red", lwd=2)
y.het <- x + e*x^2
lm.het <- lm(y.het ~ x)
plot(y.het ~ x, ylim=c(-2,3)); abline(lm.het, col="red", lwd=2)
vcov(lm.het)
library(car)
hccm(lm.het, type="hc0")
source("http://klein.uk/R/myfunctions.R")
summary(lm.hom)
shccm(lm.hom)
summary(lm.het)
shccm(lm.het)
scatterplot(est1$resid ~ english)
X = cbind(Intercept=rep(1,420), stratio)
head(X)
Y = score
beta.hat = solve(t(X)%*%X) %*% t(X)%*%Y
beta.hat
X%*%beta.hat
set.seed(123)
epsilon <- rnorm(10000)
omega <- rnorm(10000)
eta <- rnorm(10000)
x1 <- 5 + omega
x2 <- 10 + omega + 0.3* eta
y <- 20 + x1 + x2 + epsilon
lm(y ~ x1 + x2)$coef
lm(y ~ x1)$coef
1*cov(x1,x2)/var(x1)
cor(stratio, english)
lm(score ~ stratio)
lm(score ~ stratio + english)
str(CASchools)
?CASchools
FracEnglish = english/100
lm(score ~ stratio + english + FracEnglish)
set.seed(123)
perturbedEstimate <- function(x) {
FracEnglish = english/100 + rnorm(4) * 0.0000001
est <- lm(score ~ stratio + english + FracEnglish)
coef(est)[3:4]
}
estList <- sapply(1:100, perturbedEstimate)
plot(t(estList), main = "multicollinearity, estimated coefficients")
set.seed(100)
N <- 1000
p <- 0.05
qcrit <- -qnorm(p/2)
b1 <- rnorm(N)
b2 <- rnorm(N)
reject <- abs(b1) > qcrit | abs(b2) > qcrit
mean(reject) * 100
par(mfrow=c(1,2))
plot(b2 ~ b1)
points(b2 ~ b1, subset = reject, col = "red", pch = 7)
abline(v = c(qcrit, -qcrit), h = c(qcrit, -qcrit))
data.ellipse(b1, b2, levels = 1 - p, plot.points = FALSE)
legend("topleft", c("naive rejection", "confidence region"),
pch = c(7, NA), col = "red", lty = c(NA, 1))
set.seed(100)
b1 <- rnorm(N)
b2 <- 0.3 * rnorm(N) + 0.7 * b1
reject <- abs(b1) > qcrit | abs(b2) > qcrit
plot(b2 ~ b1)
points(b2 ~ b1, subset = reject, col = "red", pch = 7)
abline(v = c(qcrit, -qcrit), h = c(qcrit, -qcrit))
data.ellipse(b1, b2, levels = 1 - p, plot.points = FALSE)
text(-1, 1, "A")
legend("topleft", c("naive rejection", "confidence region"),
pch = c(7, NA), col = "red", lty = c(NA, 1))