source("http://klein.uk/R/myfunctions.R")
ls()
metal <- read.table("http://klein.uk/R/usmetal.txt", header=T)
str(metal)
for(i in 1:3){
metal[,i+3] <- log(metal[,i])
}
names(metal)[4:6] <- c("lK","lL","lY")
str(metal)
par(mfrow=c(3,2))
for(i in 1:6){
name <- names(metal)[i]
hist(metal[,i], main=paste("Histogram of", name) , xlab=name)
}
lm1c <- lm(lY ~ lK + lL, data=metal); summary(lm1c)
par(mfrow=c(2,2)); plot(lm1c)
library(car)
linearHypothesis(lm1c, "lK=lL")
linearHypothesis(lm1c, "lK + lL = 1")
lm1f <- lm(lY ~ I(lL - lK), offset=lK, data=metal); summary(lm1f)
anova(lm1c, lm1f)
wine <- read.csv("http://klein.uk/R/wineweather1", header=T)
str(wine)
lm2a <- lm(logprice ~ degrees + hrain + wrain + time_sv, data=wine); summary(lm2a)
wine
qf(p=0.95, df1=4, df2=22)
str(summary(lm2a))
paste("R^2 is", summary(lm2a)$r.squared)
paste("df residuals = n-k =", 27-5)
paste("Restrictions = k-1 = df of the model =", 5-1)
paste("F-stat =", summary(lm2a)$r.squared / (5-1) / ((1- summary(lm2a)$r.squared) / (27-5)) )
pf(q=summary(lm2a)$fstatistic[1], df1=summary(lm2a)$fstatistic[2], df2=summary(lm2a)$fstatistic[3], lower.tail=F)
wine$vint
wine$vint.dec <- cut(wine$vint, breaks=c(1950, 1959, 1969, 1979, 1990))
levels(wine$vint.dec) <- c("50s","60s","70s","80s")
table(wine$vint.dec)
lm2b <- lm(logprice ~ degrees + hrain + wrain + vint.dec, data=wine); summary(lm2b)
linearHypothesis(lm2b, c("vint.dec60s", "vint.dec70s", "vint.dec80s"))
wine$fifties <- ifelse(wine$vint<1960, 1, 0)
wine$sixties <- ifelse(wine$vint>1959 & wine$vint<1970, 1, 0)
wine$seventies <- ifelse(wine$vint>1969 & wine$vint<1980, 1, 0)
wine$eighties <- ifelse(wine$vint>1979, 1, 0)
lm2b <- lm(logprice ~ degrees + hrain + wrain + sixties + seventies + eighties, data=wine); summary(lm2b)
linearHypothesis(lm2b, c("sixties = 0", "seventies = 0", "eighties = 0"))
lm(logprice ~ degrees + hrain + wrain + sixties + seventies + eighties + fifties , data=wine)$coef
lm(logprice ~ degrees + hrain + wrain + sixties + seventies + eighties, data=wine)$coef
lm(logprice ~ -1 + degrees + hrain + wrain + fifties + sixties + seventies + eighties, data=wine)$coef
wine <- wine[order(wine$vint, decreasing=T), ]
wine$deglag1 <- c(wine$degrees[-1], NA)
wine$deglag2 <- c(wine$degrees[-c(1,2)], rep(NA,2))
lm2di <- lm(logprice ~ degrees + hrain + wrain + deglag1 + deglag2, data=wine); summary(lm2di)
lm2dii <- lm(logprice ~ degrees + hrain + wrain + deglag1, data=wine); summary(lm2dii)
lm2diii <- lm(logprice ~ degrees + hrain + wrain, data=wine); summary(lm2diii)
myIC <- function(model){
print(model$call)
print( paste("AIC:", AIC(model, k=2) ))
print( paste("BIC:", AIC(model, k=log(length(model$res))) ))
print( paste("R2 :", summary(model)$adj.r.squared ))
}
myIC(lm2di); myIC(lm2dii); myIC(lm2diii)
house <- read.csv("http://klein.uk/R/hprice1", header=T)
str(house)
lm3a <- lm(price ~ lotsize + sqrft + bdrms, data=house)
summary(lm3a)
shccm(lm3a)
lm3b <- lm(lprice ~ llotsize + lsqrft + bdrms, data=house)
summary(lm3b)
shccm(lm3b)
house$lm3b.sqres <- lm3b$residuals^2
lm3b.white.test <- lm(lm3b.sqres ~ llotsize*lsqrft*bdrms - llotsize:lsqrft:bdrms
+ I(llotsize^2) + I(lsqrft^2) + I(bdrms^2), data=house); shccm(lm3b.white.test)
T <- summary(lm3b.white.test)$r.squared * nrow(house)
library(VGAM)
pchisq(q=T, df=9, lower.tail=F)
bond <- read.csv("http://klein.uk/R/bond_int_rates", header=T)
str(bond)
library(zoo)
bond$paneldate <- as.yearmon(bond$paneldate,format="%Ym%m")
lm4 <- lm(daaa ~ dus3mt, data=bond); shccm(lm4)
e <- lm4$res
plot(e ~ bond$paneldate, type="l")
N <- length(e)
e1 <- c(NA, e[1:(N-1)])
head(data.frame(bond$paneldate, e, e1))
plot(e ~ e1)
cor(e, e1, use="complete")
abline(a=0, b=0.2761491, col="red", lwd=2)
?durbinWatsonTest
durbinWatsonTest(lm4, max.lag=1, alternative="positive")
nations <- read.csv("http://klein.uk/R/nations", header=T)
str(nations)
lm5 <- lm(birth ~ gnpcap + urban, data=nations); shccm(lm5)
e <- lm5$resid
par(mfrow=c(1,2))
plot(e ~ lm5$model[,2], main="gnpcap"); lines(lowess(cbind(lm5$model[,2], e), f=1), col=2)
plot(e ~ lm5$model[,3], main="urban"); lines(lowess(cbind(lm5$model[,3], e), f=1), col=2)
library(lmtest)
?resettest
resettest(lm5)
plot(subset(nations, select=c("birth","gnpcap","urban")))
summary(nations$gnpcap)
plot(density(nations$gnpcap), col="green", lwd=2)
grid.x <- seq(-10000, 20000, 1)
grid.y <- dnorm(grid.x, sd=sd(nations$gnpcap))
lines(grid.x, grid.y, col="blue", lwd=2)
legend("topright", legend=c("Density of gnpcap","Normal density"), fill=c("green","blue"))
nations$lgnp <- log(nations$gnpcap)
lm5b <- lm(birth ~ lgnp + urban, data=nations); shccm(lm5b)
eb <- lm5b$resid
par(mfrow=c(1,2))
plot(eb ~ lm5b$model[,2], main="lgnp"); lines(lowess(cbind(lm5b$model[,2], eb), f=1), col=2)
plot(e ~ lm5$model[,2], main="gnpcap"); lines(lowess(cbind(lm5$model[,2], e), f=1), col=2)
resettest(lm5b)
eaef21 <- read.csv("http://klein.uk/R/eaef21", header=T)
str(eaef21)
lm6a <- lm(EARNINGS ~ S + ASVABC, data=eaef21); shccm(lm6a)
e <- lm6a$res
plot(density(e), col="green", lwd=2)
grid.x <- seq(min(e),max(e),.1)
grid.y <- dnorm(grid.x, sd=sd(e))
lines(grid.x, grid.y, col="blue", lwd=2)
legend("topright", legend=c("Density of error terms","Normal density"), fill=c("green","blue"), )
library(graphics)
qqnorm(e); qqline(e, col = 2)
lm6b <- lm(log(EARNINGS) ~ S + ASVABC, data=eaef21); shccm(lm6b)
e <- lm6b$res
crime <- read.csv("http://klein.uk/R/crime", header=T)
str(crime)
plot(subset(crime, select=c("crime","pctmetro","poverty","single")))
plot(crime ~ pctmetro, data=crime, col="white"); text(x=crime$pctmetro, y=crime$crime, labels=crime$state)
plot(crime ~ poverty, data=crime, col="white"); text(x=crime$poverty, y=crime$crime, labels=crime$state)
plot(crime ~ single, data=crime, col="white"); text(x=crime$single, y=crime$crime, labels=crime$state)
lm7 <- lm(crime ~ pctmetro + poverty + single, data=crime); shccm(lm7)
crime$rstudent <- rstudent(lm7)
crime <- crime[order(crime$rstudent), ]
head(crime)
tail(crime)
subset(crime, abs(rstudent) > 2)
lm7b <- lm(crime ~ pctmetro + poverty + single,
data=subset(crime, state!="dc")); shccm(lm7b)
plot(crime ~ single, data=crime, col="white"); text(x=crime$single, y=crime$crime, labels=crime$state)
m.pctmetro <- mean(crime$pctmetro)
m.poverty <- mean(crime$poverty)
r.single <- seq(min(crime$single),max(crime$single),.1)
myReg <- function(x, model){
coef(model)%*%c(1, m.pctmetro, m.poverty, x)
}
y <- sapply(r.single, myReg, model=lm7); lines(x=r.single, y=y, col="red", lwd=2)
y <- sapply(r.single, myReg, model=lm7b); lines(x=r.single, y=y, col="blue", lwd=2)
legend("topleft", legend=c("OLS with DC","OLS without DC"), fill=c("red","blue"))
q("no")