rm(list=ls())
source("http://klein.uk/R/myfunctions.R")
ls()
data("CASchools", package = "AER")
CASchools$stratio = with(CASchools, students/teachers)
CASchools$score = with(CASchools, (math + read)/2)
attach(CASchools)
par(mfrow = c(2, 2))
plot(score ~ income, main = "district average income")
lm1 = lm(score ~ income)
shccm(lm1)
abline(lm1, col = "blue", lwd=2)
plot(lm1, which = 1)
income2 = income * income
lm2 = lm(score ~ income + income2)
shccm(lm2)
or = order(income)
plot(score ~ income, main = "district average income")
abline(lm1, col = "blue", lwd = 2)
lines(lm2$fitted[or] ~ income[or], col = "red", lwd = 2)
legend("bottomright", c("linear", "quadratic"), lwd = 2,
col = c("blue", "red"))
plot(lm2, which = 1)
library(car)
lht(lm2, c("income","income2"))
lm2$coef["income"] + 2 * 50 * lm2$coef["income2"]
lht(lm2, "income + 100*income2")
par(mfrow=c(1,1))
plot(score ~ income, main = "district average income")
abline(lm1, col = "blue", lwd=2)
lm2 = lm(score ~ income + income2)
lines(income[or], fitted(lm2)[or], col = "red", lwd = 2)
shccm(lm2)
income3 = income * income * income
lm3 = lm(score ~ income + income2 + income3)
lines(income[or], fitted(lm3)[or], col = "green", lwd = 2)
shccm(lm3)
lmp = lm(score ~ poly(income, 10))
smooth = list(income = seq(5, 70, 0.5))
lines(smooth$income, predict(lmp, newdata = smooth),
col = "magenta", lwd = 2)
legend("bottomright", c("linear", "quadratic", "cubic",
"10th-deg"), lwd = 2, col = c("blue", "red", "green", "magenta"))
z = (income - mean(income) / sd(income))
lm4 = lm(score ~ z + I(z^2) + I(z^3))
shccm(lm4)
vif(lm3); vif(lm4)
plot(score ~ income, main = "district average income")
abline(lm1, col = "blue", lwd=2)
lines(income[or], fitted(lm2)[or], col = "red", lwd = 2)
legend("bottomright", c("linear", "quadratic", "lin-log",
"log-lin", "log-log"), lwd = 3, col = c("blue", "red",
"green", "black", "orange"))
lmLiLo = lm(score ~ log(income)); lmLiLo
lines(income[or], fitted(lmLiLo)[or], col = "green", lwd = 3)
lmLiLo$coef[2]/median(income)
lmLoLi = lm(log(score) ~ income); lmLoLi
lines(income[or], exp(fitted(lmLoLi))[or], col = "black", lwd = 3)
lmLoLo = lm(log(score) ~ log(income)); lmLoLo
lines(income[or], exp(fitted(lmLoLo))[or], col = "orange", lwd = 3)
fridge = read.csv("http://klein.uk/R/fridge2.csv")
str(fridge)
attach(fridge)
summary(fridge[, c("ecost", "price")])
plot(price ~ ecost, main="Energy cost and refrigerator price")
lm1 = lm(price ~ ecost); summary(lm1)
abline(lm1, col="blue", lwd=2)
scatterplotMatrix(fridge[, 1:3])
cor(fridge[, 1:3])
library(ellipse)
corC = cor(fridge[,1:3])
colors = c("#A50F15","#DE2D26","#FB6A4A","#FCAE91","#FEE5D9","white",
"#EFF3FF","#BDD7E7","#6BAED6","#3182BD","#08519C")
plotcorr(corC, col=colors[5*corC + 6], type = "lower")
lm2 = lm(price ~ volume + ecost); summary(lm2)
library(car)
vif(lm2)
freezer = as.factor(freezer)
levels(freezer) = c("bottom","top","side")
lm3 = lm(price ~ volume + ecost + freezer); summary(lm3)
lm3$coef["(Intercept)"]
lm3$coef["(Intercept)"] + lm3$coef["freezertop"]
lm3$coef["(Intercept)"] + lm3$coef["freezerside"]
lht(lm3, "freezerside"); lht(lm3, "freezertop")
freezer = relevel(freezer, ref="side")
lm4 = lm(price ~ volume + ecost + freezer); summary(lm4)
lm.top = lm(price ~ volume + ecost, data=fridge[freezer=="top",]); summary(lm.top)
lm.bottom = lm(price ~ volume + ecost, data=fridge[freezer=="bottom",]); summary(lm.bottom)
lm.side = lm(price ~ volume + ecost, data=fridge[freezer=="side",]); summary(lm.side)
lm6 = lm(price ~ volume + ecost + freezer + volume:freezer + ecost:freezer); summary(lm6)
vif(lm6)
lht(lm6, c("volume:freezerbottom","volume:freezertop","ecost:freezerbottom","ecost:freezertop"))
levels(freezer)[2:3] = "topbottom"
lm6 = lm(price ~ volume + ecost + freezer + volume:freezer + ecost:freezer); summary(lm6)
lht(lm6, c("volume:freezertopbottom", "ecost:freezertopbottom"))