rm(list=ls())
source("http://klein.uk/R/myfunctions.R")
ls()
data = read.csv("http://klein.uk/R/wineandwealth.csv")
str(data)
attach(data)
data
summary(data[seq(1,7,2)])
summary(data[seq(2,8,2)])
lmA <- lm(SalesA ~ IncomeA); summary(lmA)
lmB <- lm(SalesB ~ IncomeB); summary(lmB)
lmC <- lm(SalesC ~ IncomeC); summary(lmC)
lmD <- lm(SalesD ~ IncomeD); summary(lmD)
library(sfsmisc)
library(car)
plot(SalesA ~ IncomeA, main="Confidence/prediction bands/intervals for predicted sales of Almaden", xlim=c(0,15), ylim=c(0,14))
abline(lmA)
abline(v=15, h=sum(lmA$coef*c(1,15)), lty=2)
linesHyperb.lm(lmA, c.prob=0.95, confidence=TRUE, do.abline=TRUE, col="blue")
linesHyperb.lm(lmA, c.prob=0.95, confidence=FALSE, k=1)
legend("topleft", c("confidence band","prediction band"), col = c("blue","red"), lty=c(2,2))
abline(h=c(8.692466, 12.31044), col="blue")
abline(h=c(7.170113, 13.8328), col="red")
legend("topleft", c("confidence band","prediction band","confidence interval at 15K income","prediction interval at 15K income"), col = c("blue","red","blue","red"), lty=c(2,2,1,1), bg="white")
n = length(IncomeA)
RSE = sqrt(sum(lmA$resid^2 / (n-2)))
Xi = 15
Xbar = mean(IncomeA)
varX = var(IncomeA)
s.e.fitted = sqrt( RSE^2 * (1/n + ((Xi - Xbar)^2) / ((n-1)*varX)) ); s.e.fitted
x = data.frame(IncomeA = 15)
conf_a = predict(lmA, x, se.fit=T, level=.95, interval="confidence")
conf_a
x = data.frame(IncomeA = 15)
pred_a = predict(lmA, x, se.fit=T, level=.95, interval="prediction")
pred_a
library(stats)
vcov(lmA)
s.e.fitted
s.e.predicted = sqrt( RSE^2 * (1 + 1/n + ((Xi - Xbar)^2) / ((n-1)*varX)) ); s.e.predicted
quantile.t = qt(p=.025, df=9, lower.tail = FALSE); quantile.t
round(sum(lmA$coef*c(1,15)) + c(-1,+1)*quantile.t*s.e.fitted, digits = 2)
round(sum(lmA$coef*c(1,15)) + c(-1,+1)*quantile.t*s.e.predicted, digits = 2)
plot(SalesD ~ IncomeD, ylim=c(min(SalesD), max(SalesD)+1))
abline(lm(SalesD ~ IncomeD), col="red")
at = which(SalesD == max(SalesD)); at
text(x=IncomeD[at], y=SalesD[at], "Influential obs", pos=2)
hD = hat(model.matrix(lmD))
plot(hD)
abline(h=2*2/11)
sort(hD)
plot(SalesC ~ IncomeC, ylim=c(min(SalesC), max(SalesC)+1))
abline(lm(SalesC ~ IncomeC), col="red")
at <- which(SalesC == max(SalesC)); at
points(SalesC[at] ~ IncomeC[at], col="red")
text(x = IncomeC[at], y = SalesC[at], "Outlier", pos=2, col="red")
abline(lm(SalesC[-at] ~ IncomeC[-at]), col="blue")
legend("topleft", legend=c("with","~out"), fill=c("red","blue"))
sort(rstudent(lmC), decreasing=TRUE)
qt(.025, df=9, lower.tail=FALSE)
cookD <- cooks.distance(lmD)
plot(cookD)
round(cookD, 2)