# -------------------------------------------------------------------
# MPO1 Quantitative Research Methods
# Thilo Klein
# Lab Session 2: Linear Regression in R; Good Practice Guide

# Required libraries: .
  source("http://klein.uk/R/myfunctions.R")
  ls()
# -------------------------------------------------------------------




# --- Ex 1: Run Script-file from command line ---------------------------
 # print("hello, world")
 # setwd("C:/Dokumente und Einstellungen/Thilo/Desktop/Lab_Sessions/Session2/Data")
 # source("hello.R")


# --- Ex 2: The linear model ---------------------------
# --- Ex 2: a) ---
# Review the contents and regress employment growth on GDP growth. Provide an 
# interpretation of the results.

 growth <- read.csv("http://klein.uk/R/growth",header=T,sep=",")
 str(growth)
 lm2 <- lm(empgrow ~ GDPgrow, data=growth); summary(lm2)


# --- Ex 2: b) ---
# Visually inspect data and regression line. 

 plot(empgrow ~ GDPgrow, data=growth)
 abline(lm2, col="red")


# --- Ex 2: c) ---
# Are the coefficients significant? 

 # compare t-value of b2 (5.8) to 97.5% quantile of t-dist (2.07) and reject H0: b2=0.
 qt(p=1-0.025, df=23)
 qt(p=0.025, df=23, lower.tail=F)


# --- Ex 2: f) ---
# Build a confidence interval for the slope. 

 b_1 <- lm2$coef[1]
 sd_b1 <- summary(lm2)$coef[1,2]
 t_0.975 <- qt(p=1-0.025, df=23)
 b_1 + c(-1,1)*sd_b1*t_0.975            # 95%-confidence interval for b_1




# --- Ex 3: The linear model with quadratic terms --------------------------------
 house <- read.csv("http://klein.uk/R/housing",header=T,sep=",")
 str(house)


# --- Ex 3: a) ---
# Estimate a model to test this, using total expenditure as a proxy for total income.

 lm3a <- lm(housing ~ total, data=house)
 summary(lm3a)


# --- Ex 3: b) ---
# Is a quadratic form more appropriate?

 house$totalsq <- house$total^2
 lm3b <- lm(housing ~ total + totalsq, data=house)
 summary(lm3b)




# --- Ex 4: Extrapolation and accuracy of least least squares --------------------------------

 eaef <- read.csv("http://klein.uk/R/eaef",header=T,sep=",")
 str(eaef)

 lm4 <- lm(weight ~ height, data=eaef)
 summary(lm4)




# --- Ex 5: Estimates for changing units of measurement --------------------------------

 eaef$weight_grams <- eaef$weight*454           # one pound is 454 grams
 eaef$height_metric <- eaef$height * 2.54               # one inch is 2.54 cm

 lm(weight_grams ~ height, data=eaef)
 5.562*454   # =2525.148

 lm(weight ~ height_metric, data=eaef)
 5.562496/2.54   # =2.189959




# --- Ex 6: Multiple linear regression --------------------------------

 hprice1 <- read.csv("http://klein.uk/R/hprice1",header=T,sep=",")
 str(hprice1)

 lm6 <- lm(price ~ sqrft + bdrms, data=hprice1)
 summary(lm6)




# --- Ex 7: Confidence intervals of regression coefficients (1) --------------------------------

 oilprice1 <- read.csv("http://klein.uk/R/oilprice1",header=T,sep=",")
 str(oilprice1)

 plot(price ~ api, data=oilprice1)
 lm7 <- lm(price ~ api, data=oilprice1)
 summary(lm7)

 t <- qt(p=0.975, df=13-2)              # quantile of t distribution

 str(lm7)
 b <- lm7$coef[2]                               # coefficient

 str(summary(lm7))
 se_b <- summary(lm7)$coef[2,2] # standard error of coefficient

 b + c(-1,1)*se_b*t                     # confidence interval

 ls(); rm(b, t, se_b)                   # remove objects if no longer needed!




# --- Ex 8: Reversal of regressor and regressand --------------------------------

 lm(earnings ~ schooling, data=eaef)
 lm(schooling ~ earnings, data=eaef)

 ?cor
 cor(eaef$earnings, eaef$schooling, method="pearson")




# --- Ex 9: Regression against a constant (optional) --------------------------------

 summary(eaef$weight)
 lm(weight ~ 1, data=eaef)
 # lm(weight ~ -1 + c(rep(2,540)), data=eaef)




# --- Ex 10: Confidence intervals for regression coefficients (2) --------------------------------

 t5 <- qt(p=0.025, df=60-2, lower.tail=F)       # t-quantile 5%
 t1 <- qt(p=0.005, df=60-2, lower.tail=F) # t-quantile 1%

 myCI <- function(b, se, t){    # function to generate CI
   b + c(-1,1)*se*t
 }

 myCI(-.2, .07, t5)     # CI for beta=-0.2, se_beta=0.07; at 5% level
 myCI(-.12, .07, t5)    # CI for beta=-0.12, se_beta=0.07; at 1% level





# -------------------------------------------------------------------
# --- End of Session ------------------------------------------------

 q("no")