# -------------------------------------------------------------------
# Master in Finance Econometrics Module
# Thilo Klein
# Lab Session 3: Generalized Least Squares

# Required libraries: VGAM, zoo, timeDate, tseries, lmtest, car
source("http://klein.uk/R/myfunctions.R")
ls()
# -------------------------------------------------------------------


# --- Ex 1: Heteroskedasticity (1) ----------------

# --- Ex 1: a) ---
# Use the data in hprice1.csv to obtain heteroskedasticity-robust standard errors
# and homoskedasticity-only standard errors for the equation
# price ~ lotsize + sqrft + bdrms. Discuss any important differences between the
# two sets of standard errors.

house <- read.csv("http://klein.uk/R/hprice1", header=T)
str(house)

lm1a <- lm(price ~ lotsize + sqrft + bdrms, data=house)
summary(lm1a)   # homoskedasticity-only standard errors
shccm(lm1a)     # heteroskedasticity-robust standard errors (from myfunctions.R)

# --- Ex 1: b) ---
# Repeat part a) for log(price) ~ log(lotsize) + log(sqrft) + bdrms.

lm1b <- lm(lprice ~ llotsize + lsqrft + bdrms, data=house)   # logs are precomputed in the data set
summary(lm1b)
shccm(lm1b)

# --- Ex 1: c) ---
# What does this example suggest about heteroskedasticity and the transformation
# used for the dependent variable?

# Refer to the handout.

# --- Ex 1: d) ---
# Apply the full White test for heteroskedasticity to part b). Which variables
# does the auxiliary regression include? Using the chi-squared form of the
# statistic, obtain the p-value. What do you conclude?

house$lm1b.sqres <- lm1b$residuals^2
# Auxiliary regression: squared residuals on the regressors, their squares and
# their pairwise cross-products (9 terms in total, hence df=9 below).
lm1b.white.test <- lm(lm1b.sqres ~ llotsize*lsqrft*bdrms - llotsize:lsqrft:bdrms
                      + I(llotsize^2) + I(lsqrft^2) + I(bdrms^2), data=house)
shccm(lm1b.white.test)

stat <- summary(lm1b.white.test)$r.squared * nrow(house)   # n*R^2 test statistic
# ("stat" rather than "T", to avoid masking R's shorthand for TRUE)
library(VGAM)   # note: pchisq() itself is in base R
pchisq(q=stat, df=9, lower.tail=F)
# p = 0.39 -> little evidence against the homoskedasticity assumption


# --- Ex 2: Heteroskedasticity (2) ----------------

training <- read.csv("http://klein.uk/R/training", header=T)
str(training)

# --- Ex 2: a) ---
# Consider the simple regression model log(scrap) = b_1 + b_2*grant + u, where
# scrap is the firm's scrap rate and grant is a dummy variable indicating whether
# a firm received a job-training grant. Can you think of some reasons why the
# unobserved factors in u might be correlated with grant?

lm2a <- lm(log(scrap) ~ grant, data=training); summary(lm2a)
par(mfrow=c(2,2))
plot(lm2a)

# --- Ex 2: b) ---
# Estimate the simple regression model. Does receiving a job-training grant
# significantly lower a firm's scrap rate?

lm2b <- lm(log(scrap) ~ grant, data=training); summary(lm2b)

# --- Ex 2: c) ---
# Now add log(scrap_1) as an explanatory variable (this variable is the scrap
# rate of the previous year). How does this change the estimated effect of grant?
# Is it statistically significant at the 5% level?

lm2c <- lm(log(scrap) ~ grant + log(scrap_1), data=training); summary(lm2c)
plot(lm2c)

# --- Ex 2: d) ---
# Test the null hypothesis that the parameter on log(scrap_1) is 1 against the
# two-sided alternative. Report the p-value for the test.

library(car)   # for linearHypothesis() here and durbinWatsonTest() in Ex 3
linearHypothesis(model=lm2c, "log(scrap_1) = 1")

# --- Ex 2: e) ---
# Repeat parts c) and d) using heteroskedasticity-robust standard errors, and
# briefly discuss any notable differences.

shccm(lm2c)
linearHypothesis(model=lm2c, "log(scrap_1) = 1", white.adjust=TRUE)
# linearHypothesis(model=lm2c, "log(scrap_1) = 1", white.adjust="hc3")
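# Digression: shccm() is defined in the course file myfunctions.R. As a
# cross-check, here is a minimal sketch using the sandwich and lmtest packages
# (assumed installed; whether type "HC1" matches shccm()'s exact small-sample
# correction is an assumption):
library(sandwich)
library(lmtest)
coeftest(lm2c, vcov=vcovHC(lm2c, type="HC1"))   # robust t-tests for part c)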
# --- Ex 3: Autocorrelation ----------------

bond <- read.csv("http://klein.uk/R/bonds", header=T)
str(bond)

# --- Ex 3: a) ---
# Regress changes in AAA bond returns (daaa) on changes in the US Treasury Bill
# interest rate (dus3mt). Plot the residuals. Are the residuals distributed
# evenly across time?

lm3a <- lm(daaa ~ dus3mt, data=bond); shccm(lm3a)

library(zoo)
bond$paneldate <- as.yearmon(bond$paneldate, format="%Ym%m")
e <- lm3a$res
plot(e ~ bond$paneldate, type="l")

# --- Ex 3: b) ---
# Investigate serial correlation in the residuals. Use the Breusch-Godfrey
# Serial Correlation LM Test.

N <- length(e)
e1 <- c(NA, e[1:(N-1)])        # residuals lagged one period
e2 <- c(NA, NA, e[1:(N-2)])    # residuals lagged two periods
head(data.frame(bond$paneldate, e, e1, e2))

plot(e ~ e1)
cor(e, e1, use="complete")     # first-order autocorrelation of the residuals
abline(a=0, b=0.2761491, col="red", lwd=2)   # slope = the correlation computed above

# --- Breusch-Godfrey Serial Correlation LM Test: ---
# 1. Fit an auxiliary regression of the estimated error terms on all independent
#    variables and the lagged estimated residuals up to order p (here: p=2).
lm3bBG <- lm(e ~ bond$dus3mt + e1 + e2); shccm(lm3bBG)
# 2. Under H0: "no autocorrelation in the error terms up to order p", the test
#    statistic n*R^2 follows a chi-squared distribution with p degrees of freedom.
n <- length(e)
R2 <- summary(lm3bBG)$r.sq
n*R2                    # test statistic
qchisq(p=0.95, df=2)    # theoretical quantile of the chi-squared distribution under H0

# or simply:
library(lmtest)
bgtest(daaa ~ dus3mt, data=bond, order=2, type="Chisq")

# --- Durbin-Watson Test for Autocorrelated Errors ---
?durbinWatsonTest
durbinWatsonTest(lm3a, max.lag=1, alternative="positive")   # from the car package
# or:
dwtest(daaa ~ dus3mt, data=bond, alternative="greater")


# --- Ex 4: Non-linearity in variables ----------------

# --- Ex 4: a) ---
# Fit a regression model of volume on t (a time trend).

nyse <- read.csv("http://klein.uk/R/nysevolume", header=T)
str(nyse)
lm4a <- lm(volume ~ t, data=nyse); shccm(lm4a)

# --- Ex 4: b) ---
# Examine the residuals. Assess the linearity of the relation between volume and
# the time trend.

par(mfrow=c(2,2))
plot(lm4a)
par(mfrow=c(1,1))
plot(volume ~ t, data=nyse)
abline(lm4a, col="red", lwd=2)

# --- Ex 4: c) ---
# Generate a log transformation of the variable volume, call it logvol. Run the
# regression in a) with this new variable. What happens to the residuals now?
# (Here log(volume) is used directly in the formula instead of creating logvol.)

lm4c <- lm(log(volume) ~ t, data=nyse); shccm(lm4c)
par(mfrow=c(2,2))
plot(lm4c)

# --- Ex 4: d) ---
# Now run the following model (and analyse the residuals again):
# log(volume) ~ t + t^2

lm4d <- lm(log(volume) ~ t + I(t^2), data=nyse); shccm(lm4d)
plot(lm4d)


# --- Ex 5: Normality ----------------

# --- Ex 5: a) ---
# Regress changes in AAA bond returns (daaa) on changes in the US Treasury Bill
# interest rate (dus3mt). Obtain the histogram of the residuals.

bond <- read.csv("http://klein.uk/R/bonds", header=T)
str(bond)

lm5a <- lm(daaa ~ dus3mt, data=bond); shccm(lm5a)
hist(lm5a$res, breaks=30)

library(timeDate)
sk <- skewness(lm5a$res, method="moment"); sk
ku <- kurtosis(lm5a$res, method="moment"); ku

# --- Ex 5: b) ---
# Analyse the results of the Jarque-Bera test of normality.

n <- length(lm5a$res)
(n-2)/6 * ( sk^2 + ((ku - 3)^2)/4 )   # Jarque-Bera test statistic (handout, p.7)
qchisq(p=0.95, df=2)                  # theoretical distribution under the null

# or simply:
library(tseries)
jarque.bera.test(lm5a$res)


# --- Ex 6: Outliers ----------------

crime <- read.csv("http://klein.uk/R/crime", header=T)
str(crime)

# --- Ex 6: a) ---
# A regression model for crime might have pctmetro, poverty, and single as
# independent variables. Run this regression and plot the residuals.

lm6a <- lm(crime ~ pctmetro + poverty + single, data=crime); shccm(lm6a)
par(mfrow=c(2,2))
plot(lm6a)
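# Digression: beyond eyeballing the diagnostic plots, influence statistics flag
# unusual observations numerically. A minimal sketch using base R's
# cooks.distance(); the 4/n cut-off is a common rule of thumb, not part of the
# exercise. This previews the outlier identified formally in part b).
cooks <- cooks.distance(lm6a)        # influence of each observation on the fit
crime$state[cooks > 4/nrow(crime)]   # states exceeding the rule-of-thumb cut-off
plot(cooks, type="h")
abline(h=4/nrow(crime), col="red")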
# --- Ex 6: b) ---
# Identify the observation that is an outlier.

crime$rstudent <- rstudent(lm6a)   # studentized residuals
crime <- crime[order(crime$rstudent), ]
head(crime)   # most negative studentized residuals
tail(crime)   # most positive studentized residuals

# --- Ex 6: c) ---
# Try to solve this problem by adding an impulse dummy variable to the regression.

crime$DC <- ifelse(crime$state=="dc", 1, 0)
lm6c <- lm(crime ~ pctmetro + poverty + single + DC, data=crime); shccm(lm6c)
# equivalently, drop the outlying observation altogether:
lm6c <- lm(crime ~ pctmetro + poverty + single, data=subset(crime, state!="dc")); shccm(lm6c)

# --- Digression: Graphical analysis of the outlier ---
plot(crime ~ single, data=crime, col="white")
text(x=crime$single, y=crime$crime, labels=crime$state)

m.pctmetro <- mean(crime$pctmetro)
m.poverty  <- mean(crime$poverty)
r.single   <- seq(min(crime$single), max(crime$single), .1)

# predicted crime as a function of single, holding pctmetro and poverty at their means
myReg <- function(x, model){
  coef(model) %*% c(1, m.pctmetro, m.poverty, x)
}
y <- sapply(r.single, myReg, model=lm6a)
lines(x=r.single, y=y, col="red", lwd=2)    # full sample (with DC)
y <- sapply(r.single, myReg, model=lm6c)
lines(x=r.single, y=y, col="blue", lwd=2)   # DC excluded
legend("topleft", legend=c("with DC","without DC"), fill=c("red","blue"))
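# Digression: an alternative to dummying out or dropping the outlier is robust
# regression, which downweights influential observations automatically. A minimal
# sketch using rlm() from the MASS package (shipped with R); its default Huber
# M-estimator is one choice among several and is not part of the exercise.
library(MASS)
lm6rob <- rlm(crime ~ pctmetro + poverty + single, data=crime)
summary(lm6rob)   # compare with the lm6a and lm6c coefficients

# -------------------------------------------------------------------
# --- End of Session ------------------------------------------------
q("no")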