# -------------------------------------------------------------------
# MPO1-A Advanced Quantitative Research Methods
# Thilo Klein
# Lab Session 3: Endogeneity, IV and SEM
# Libraries: lmtest, sandwich, AER, systemfit

setwd("C:/Dokumente und Einstellungen/Thilo/Desktop/Lab_SessionsLT/Data")
source("http://klein.uk/R/myfunctions.R")
ls()
# -------------------------------------------------------------------

# --- Ex 1: IV regression, Sargan and Hausman test (1) -----------------------------
bond <- read.csv("http://klein.uk/R/Lent/bonds.csv", header=T)
str(bond)

# --- Ex 1: c) ---
## construct first and second lags of dus3mt (the series has 600 observations):
data.frame(bond$paneldate, bond$dus3mt, c(NA, bond$dus3mt[-600]))
bond$dus3mt_1 <- c(NA, bond$dus3mt[-600])
bond$dus3mt_2 <- c(NA, NA, bond$dus3mt[c(-599, -600)])
data.frame(bond$paneldate, bond$dus3mt, bond$dus3mt_1, bond$dus3mt_2)

## checking instrument relevance:
lm1c <- lm(dus3mt ~ dus3mt_1 + dus3mt_2, data=bond); shccm(lm1c)

# --- Ex 1: e) ---
# 2SLS
## convenience functions: HC0 and HC1 covariances
hc0 <- function(x) vcovHC(x, type = "HC0")
hc1 <- function(x) vcovHC(x, type = "HC1")

library(lmtest)     # coeftest()
library(sandwich)   # vcovHC()
library(AER)        # ivreg(); also loads car, which provides linearHypothesis()

## 1st stage:
lm1e.1 <- lm(dus3mt ~ dus3mt_1 + dus3mt_2, data=bond)
coeftest(lm1e.1, vcov=hc0)
linearHypothesis(lm1e.1, c("dus3mt_1=0","dus3mt_2=0"), vcov=hc0)

## 2nd stage (manual):
lm1e.2 <- lm(daaa ~ lm1e.1$fitted, data=bond[is.na(bond$dus3mt_2)==FALSE,])
coeftest(lm1e.2, vcov=hc0)

# AER documentation and textbook data references:
# help("StockWatson2007", package="AER")
# help("Greene2003")
# help("Baltagi2002")
# help("CameronTrivedi1998")
# help("Franses1998")
# ?ivreg
lm1e.iv <- ivreg(daaa ~ dus3mt | dus3mt_1 + dus3mt_2, data=bond)
coeftest(lm1e.iv, vcov=hc0)
# standard errors differ between ivreg and manual 2SLS!

# compare to OLS estimates:
lm1e.ols <- lm(daaa ~ dus3mt, data=bond); coeftest(lm1e.ols, vcov=hc0)

# --- Ex 1: f) ---
# Sargan test for overidentifying restrictions (validity of instruments)

## Sargan test from Stock and Watson (2007)
lm1f <- lm(lm1e.iv$res ~ dus3mt_1 + dus3mt_2, data=bond[is.na(bond$dus3mt_2)==FALSE,])
lm1f.or <- linearHypothesis(lm1f, c("(Intercept)=0", "dus3mt_1=0", "dus3mt_2=0"),
                            test="Chisq", vcov=hc0)
## WARNING: df (and hence p-value) invalid above.
## correct df: no. instruments - no. endogenous variables
pchisq(q=lm1f.or[2,3], df = lm1e.iv$df.res - lm1f$df.res, lower.tail = FALSE)

## Sargan test from handout, Lab Session 3
## Step 2. Auxiliary regression:
lm.aux <- lm(lm1e.iv$res ~ dus3mt_1 + dus3mt_2, data=bond[is.na(bond$dus3mt_2)==FALSE,])
R2 <- summary(lm.aux)$r.squared
LM <- length(lm.aux$res) * R2
# LM is Chi^2_{m-k} with m = no. instruments (incl. the constant) = 3;
# k = no. regressors (incl. the constant) = 2
pchisq(q = LM, df = 3 - 2, lower.tail = FALSE)

# --- Ex 1: g) ---
# Hausman tests for exogeneity of regressors

## Hausman test from Baltagi (2002)
cf_diff <- coef(lm1e.iv) - coef(lm1e.ols)
vc_diff <- vcov(lm1e.iv) - vcov(lm1e.ols)
x2_diff <- as.vector(t(cf_diff) %*% solve(vc_diff) %*% cf_diff)
# x2_diff is Chi^2 with degrees of freedom (df) = rank of the matrix vc_diff
pchisq(q = x2_diff, df = 2, lower.tail = FALSE)

## Durbin-Wu-Hausman test from handout, Lab Session 3
## Step 1.
dwh1 <- lm(daaa ~ dus3mt, data=bond)
bond$e <- dwh1$res
## Step 2.
dwh2 <- lm(dus3mt ~ dus3mt_1 + dus3mt_2, data=bond)
bond$v <- c(NA, NA, dwh2$res)
## Step 3.
dwh3 <- lm(e ~ dus3mt + v, data=bond); shccm(dwh3)

## Lagrange Multiplier Statistic:
LM <- length(dwh3$res) * summary(dwh3)$r.squared
# LM is Chi^2_{k-k0} with
# k  = no. regressors = 2;
# k0 = no. regressors potentially endogenous = 1.
# H0: All regressors are exogenous. p-value of the LM statistic:
pchisq(q = LM, df = 1, lower.tail = FALSE)
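## One-call cross-check (not part of the handout): AER's summary() method for ivreg
## objects can report weak-instrument, Wu-Hausman and Sargan diagnostics directly --
## assuming a version of AER that supports the diagnostics argument. The numbers
## should be close to the tests computed by hand above.
summary(lm1e.iv, diagnostics = TRUE)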
## Regression-based (control function) version, adapted from
## http://www.lsw.wiso.uni-erlangen.de/studium/grundstudium/07ss/statistik2/uebung/11_code.R
lm1g <- lm(dus3mt ~ dus3mt_1 + dus3mt_2, data=bond)
summary( lm(daaa ~ dus3mt + lm1g$res, data=bond[is.na(bond$dus3mt_2)==FALSE,]) )

# What did we do?
# 1. Made the endogenous regressor (here dus3mt; "price" in the original demand example)
#    exogenous by creating a new variable: regress it on all the exogenous variables and
#    take the fitted value as the new variable. The individual exogenous variables (as
#    well as the new fitted value) are called instrumental variables, or instruments.
#    Since the new variable is created from exogenous variables, it should not be
#    correlated with the disturbance term, and can be considered exogenous.
# 2. Ran the structural regression including the residual left over from creating the
#    new (exogenous) variable. The Hausman test is then simple: look at the t-statistic
#    on the residual's coefficient. The null hypothesis is that there is no endogeneity;
#    if the t-statistic is large enough, reject the null.

# --- Ex 2: IV regression, Sargan and Hausman test (2) -----------------------------
mroz <- read.csv("http://klein.uk/R/Lent/mroz.csv", header=T)
str(mroz)

# --- Ex 2: a) ---
# Graph a histogram for wage and lwage, and comment on the shape of the distribution.
table(ifelse(mroz$hours>0,1,0))
# There are a total of 753 women in the sample, of which 428 work.
# Note that log-wages are missing for the women that don't work.
par(mfrow=c(1,2))
hist(mroz$wage)
# The distribution of wages is clearly skewed to the right, with a long tail of high
# wages and a maximum of $25. Remember: the mean is $4.18.
hist(mroz$lwage)
# The distribution of log wages is more symmetric. Wages are often lognormally
# distributed, which means that log wages are normally distributed. These are
# typical patterns for wages.

# --- Ex 2: b) ---
# For the remainder of this exercise, retain only the women that work in the data set.
mroz.w <- subset(mroz, hours>0)

# --- Ex 2: c) ---
# Estimate the model: lwage ~ educ + exper + I(exper^2). How would you interpret
# the coefficients? Do the estimated values make sense? Which kind of problems may
# these present?
lm2c.ols <- lm(lwage ~ educ + exper + I(exper^2), data=mroz.w); shccm(lm2c.ols)
# The OLS coefficient on educ suggests an 11% return. The return to an extra year of
# experience is given by 0.042 - (0.0008*2)*exper = 0.021 at the mean of exper (=13.03),
# i.e. roughly a 2% increase in wages for an additional year of experience at that level.
# Omitted ability probably biases the results upwards: ability is correlated with
# education but is not in the regression (so it ends up in the error term), which makes
# educ correlated with the error term. In other words, educ is endogenous.

# --- Ex 2: d) ---
# Estimate the model by manual 2SLS (do not use ivreg yet), using fatheduc and
# motheduc as instruments for educ. How do the results differ from those obtained
# with educ?
## 1st stage regression:
lm2d.1 <- lm(educ ~ fatheduc + motheduc, data=mroz.w)
# fatheduc and motheduc both explain educ; the reduced-form coefficients are significant.
## 2nd stage:
# lm2d.1$fitted acts as an instrumental variable for educ.
lm2d.2 <- lm(lwage ~ lm2d.1$fitted + exper + I(exper^2), data=mroz.w)
# The return to education falls to about 6%.
# This was expected if the IV estimate removes the (positive) bias created by the
# omitted variable ability. Of course, other IVs could deliver different estimates of
# the rate of return; in that case other explanations would be needed, for instance
# that the IV corrects some measurement error in educ.
coeftest(lm2d.2, vcov=hc0)
# compare 2SLS to OLS:
coeftest(lm2c.ols, vcov=hc0)

# --- Ex 2: e) ---
# Do fatheduc and motheduc explain educ? Are these good instruments?
shccm(lm2d.1)

# --- Ex 2: f) ---
# A problem with manual 2SLS is that the standard errors in the second stage are wrong.
# The correct standard errors are provided by the ivreg command in R.
# Comment on the differences.
# (A hand-rolled version of this correction is sketched further below.)
lm2f.iv <- ivreg(lwage ~ educ + exper + I(exper^2) |
                   exper + I(exper^2) + fatheduc + motheduc, data=mroz.w)
coeftest(lm2f.iv, vcov=hc0)
# standard errors differ between ivreg and manual 2SLS!
# compare to manual 2SLS estimates and OLS:
coeftest(lm2d.2, vcov=hc0)
coeftest(lm2c.ols, vcov=hc0)
# There are only very minor changes in the standard errors.
# Note that the se for educ is quite a bit larger than the OLS one.

# --- Ex 2: g) ---
# Perform the test for overidentifying restrictions, i.e. whether the instruments
# are uncorrelated with u.

## Sargan test from Stock and Watson (2007)
lm2g.or <- lm(lm2f.iv$res ~ exper + I(exper^2) + fatheduc + motheduc, data=mroz.w)
lm2g.or.test <- linearHypothesis(lm2g.or, c("fatheduc=0", "motheduc=0"),
                                 test="Chisq", vcov=hc0)
## WARNING: df (and hence p-value) invalid above.
## correct df: # instruments - # endogenous variables
pchisq(q=lm2g.or.test[2,3], df = lm2f.iv$df.res - lm2g.or$df.res, lower.tail=F)

## Sargan test from handout, Lab Session 3
## Step 2. Auxiliary regression:
lm.aux <- lm(lm2f.iv$res ~ exper + I(exper^2) + fatheduc + motheduc, data=mroz.w)
R2 <- summary(lm.aux)$r.squared
LM <- length(lm.aux$res) * R2
# LM is Chi^2_{m-k} with m = no. instruments (incl. the constant) = 5;
# k = no. regressors (incl. the constant) = 4
pchisq(q = LM, df = 5-4, lower.tail=F)
# The p-values are 0.5088 and 0.5386 respectively. We do not reject that the
# instruments are valid, in the sense that they are not correlated with the errors.

# --- Ex 2: h) ---
# Test for endogeneity of educ.

## Durbin-Wu-Hausman test from handout, Lab Session 3
## Step 1. Create OLS residuals. Already done!
e <- lm2c.ols$res
## Step 2. Regress every endogenous regressor (in this case only educ)
## on z (constant, fatheduc, motheduc, exper, expersq) and obtain the residuals.
lm2h.1 <- lm(educ ~ fatheduc + motheduc + exper + I(exper^2), data=mroz.w)
v <- lm2h.1$res
## Step 3.
dwh3 <- lm(e ~ educ + exper + expersq + v, data=mroz.w); shccm(dwh3)

## Lagrange Multiplier Statistic:
LM <- length(dwh3$res) * summary(dwh3)$r.squared
# LM is Chi^2 with df = no. of potentially endogenous regressors tested = 1.
# H0: All regressors are exogenous. p-value of the LM statistic:
pchisq(q = LM, df = 1, lower.tail = F)

## Or, equivalently, include the first-stage residuals directly in the wage equation:
lm2h.2 <- lm(lwage ~ educ + exper + I(exper^2) + lm2h.1$res, data=mroz.w)
coeftest(lm2h.2, vcov=hc0)
# The coefficient on lm2h.1$res is not significant at 10%, so endogeneity
# is debatable.
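## Digression (part f revisited): a minimal sketch, not part of the handout, of where
## the manual second-stage standard errors go wrong. lm() estimates sigma^2 from
## residuals built with the first-stage fitted values, whereas the correct 2SLS
## residuals use the original educ. Rescaling the classical covariance matrix with
## the corrected sigma^2 reproduces the (non-robust) ivreg standard errors.
## (Object names Xhat, u.hat, sigma2.2sls are ad hoc.)
Xhat  <- model.matrix(lm2d.2)               # second-stage design: educ replaced by its fitted values
u.hat <- mroz.w$lwage -
         cbind(1, mroz.w$educ, mroz.w$exper, mroz.w$exper^2) %*% coef(lm2d.2)
sigma2.2sls <- sum(u.hat^2) / lm2d.2$df.residual   # error variance using the original educ
sqrt(diag(sigma2.2sls * solve(t(Xhat) %*% Xhat))) # compare with coeftest(lm2f.iv) (no vcov argument)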
## Hausman test for exogeneity of regressors:
dwh.test <- function(model.iv, model.ols){
  cf_diff <- coef(model.iv) - coef(model.ols)
  vc_diff <- vcovHC(model.iv, "HC0") - vcovHC(model.ols, "HC0")
  x2_diff <- as.vector(t(cf_diff) %*% solve(vc_diff) %*% cf_diff)
  pchisq(q = x2_diff, df = dim(vc_diff)[1], lower.tail = F)
}
# execute the dwh.test function:
dwh.test(lm2f.iv, lm2c.ols)
# As constructed, the DWH test (p-value 0.09) and the Hausman test (0.16) do not
# reject the hypothesis of exogeneity at the 5% level. Note, however, that exper and
# expersq could also be potentially endogenous; other instruments should be added if
# we consider them as such.

# --- Ex 3: -----------------------------
crime <- read.csv("http://klein.uk/R/Lent/crime.csv", header=T)
str(crime)

# --- Ex 3: a) ---
# Observe the data. Regress crime on a constant and police. Give a possible
# explanation of the estimated positive effect.
plot(crime ~ police, data=crime)
lm3a.ols <- lm(crime ~ police, data=crime); shccm(lm3a.ols)
abline(lm3a.ols, col="red", lwd=2)

# --- Ex 3: d) ---
# Use the data to estimate coefficients by instrumental variables, using z (and a
# constant) as instruments. Check that the result of c) holds true. Provide an
# interpretation of the resulting estimate.
lm3e.iv <- ivreg(crime ~ police | election, data=crime)
coeftest(lm3e.iv)

# Applying the property in c) (Wald estimator for a dummy instrument):
y1 <- mean(crime$crime[crime$election==1])
x1 <- mean(crime$police[crime$election==1])
y0 <- mean(crime$crime[crime$election==0])
x0 <- mean(crime$police[crime$election==0])
b.iv <- (y1-y0)/(x1-x0)
paste("IV estimation through the formula for dummy IVs:", b.iv)
# now the sign is as expected!

# --- Ex 3: e) ---
# Perform the Hausman test on the exogeneity of the variable x.
dwh.test(lm3e.iv, lm3a.ols)
abline(lm3e.iv, col="blue", lwd=2)
legend("topleft", legend=c("OLS","2SLS"), fill=c("red","blue"))

# --- Ex 4: -----------------------------
bw <- read.csv("http://klein.uk/R/Lent/brwght.csv", header=T)
str(bw)

# --- Ex 4: a) ---
# Estimate using OLS.
lm4.ols <- lm(lbwght ~ packs, data=bw)
coeftest(lm4.ols)

# --- Ex 4: d) ---
# Estimate by IV using cigprice as an instrument.
lm4.iv <- ivreg(lbwght ~ packs | cigprice, data=bw)
coeftest(lm4.iv)
## first stage: is cigprice relevant for packs?
lm4.1 <- lm(packs ~ cigprice, data=bw); coeftest(lm4.1)

# --- Simultaneous equation models (SEM) ---

# --- Ex 5: -----------------------------
# --- Ex 5: c) ---
# Simulate n=100 observations from this model with a=0, b=0.5 (therefore the multiplier
# is equal to 2); the perturbations are distributed as standard normals, and G is
# distributed normal with mean 10 and variance 1. Applying the formula above:
# plim(b) = 0.5 + 0.5/3 = 0.67, so the estimated multiplier would be around 3. Note
# that estimating the consumption equation by OLS would therefore overestimate the
# real multiplier, which is 2. (Try again with samples of 50 and 10000 and note the
# differences with the predicted values.)
epsilon1 <- rnorm(1000)
epsilon2 <- rnorm(1000)
g <- 10 + rnorm(1000)
d <- (1/(1-0.5))*g + (epsilon1 + epsilon2)/(1-0.5)   # d is created using the reduced form
c <- 0.5*d + epsilon1
lm5c <- lm(c ~ d)
beta <- lm5c$coef[2]
multiplier <- 1/(1-beta)
paste("The multiplier estimated with OLS is equal to", multiplier)
# close to the predicted (biased) value of about 3

# --- Ex 5: d) ---
# Apply instrumental variables to obtain better estimators. Report the value for
# the estimated multiplier.
# As g is exogenous and we need two instruments, we use g and a constant as instruments.
sim <- data.frame(c=c, d=d, g=g)
lm5d <- ivreg(c ~ d | g, data=sim)
beta <- lm5d$coef[2]
multiplier <- 1/(1-beta)
paste("The multiplier estimated with IV is equal to", multiplier)
# close to the true multiplier of 2
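## Following the suggestion above, a quick check of the plim formula with a larger
## sample (a sketch, not part of the handout; the new object names and the seed are
## arbitrary): with n = 10000 the OLS slope should settle near 0.5 + 0.5/3 = 0.67
## (implied multiplier about 3), while the IV slope should settle near the true 0.5
## (multiplier about 2).
set.seed(123)
n  <- 10000
e1 <- rnorm(n); e2 <- rnorm(n)
gg <- 10 + rnorm(n)
dd <- (gg + e1 + e2)/(1 - 0.5)     # reduced form for d
cc <- 0.5*dd + e1
coef(lm(cc ~ dd))[2]               # close to 0.67 (biased upwards)
coef(ivreg(cc ~ dd | gg))[2]       # close to 0.50 (consistent)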
# --- Ex 6: -----------------------------
orange <- read.csv("http://klein.uk/R/Lent/oranges.csv", header=T)
str(orange)

# --- Ex 6: a) ---
# Estimate the price equation using OLS. Test the null hypothesis that the
# price elasticity = -1.
lm6a.ols <- lm(logpt ~ logqt + logrit, data=orange)
linearHypothesis(lm6a.ols, "logqt = -1", vcov=hc0)
# The p-value for the F-statistic is equal to 0.04, so the null is rejected
# at the 5% level.

# --- Ex 6: b) ---
# Estimate the price equation also by IV, using log(ACt) and log(APt) as
# instruments for log(Q). Test again the null hypothesis of unit price elasticity.
lm6b.iv <- ivreg(logpt ~ logqt + logrit | logac + logap + logrit, data=orange)
linearHypothesis(lm6b.iv, "logqt = -1", vcov=hc0)
# The p-value for the F-statistic is now equal to 0.002, so the null is rejected
# at the 1% level of significance.

# --- Ex 6: c) ---
# Perform the Hausman test for the exogeneity of log(Qt) in the price equation.
dwh.test(lm6b.iv, lm6a.ols)

# by yourself (DWH test from the handout):
# Step 1.
lm6c.i <- lm(logpt ~ logqt + logrit, data=orange)
ehat <- lm6c.i$res
# Step 2.
lm6c.ii <- lm(logqt ~ logrit + logac + logap, data=orange)
vhat <- lm6c.ii$res
# Step 3.
lm6c.iii <- lm(ehat ~ logqt + logrit + vhat, data=orange)
LM <- dim(model.matrix(lm6c.iii))[1] * summary(lm6c.iii)$r.sq   # n * R^2 of the Step 3 regression
paste("LM statistic is =", LM)
paste("p-value of the LM statistic =", pchisq(q = LM, df=1, lower.tail=F) )
# The results are not clear-cut: with the DWH test we reject exogeneity at the 1%
# level, while with the Hausman test we do not reject.

# --- Ex 6: d) ---
# Investigate the quality of the instruments, that is, whether they are
# sufficiently correlated with log(Q_t) and uncorrelated with the price shocks e_t
# (take the IV residuals as estimates of the shocks).
lm6d <- lm(logqt ~ logrit + logac + logap, data=orange)
shccm(lm6d)
# The R^2 is 0.93, evidence of a good correlation between the endogenous variable
# and the instruments. Only past advertisements are insignificant.

# To test for the exogeneity of the instruments we apply a Sargan test.
lm6d.or <- lm(lm6b.iv$res ~ logac + logap + logrit, data=orange)
lm6d.or.test <- linearHypothesis(lm6d.or, c("logac=0", "logap=0", "logrit=0"),
                                 test="Chisq", vcov=hc0)
## WARNING: df (and hence p-value) invalid above.
## correct df: # instruments - # endogenous variables
pchisq(q=lm6d.or.test[2,3], df = lm6b.iv$df.res - lm6d.or$df.res, lower.tail=F)
# The hypothesis that the instruments are exogenous is therefore rejected. One
# possible explanation is some simultaneous determination of quantities and
# advertisement expenditures (for example, high prices raising profits and hence
# advertisement and the quantities supplied).

# --- Ex 6: e) ---
# Answer questions b), c) and d) also for the n=45 observations obtained by
# excluding the data over the period 1942-6.
# Left for the student; the conclusions are in line with what we observed so far.

# --- Ex 6: f) ---
# Is the demand equation identified? Estimate this equation by OLS, and motivate
# your choice.
# To answer this question we apply the order condition: k - k1 >= m1, where
# k  = total number of exogenous variables in the model;
# k1 = total number of exogenous variables in the first equation;
# m1 = number of endogenous regressors in the same equation.
# k = 2: the constant and the variable log(RI). In the demand equation the exogenous
# variables are the same, so k1 = 2 as well, and m1 = 1: log(Q). The order condition
# is not satisfied: 0 < 1, so the demand equation is not identified.
# The estimation by OLS was already discussed in parts a), b) and c). Equation 1
# cannot be estimated as part of the SEM considered in parts f) and g).
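## The same counting as a small check in R (a sketch; k, k1, m1 are just the
## quantities defined above):
k  <- 2   # exogenous variables in the model: constant, log(RI)
k1 <- 2   # exogenous variables included in the demand equation
m1 <- 1   # endogenous regressors in the demand equation: log(Q)
k - k1 >= m1   # FALSE: the demand equation is not identified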
# --- Ex 6: g) ---
# Is the supply equation identified? Estimate this equation by a method that you
# find most appropriate, and motivate your choice.
# The order condition is now satisfied: k = 2; k2 = 1; m2 = 1, so k - k2 = 1 = m2.
# The reason it is satisfied is the exclusion restriction: log(RI) is excluded
# from the supply equation.
# To estimate it within the scope of this model we apply two-stage least squares
# using the constant and log(RI) as instruments.
lm6g.iv <- ivreg(logpt ~ logqt | logrit, data=orange)
coeftest(lm6g.iv)
# The slope is now negative, contrary to what would be expected, and insignificant.
# So the estimates with more IVs (part b) seem to be the best estimates for this model.

# --- Ex 7: -----------------------------
smoke <- read.csv("http://klein.uk/R/Lent/smoke.csv", header=T)
str(smoke)

# --- Ex 7: d) ---
# Estimate the income equation by OLS and discuss the estimate of beta_1.
lm7d <- lm(lincome ~ cigs + educ + age + agesq, data=smoke)
shccm(lm7d)

# --- Ex 7: e) ---
# Estimate the reduced form for cigs. (Recall that this entails regressing cigs
# on all exogenous variables.) Are log(cigpric) and restaurn significant in the
# reduced form?
lm7e <- lm(cigs ~ educ + age + agesq + lcigpric + restaurn, data=smoke)
coeftest(lm7e)
linearHypothesis(lm7e, c("lcigpric=0", "restaurn=0"), vcov=hc0)

# --- Ex 7: f) ---
# Now, estimate the income equation by 2SLS. Discuss how the estimate of beta_1
# compares with the OLS estimates.
lm7f.iv <- ivreg(lincome ~ cigs + educ + age + agesq |
                   lcigpric + restaurn + educ + age + agesq, data=smoke)
coeftest(lm7f.iv, vcov=hc0)

# --- Ex 8: -----------------------------
open <- read.csv("http://klein.uk/R/Lent/openness.csv", header=T)
str(open)

# --- Ex 8: a) ---
# To confirm the last assertion, estimate the reduced form for open. Is the first
# equation identified? Estimate it using a constant and log(land) as IVs (2SLS).
lm8a.ols <- lm(open ~ lpcinc + lland, data=open); coeftest(lm8a.ols)
lm8a.iv <- ivreg(inf ~ open + lpcinc | lland + lpcinc, data=open); coeftest(lm8a.iv)
# Note that the coefficient on open is statistically significant and economically
# important, and the sign of alpha1 is negative, as expected.

# --- Ex 8: b) ---
# Because log(pcinc) is insignificant in both estimations so far [CHECK THIS],
# drop it from the analysis. Estimate by OLS and IV without log(pcinc). Do any
# important conclusions change?
lm8b.iv <- ivreg(inf ~ open | lland, data=open); coeftest(lm8b.iv)

# --- Ex 8: c) ---
# Still leaving log(pcinc) out of the analysis, is land or log(land) a better
# instrument for open? (Hint: regress open on each of these separately and jointly;
# a small comparison of the first-stage fits is sketched at the end of this exercise.)
lm8c.i   <- lm(open ~ land, data=open); coeftest(lm8c.i, vcov=hc0)
lm8c.ii  <- lm(open ~ lland, data=open); coeftest(lm8c.ii, vcov=hc0)
lm8c.iii <- lm(open ~ land + lland, data=open); coeftest(lm8c.iii, vcov=hc0)

# --- Ex 8: d) ---
# Add the dummy variable oil (indicative of the country being an oil producer) to
# the original equation in a) and treat it as exogenous. Estimate the equation by IV.
# Does being an oil producer have a ceteris paribus effect on inflation?
lm8d.iv <- ivreg(inf ~ open + lpcinc + oil | lland + lpcinc + oil, data=open)
coeftest(lm8d.iv)
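## Back to part c): one way to compare the candidate instruments (a sketch, not in
## the handout) is to look at the first-stage fit of each univariate regression --
## the instrument with the larger R^2 (and robust t statistic, shown above) is the
## stronger one.
c(R2.land = summary(lm8c.i)$r.squared, R2.lland = summary(lm8c.ii)$r.squared)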
# --- Ex 9: -----------------------------
cement <- read.csv("http://klein.uk/R/Lent/cement.csv", header=T)
str(cement)

# --- Ex 9: a) ---
cement$month <- as.factor(cement$month)
lm9a <- lm(gprc ~ gcem + gprcpet + month, data=cement); shccm(lm9a)

# --- Ex 9: b) ---
lm9b <- lm(gcem ~ gdefs + gprcpet + month, data=cement)

# --- Ex 9: c) ---
lm9c <- lm(gcem ~ gres + gnon + gprcpet + month, data=cement)

# --- Ex 9: d) ---
lm9d.iv <- ivreg(gprc ~ gcem + gprcpet + month | gprcpet + month + gres + gnon,
                 data=cement)

# --- Ex 10: -----------------------------
fish <- read.csv("http://klein.uk/R/Lent/fish.csv", header=T)
str(fish)

# --- Ex 10: c) ---
## build a weekday factor from the day dummies:
fish$weekday <- ifelse(fish$mon==1,"Mon",
                  ifelse(fish$tues==1,"Tue",
                    ifelse(fish$wed==1,"Wed",
                      ifelse(fish$thurs==1,"Thu","Fri"))))
fish$weekday <- as.factor(fish$weekday)
levels(fish$weekday)
lm10c <- lm(lavgprc ~ weekday + wave2 + wave3, data=fish); coeftest(lm10c, vcov=hc0)
linearHypothesis(lm10c, c("wave2=0","wave3=0"), vcov=hc0)

# --- Ex 10: d) ---
# Now, estimate the demand equation by 2SLS. What is the 95% confidence interval for
# the price elasticity of demand? Is the estimated elasticity reasonable?
# (A confidence interval is computed at the end of this part.)
library(systemfit)
?systemfit

## Specify the system
eqDemand <- ltotqty ~ lavgprc + weekday
eqSupply <- lavgprc ~ ltotqty + wave2 + wave3
system <- list( demand = eqDemand, supply = eqSupply )
inst <- ~ wave2 + wave3 + weekday

## OLS estimation
lm10d.ols <- systemfit(system, data=fish)
coeftest(lm10d.ols)

## 2SLS estimation
lm10d.sem <- systemfit(system, "2SLS", inst=inst, data=fish)
coeftest(lm10d.sem)

## (Same results for the demand equation with ivreg)
lm10d.ols2 <- lm(ltotqty ~ lavgprc + weekday, data=fish)
lm10d.iv2 <- ivreg(ltotqty ~ lavgprc + weekday | weekday + wave2 + wave3, data=fish)

## (2SLS estimation with different instruments in each equation)
lm10d.sem$eq[[1]]; lm10d.sem$eq[[2]]
inst1 <- ~ weekday
inst2 <- ~ wave2 + wave3
instlist <- list( inst1, inst2 )
lm10d.sem2 <- systemfit(system, "2SLS", inst=instlist, data=fish)
coeftest(lm10d.sem2)
# Note that this is another way to estimate 2SLS, with a separate instrument list per
# equation. An alternative estimator is 3SLS: it first estimates 2SLS, then uses the
# correlation between the perturbations ACROSS equations to estimate their covariance
# matrix, which is used in a third step to estimate the parameters.

## 3SLS estimation
lm10d.3sls <- systemfit( system, "3SLS", inst = inst, data = fish, method3sls = "GMM" )
coeftest(lm10d.3sls)
hausman.systemfit(lm10d.sem, lm10d.3sls)
# no significant difference between 2SLS and 3SLS.
# In general, 3SLS is more efficient, but it has two flaws: 1. it relies on all
# equations being well modelled -- errors in modelling any of them will affect the
# estimates in all the other equations (2SLS is free from this problem);
# 2. it relies on large datasets, as it has to estimate a large number of covariances.
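## The 95% confidence interval for the price elasticity of demand asked for above
## (a sketch using the single-equation 2SLS fit lm10d.iv2 and conventional standard
## errors; a normal approximation is assumed):
est <- coeftest(lm10d.iv2)["lavgprc", ]
est["Estimate"] + c(-1.96, 1.96) * est["Std. Error"]
# confint(lm10d.iv2)["lavgprc", ]   # t-based interval as an alternative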
# --- Digression: What about HC standard errors? ---
# based on Wooldridge (2002) "Econometric analysis of cross section and panel data",
# Chapter 5, page 100, formula 5.34
source("http://klein.uk/R/myfunctions.R")
shccm.sysf(lm10d.sem)
shccm.sysf(lm10d.3sls)

# --- Ex 10: e) ---
lm10e <- lm(lavgprc ~ weekday + wave2 + wave3, data=fish); coeftest(lm10e, vcov=hc0)
linearHypothesis(lm10e, c("weekdayMon=0","weekdayTue=0","weekdayWed=0","weekdayThu=0"),
                 vcov=hc1)

# -------------------------------------------------------------------
# --- End of Session ------------------------------------------------
save.image("EndOfSession.RData")
q("yes")