# -------------------------------------------------------------------
# MPO1-A Advanced Quantitative Research Methods
# Thilo Klein
# Lab Session 2: ARIMA, ARCH and GARCH Models

# Libraries: tseries, ccgarch, FinTS, dynlm, forecast, rgarch
# http://cran.r-project.org/web/views/TimeSeries.html
  rm(list=ls())
  setwd("E:/LabSessions_MPO1A_LT/Data")
  source("http://klein.uk/R/myfunctions.R")
  ls()
# -------------------------------------------------------------------




# --- Ex 1: Estimation of a quarterly ARMA model of the US Producer Price Index (PPI) -----------------------------

 quart <- read.csv("http://klein.uk/R/Lent/quarterly.csv", h=T)
 str(quart)



# --- Ex 1: a) ---
# Plot the PPI in level and first difference. Are these sequences stationary?

 ## generate variables
 PPI <- ts(quart$PPI, start=c(1960, 1), freq=4)
 LPPI <- ts(log(PPI), start=c(1960, 1), freq=4)
 LPPI.d1 <- diff(LPPI, lag=1, differences=1)

 ## plot series
 par(mfrow=c(2,1))
 plot(LPPI); plot(LPPI.d1)



# --- Ex 1: b) ---
# Take logs and first difference of PPI (the resulting variable is a measure of what?). 
# Plot again and compute ACF and PACF and do ADF test. Why is it important to take logs? 
# Has the variable a unit root?

 # The autocorrelation and partial autocorrelation decay quickly: it seems to be a 
 # stationary series:

 par(mfrow=c(2,1))
 acf(LPPI.d1); pacf(LPPI.d1)
 acf(LPPI.d1, xlim=c(0.25,5.5)); pacf(LPPI.d1) # get rid of 0 lag autocorrelation in acf

 # ADF test: without trend!!
 library(tseries)
 adf.test.1(LPPI.d1, int=T, trend=F, k=2)
 # We reject the null hypothesis: the series is stationary.

 # test for remaining autocorrelation in the residuals
 lm1 <- adf.test.2(LPPI.d1, int=T, trend=F, k=2)
 acf(lm1$resid); pacf(lm1$resid)
 # install.packages("ccgarch")
 library(ccgarch)
 ljung.box.test.1(lm1$resid, seq(1,15,1))



# --- Ex 1: d) ---
# Estimate an ARMA(p,q), for the following cases (p,q)=(1,0),  (2,0), (1,1),  (1,(1,4)), (2,1), 
# (1,(0,4)). For each of these models report SSR, AIC and Q(5), Q(10) and Q(15). Which is 
# the best model? Diagnostic tests: Normality, no autocorrelation, no ARCH terms?

 # --- best model: ---
 arma1d <- arma(LPPI.d1, lag=list(ar=1, ma=c(1,4)))
 summary(arma1d)
 sum(arma1d$resid^2, na.rm=T) # SSR

 # --- other models: ---
 arma1da <- arma(LPPI.d1, order=c(1,0)); summary(arma1da)
 nNA <- is.na(arma1d$res)==F
 ljung.box.test(arma1da$res[nNA])

 arma1db <- arma(LPPI.d1, order=c(2,0)); summary(arma1db)
 ljung.box.test(arma1db$res[nNA])

 arma1dc <- arma(LPPI.d1, order=c(1,1)); summary(arma1dc)
 ljung.box.test(arma1dc$res[nNA])

 arma1dd <- arma(LPPI.d1, order=c(2,1)); summary(arma1dd)
 ljung.box.test(arma1dd$res[nNA])

 arma1de <- arma(LPPI.d1, lag=list(ar=c(1),ma=c(1,4))); summary(arma1de)
 ljung.box.test(arma1de$res[nNA])

 arma1df <- arma(LPPI.d1, lag=list(ar=c(1),ma=c(4))); summary(arma1df)
 ljung.box.test(arma1df$res[nNA])



 # --- Diagnostic tests: ---

 ## Autocorrelation: Ljung-Box statistic
 # Is there any evidence of autocorrelation up to lag 5, 10 and 15? No, there isn’t!

 nNA <- is.na(arma1d$res)==F
 ljung.box.test.1(arma1d$res[nNA], seq(1,15,1))
 par(mfrow=c(2,1)); acf(arma1d$res[nNA]); pacf(arma1d$res[nNA])

 ## Normality: Jarque-Bera test for the null of normality
 # There is evidence of non-normality. We reject the null hypothesis of normal distributed 
 # residuals. We should improve our model!
 hist(arma1d$res, breaks=20, freq=F)
 grid.x <- seq(-.05,.05,.001)
 grid.y <- dnorm(grid.x, sd=sd(arma1d$res, na.rm=T))
 lines(grid.x, grid.y, col="blue", lwd=2)
 jarque.bera.test(arma1d$res[nNA])

 ## ARCH terms: ARCH LM test
 # We reject the hypothesis of no autocorrelation of the squared residuals: 
 # install.packages("FinTS")
 library(FinTS)
 ArchTest(c(arma1d$res), lags=2)
 plot(arma1d$res)

 # What did we just do? ARCH test manually:
 u.sqr <- arma1d$res^2
 library(dynlm)
 arch1d <- dynlm(u.sqr ~ L(u.sqr,1) + L(u.sqr,2))
 summary(arch1d)
 lht(arch1d, c("L(u.sqr, 1)", "L(u.sqr, 2)"))

 # Or alternatively: ARCH LM test manually:
 # Obs*R^2 is distributed chi-squared with 2 degrees of freedom
 n <- length(arch1d$res)
 R2 <- summary(arch1d)$r.sq
 LM <- n * R2; LM
 pchisq(LM, df=2, lower.tail=F)



# --- Ex 1: e) ---
# Estimate the ARMA models (p,q)=(1,1) and (1,(1,4)) over the period 1960.1-1989.3. Obtain 
# the one-step-ahead forecast and the one-step-ahead forecast error.

 LPPI.d1.w <- window(LPPI.d1, start=c(1960,1), end=c(1989,3))
 LPPI.d1

 arma1e.11 <- arima(LPPI.d1.w, order=c(1,0,1))
 arma1e.11

 arma1e.114 <- arima(LPPI.d1.w, order=c(1,0,4), fixed=c(NA, NA, 0, 0, NA, NA))
 arma1e.114

 # prediction from ARMA(1,1)
 arma11.pred <- predict(arma1e.11, n.ahead=1); arma11.pred
 # prediction from ARMA(1,(1,4))
 arma114.pred <- predict(arma1e.114, n.ahead=1); arma114.pred

 # Actual observation is closer to ARMA(1,(1,4))
 obs <- window(LPPI.d1, c(1989,4), c(1989,4))
 obs

 ## Forecast accuracy
 # Mean Square Error (MSE)
 (obs - arma11.pred$pred)^2; (obs - arma114.pred$pred)^2
 # Mean Absolute Error (MAE)
 abs(obs - arma11.pred$pred); abs(obs - arma114.pred$pred)
 # Mean Absolute Percentage Error (MAPE)
 abs((obs - arma11.pred$pred)/obs)*100; abs((obs - arma114.pred$pred)/obs)*100





# --- Ex 2: ARMA model selection -----------------------------
 aa <- read.csv("http://klein.uk/R/Lent/arima.csv", h=T)
 str(aa)

 for(i in 4:10){
   aa[,i] <- ts(aa[,i], start=1801, freq=1)
 }

 ## Box-Jenkins Procedure

 # 1. Is the time series stationary?
   # i) plot the data

   # ii) compute ACF and PACF

   # iii) perform ADF tests


 # 2. AR, MA, or ARMA?

   # Plot ACF and PACF


 # 3. Estimate the model, defining p and q
 # (guided by PACF and ACF)


 # 4. Verify that the ACF and PACF of residuals do not show
 # significant spikes after estimation


 # 5. If there are two or more competing models, use AIC or SIBC to decide
 # which is better. The lower AIC and/or SBIC the better the model

 # 6. In the final specification all parameters must be significant,
 # and residuals wel behaved:

   # No autocorrelation?

   # No heteroskedasticity?

   # Normal distribution?



 # --- automatic model seection based on aic criterion ---
 # install.packages("forecast")
 library(forecast)
 # ?auto.arima
 summary( auto.arima(aa$Y1, trace=T, d=0, ic="aic"))
 summary( auto.arima(aa$Y2, trace=T, d=0, ic="aic") )
 summary( auto.arima(aa$Y3, trace=T, d=0, ic="aic") )
 summary( auto.arima(aa$Y4, trace=T, d=0, ic="aic") )
 summary( auto.arima(aa$Y5, trace=T, d=0, ic="aic") )
 summary( auto.arima(aa$Y6, trace=T, d=0, ic="aic") )
 summary( auto.arima(aa$Y7, trace=T, d=0, ic="aic") )





# --- Ex 3: Seasonality and ARIMA modeling -----------------------------

 str(quart)

# --- Ex 3: a) ---
# Plot the M1NSA in level and first difference. Are these sequences stationary?

 M1NSA <- ts(quart$M1NSA, start=c(1960,1), freq=4)
 M1NSA.d1 <- diff(M1NSA, lag=1)
 par(mfrow=c(2,1))
 plot(M1NSA); plot(M1NSA.d1)

 adf.test(M1NSA)
 adf.test(M1NSA.d1)


# --- Ex 3: b) ---
# Take logs and first difference of MINSA (the resulting variable is a measure of money 
# growth). Plot again and compute ACF and PACF. What happens at lags 4, 8, 12, etc?

 LM1NSA <- log(M1NSA)
 LM1NSA.d1 <- diff(LM1NSA, lag=1)

 plot(LM1NSA); plot(LM1NSA.d1)
 acf(LM1NSA.d1); pacf(LM1NSA.d1)


# --- Ex 3: c) ---
# Take a seasonal difference (over the first differenced series) and compute again ACF and 
# PACF, what can you observe now? 

 LM1NSA.d1.d4 <- diff(LM1NSA.d1, lag=4)
 acf(LM1NSA.d1.d4); pacf(LM1NSA.d1.d4)


# --- Ex 3: d) ---
# What kind of model suggests the ACF and PACF? 

 arma3d <- arma(LM1NSA.d1.d4, lag=list(ar=c(1),ma=c(4)))
 summary(arma3d)


# --- Ex 3: e) ---
# Estimate a SARIMA(1,1,0)(0,1,1) and a SARIMA(0,1,1)(0,1,1). What is the best model? 

 ## SARIMA(1,1,0)(0,1,1), AIC: -1465.47
 arima3e <- arima(LM1NSA.d1.d4, order=c(1,1,0), seasonal=list(order=c(0,1,1), period=4))
 summary(arima3e); coeftest(arima3e)

 ## SARIMA(0,1,1)(0,1,1), AIC: -1478.93
 arima3e2 <- arima(LM1NSA.d1.d4, order=c(0,1,1), seasonal=list(order=c(0,1,1), period=4))
 summary(arima3e2); coeftest(arima3e2)

 ## The second model is the better one, but:
 auto.arima(LM1NSA.d1.d4, trace=T)





# --- Ex 4: GARCH model of US PPI -----------------------------

 str(quart)



# --- Ex 4: a) ---
# Formally test for ARCH errors (using the residuals from the ARIMA model).

 arma4a <- arma(LPPI.d1, lag=list(ar=1,ma=c(1,4))); summary(arma4a)
 plot(arma4a$res)

 ## ARCH terms: ARCH LM test with H0: No autocorrelation in RESID^2 up to order 4
 # We reject the hypothesis of no autocorrelation of the squared residuals: 
 library(FinTS)
 ArchTest(c(arma4a$res), lags=4)

 # ARCH test manually:
 u.sqr <- arma4a$res^2
 library(dynlm)
 arch4a <- dynlm(u.sqr ~ L(u.sqr,1) + L(u.sqr,2) + L(u.sqr,3) + L(u.sqr,4))
 summary(arch4a)
 lht(arch4a, c("L(u.sqr, 1)", "L(u.sqr, 2)", "L(u.sqr, 3)", "L(u.sqr, 4)"))

 # ARCH LM test manually:
 # Obs*R^2 is distributed chi-squared with 4 degrees of freedom
 LM <- length(arch4a$res) * summary(arch4a)$r.sq
 pchisq(LM, df=4, lower.tail=F)
 # We reject the null. Then proceed to estimate an ARCH model for the variance.



# --- Ex 4: b) ---
# Use the ACF and PACF of the squared residuals as an indication of the order of the GARCH 
# process. How many ARCH terms seem to be needed? Estimate the model. 

 par(mfrow=c(2,1))
 acf(arch4a$res^2); pacf(arch4a$res^2)

 # manually install rgarch and dependencies as described on
 # http://klein.uk/MPO1A.html
 library(rgarch)

 # Estimate ARCH(4) model with mean equation ARMA(1,(1,4))
 # ?ugarchspec
 spec <- ugarchspec(variance.model = list(model = "fGARCH", submodel="GARCH", garchOrder = c(4,0)),
   mean.model = list(armaOrder = c(1,4), include.mean=T), fixed.pars=list(ma2=0,ma3=0))
 # ?ugarchfit
 sgarch.fit <- ugarchfit(data=c(LPPI.d1), spec = spec)
 sgarch.fit



# --- Ex 4: c) ---
# Test again for remaining ARCH terms (and compute again ACF and PACF). What can you conclude? 
# Observe carefully the estimated coefficients, what problems do you identify?

 str(sgarch.fit)
 ArchTest(sgarch.fit@fit$resid, lags=4)

 # MA(1) term is not significant and ARCH effects still remain.



# --- Ex 4: d) ---
# Estimate now a GARCH(1,1), do you still have the same problems? Tabulate ACF and PACF and 
# test for autocorrelation up to lag 4 in squared residuals.

 # Estimate GARCH(1,1) model with mean equation ARMA(1,(1,4))
 spec <- ugarchspec(variance.model = list(model = "fGARCH", submodel="GARCH", garchOrder = c(1,1)),
   mean.model = list(armaOrder = c(1,4), include.mean=T), fixed.pars=list(ma2=0,ma3=0))
 sgarch.fit <- ugarchfit(data=c(LPPI.d1), spec = spec)
 sgarch.fit

 # Verify that the correlogram of squared residuals is clean. Do ARCH LM test again.

 ArchTest(sgarch.fit@fit$resid, lags=4)

 # ARCH test manually:
 u.sqr <- ts(sgarch.fit@fit$resid^2)
 arch4d <- dynlm(u.sqr ~ L(u.sqr,1) + L(u.sqr,2) + L(u.sqr,3) + L(u.sqr,4))
 summary(arch4d)
 lht(arch4d, c("L(u.sqr, 1)", "L(u.sqr, 2)", "L(u.sqr, 3)", "L(u.sqr, 4)"))

 # We reject the null of no autocorrelation in the squared residuals up to lag 4. 
 # We do not proceed further here.  



# --- Ex 4: e) ---
# Produce one-step-ahead forecast with this model.

 ?ugarchforecast
 forc <- ugarchforecast(sgarch.fit, n.ahead=100)
 forc
 plot(forc,which="all")

 # As time goes on, the forecast tends to the long run variance of the process.




# --- Ex 5:  -----------------------------


# --- Ex 5: a) ---
# Estimate an ARIMA model for the series ym (return on a portfolio) following 
# Box-Jenkins methodology. Why might someone conclude that the residuals appear 
# to be white noise?

 arch <- read.csv("http://klein.uk/R/Lent/arch.csv")
 str(arch)

 # Box-Jenkins methodology
 # -> Estimate a MA(3,6)

 arma5a <- arima(arch$ym, order=c(0,0,6), fixed=c(0, 0, NA, 0, 0, NA, NA))
 plot(arma5a$resid, type="l")


# --- Ex 5: b) ---
# Perform the LM test for ARCH errors.

 ArchTest(arma5a$resid, lags=4)


# --- Ex 5: c) ---
# Estimate an ARCH-M process (using the standard deviation in the mean equation), 
# with an ARCH(1) for the variance.

 spec <- ugarchspec(variance.model = list(model = "fGARCH", submodel="GARCH", garchOrder = c(1,0)),
   mean.model = list(armaOrder = c(0,6), garchInMean=T, inMeanType=1),
   fixed.pars=list(ma1=0,ma2=0,ma4=0,ma5=0))
 sgarch.fit <- ugarchfit(data=arch$ym, spec = spec)
 sgarch.fit

 ArchTest(sgarch.fit@fit$resid, lags=4)


# --- Ex 5: d) ---
# Check the ACF and the PACF. Do they appear to be satisfactory? Try other formulations 
# for the ARCH-M process. Interpret the coefficient of the std. dev. in the mean equation.

 par(mfrow=c(2,1))
 acf(sgarch.fit@fit$resid); pacf(sgarch.fit@fit$resid)

 spec <- ugarchspec(variance.model = list(model = "fGARCH", submodel="GARCH", garchOrder = c(1,1)),
   mean.model = list(armaOrder = c(0,6), garchInMean=T, inMeanType=1),
   fixed.pars=list(ma1=0,ma2=0,ma3=0,ma4=0,ma5=0))
 sgarch.fit <- ugarchfit(data=arch$ym, spec = spec)
 sgarch.fit

 ArchTest(sgarch.fit@fit$resid, lags=4)




# --- Ex 7:  -----------------------------
# The variables, Ret, Inf, dtbill, are the S&P Composite index return, the US inflation 
# rate, and the first difference of the three-month Treasury bill rate. The mean equation 
# has the following form Ret = c + b1*Ret(-1) + b2*Inf(-1) + b3*dtbill(-1) + u.

 garch <- read.csv("http://klein.uk/R/Lent/garch.csv")
 str(garch)

 ret <- ts(garch$ret, start=c(1954, 1), freq=12)
 inf <- ts(garch$inf, start=c(1954, 1), freq=12)
 dtbill <- ts(garch$dtbill, start=c(1954, 1), freq=12)
 garch.ts <- ts.union(ret, inf, dtbill)

 lm7 <- dynlm(ret ~ L(ret,1) + L(inf,1) + L(dtbill,1), data=garch.ts)
 summary(lm7)


# --- Ex 7: a) ---
# Test for ARCH terms in the squared residuals from this equation (and plot the ACF and PACF 
# of the squared residual).

 ArchTest(lm7$resid, lags=2)
 par(mfrow=c(2,1))
 acf(lm7$resid); pacf(lm7$resid)


# --- Ex 7: b) ---
# Try to model the heteroskedasticity using: GARCH and TARCH models.

 ## ---
 # GARCH(1,1) probably is enough to take into account all the autocorrelation on squared residuals.

 # generate matrix of external regressors
 n <- dim(garch)[1]
 ret_1 <- c(NA, garch$ret[1:(n-1)])
 head(data.frame(garch$obs, garch$ret, garch$ret_1))
 inf_1 <- c(NA, garch$inf[1:(n-1)])
 dtbill_1 <- c(NA, garch$dtbill[1:(n-1)])
 ex.reg <- data.frame(ret_1, inf_1, dtbill_1)[2:n,]
 head(ex.reg)
 ex.reg <- as.matrix(ex.reg)

 spec <- ugarchspec(variance.model = list(model = "fGARCH", submodel="GARCH", garchOrder = c(1,1)),
   mean.model = list(armaOrder = c(0,0), external.regressors=ex.reg) )
 sgarch.fit <- ugarchfit(data=garch$ret[2:n], spec = spec)
 sgarch.fit

 par(mfrow=c(2,1))
 acf(sgarch.fit@fit$resid); pacf(sgarch.fit@fit$resid)
 ArchTest(sgarch.fit@fit$resid, lags=2)

 ## ---
 # TARCH: Even though the previous model seems to be good, we can try to see if there are 
 # some asymmetric responses to negative and positive shocks.

 spec <- ugarchspec(variance.model = list(model = "fGARCH", submodel="TGARCH", garchOrder = c(1,1)),
   mean.model = list(armaOrder = c(0,0), external.regressors=ex.reg) )
 sgarch.fit <- ugarchfit(data=garch$ret[2:n], spec = spec)
 sgarch.fit








# -------------------------------------------------------------------
# --- End of Session ------------------------------------------------

 save.image("EndOfSession.RData")
 q("yes")