# -------------------------------------------------------------------
# MPO1-A Advanced Quantitative Research Methods
# Thilo Klein
# Lab Session 2: ARIMA, ARCH and GARCH Models

# Libraries: tseries, ccgarch, FinTS, dynlm, forecast, rgarch
# http://cran.r-project.org/web/views/TimeSeries.html
rm(list=ls())
setwd("E:/LabSessions_MPO1A_LT/Data")
source("http://klein.uk/R/myfunctions.R")
ls()
# -------------------------------------------------------------------

# --- Ex 1: Estimation of a quarterly ARMA model of the US Producer Price Index (PPI) -----------------------------

# 'quart' is expected in the workspace (loaded by the setup sourced above).
str(quart)

# --- Ex 1: a) ---
# Plot the PPI in level and first difference. Are these sequences stationary?

## generate variables
# Quarterly series starting 1960Q1. Taking logs stabilises the variance;
# the first difference of the log PPI approximates the quarterly inflation rate.
PPI <- ts(quart$PPI, start=c(1960, 1), freq=4)
LPPI <- ts(log(PPI), start=c(1960, 1), freq=4)
LPPI.d1 <- diff(LPPI, lag=1, differences=1)

## plot series
par(mfrow=c(2,1))
plot(LPPI); plot(LPPI.d1)

# --- Ex 1: b) ---
# Take logs and first difference of PPI (the resulting variable is a measure of what?).
# Plot again and compute ACF and PACF and do ADF test. Why is it important to take logs?
# Has the variable a unit root?

# The autocorrelation and partial autocorrelation decay quickly: it seems to be a
# stationary series:

par(mfrow=c(2,1))
acf(LPPI.d1); pacf(LPPI.d1)
acf(LPPI.d1, xlim=c(0.25,5.5)); pacf(LPPI.d1) # drop the lag-0 spike (always 1) from the ACF plot

library(tseries)
# We reject the null hypothesis: the series is stationary.

# test for remaining autocorrelation in the residuals
# adf.test.2() and ljung.box.test.1() come from the sourced myfunctions.R
lm1 <- adf.test.2(LPPI.d1, int=TRUE, trend=FALSE, k=2)
acf(lm1$resid); pacf(lm1$resid)
# install.packages("ccgarch")
library(ccgarch)
ljung.box.test.1(lm1$resid, seq(1,15,1))

# --- Ex 1: d) ---
# Estimate an ARMA(p,q), for the following cases (p,q)=(1,0),  (2,0), (1,1),  (1,(1,4)), (2,1),
# (1,(0,4)). For each of these models report SSR, AIC and Q(5), Q(10) and Q(15). Which is
# the best model? Diagnostic tests: Normality, no autocorrelation, no ARCH terms?

# --- best model: ---
# ARMA(1,(1,4)): AR(1) plus MA terms at lags 1 and 4 (tseries::arma 'lag' interface).
arma1d <- arma(LPPI.d1, lag=list(ar=1, ma=c(1,4)))
summary(arma1d)
sum(arma1d$resid^2, na.rm=TRUE) # SSR

# --- other models: ---
# nNA masks out the initial NA residuals of the largest model (arma1d, max lag 4),
# so every candidate model is evaluated on the same effective sample.
arma1da <- arma(LPPI.d1, order=c(1,0)); summary(arma1da)
nNA <- !is.na(arma1d$res)
ljung.box.test(arma1da$res[nNA])

arma1db <- arma(LPPI.d1, order=c(2,0)); summary(arma1db)
ljung.box.test(arma1db$res[nNA])

arma1dc <- arma(LPPI.d1, order=c(1,1)); summary(arma1dc)
ljung.box.test(arma1dc$res[nNA])

arma1dd <- arma(LPPI.d1, order=c(2,1)); summary(arma1dd)
ljung.box.test(arma1dd$res[nNA])

arma1de <- arma(LPPI.d1, lag=list(ar=c(1),ma=c(1,4))); summary(arma1de)
ljung.box.test(arma1de$res[nNA])

arma1df <- arma(LPPI.d1, lag=list(ar=c(1),ma=c(4))); summary(arma1df)
ljung.box.test(arma1df$res[nNA])

# --- Diagnostic tests: ---

## Autocorrelation: Ljung-Box statistic
# Is there any evidence of autocorrelation up to lag 5, 10 and 15? No, there isn't!

nNA <- !is.na(arma1d$res)
ljung.box.test.1(arma1d$res[nNA], seq(1,15,1))
par(mfrow=c(2,1)); acf(arma1d$res[nNA]); pacf(arma1d$res[nNA])

## Normality: Jarque-Bera test for the null of normality
# There is evidence of non-normality. We reject the null hypothesis of normal distributed
# residuals. We should improve our model!
# Overlay a normal density (mean 0, residual sd) on the residual histogram.
hist(arma1d$res, breaks=20, freq=FALSE)
grid.x <- seq(-.05,.05,.001)
grid.y <- dnorm(grid.x, sd=sd(arma1d$res, na.rm=TRUE))
lines(grid.x, grid.y, col="blue", lwd=2)
jarque.bera.test(arma1d$res[nNA])

## ARCH terms: ARCH LM test
# We reject the hypothesis of no autocorrelation of the squared residuals:
# install.packages("FinTS")
library(FinTS)
ArchTest(c(arma1d$res), lags=2)
plot(arma1d$res)

# What did we just do? ARCH test manually:
# regress the squared residuals on their own lags; jointly significant lags
# indicate ARCH effects.
u.sqr <- arma1d$res^2
library(dynlm)
arch1d <- dynlm(u.sqr ~ L(u.sqr,1) + L(u.sqr,2))
summary(arch1d)
# lht() (linear hypothesis test) is from the car package -- presumably loaded
# via the sourced myfunctions.R; verify it is attached.
lht(arch1d, c("L(u.sqr, 1)", "L(u.sqr, 2)"))

# Or alternatively: ARCH LM test manually:
# Obs*R^2 is distributed chi-squared with 2 degrees of freedom
n <- length(arch1d$res)
R2 <- summary(arch1d)$r.sq
LM <- n * R2; LM
pchisq(LM, df=2, lower.tail=FALSE)

# --- Ex 1: e) ---
# Estimate the ARMA models (p,q)=(1,1) and (1,(1,4)) over the period 1960.1-1989.3. Obtain
# one-step-ahead forecasts and compare them with the actual 1989.4 observation.

LPPI.d1.w <- window(LPPI.d1, start=c(1960,1), end=c(1989,3))
LPPI.d1

arma1e.11 <- arima(LPPI.d1.w, order=c(1,0,1))
arma1e.11

# ARMA(1,(1,4)): MA(4) with ma2 and ma3 fixed at zero.
# 'fixed' order is (ar1, ma1, ma2, ma3, ma4, intercept).
arma1e.114 <- arima(LPPI.d1.w, order=c(1,0,4), fixed=c(NA, NA, 0, 0, NA, NA))
arma1e.114

# NOTE(review): the forecast objects used below were referenced but never
# created in the original script; generate the one-step-ahead predictions here.
# prediction from ARMA(1,1)
arma11.pred <- predict(arma1e.11, n.ahead=1)
# prediction from ARMA(1,(1,4))
arma114.pred <- predict(arma1e.114, n.ahead=1)

# Actual observation is closer to ARMA(1,(1,4))
obs <- window(LPPI.d1, c(1989,4), c(1989,4))
obs

## Forecast accuracy
# Mean Square Error (MSE)
(obs - arma11.pred$pred)^2; (obs - arma114.pred$pred)^2
# Mean Absolute Error (MAE)
abs(obs - arma11.pred$pred); abs(obs - arma114.pred$pred)
# Mean Absolute Percentage Error (MAPE)
abs((obs - arma11.pred$pred)/obs)*100; abs((obs - arma114.pred$pred)/obs)*100

# --- Ex 2: ARMA model selection -----------------------------
str(aa)

# Convert the series columns (4 to 10) to annual ts objects starting in 1801.
for(i in 4:10){
  aa[,i] <- ts(aa[,i], start=1801, freq=1)
}

## Box-Jenkins Procedure

# 1. Is the time series stationary?
# i) plot the data

# ii) compute ACF and PACF

# 2. AR, MA, or ARMA?

# Plot ACF and PACF

# 3. Estimate the model, defining p and q
# (guided by PACF and ACF)

# 4. Verify that the ACF and PACF of residuals do not show
# significant spikes after estimation

# 5. If there are two or more competing models, use AIC or SBIC to decide
# which is better. The lower AIC and/or SBIC the better the model

# 6. In the final specification all parameters must be significant,
# and residuals well behaved:

# No autocorrelation?

# No heteroskedasticity?

# Normal distribution?

# --- automatic model selection based on AIC criterion ---
# install.packages("forecast")
library(forecast)
# ?auto.arima
# d=0: no differencing; trace=TRUE prints every candidate model considered.
summary( auto.arima(aa$Y1, trace=TRUE, d=0, ic="aic") )
summary( auto.arima(aa$Y2, trace=TRUE, d=0, ic="aic") )
summary( auto.arima(aa$Y3, trace=TRUE, d=0, ic="aic") )
summary( auto.arima(aa$Y4, trace=TRUE, d=0, ic="aic") )
summary( auto.arima(aa$Y5, trace=TRUE, d=0, ic="aic") )
summary( auto.arima(aa$Y6, trace=TRUE, d=0, ic="aic") )
summary( auto.arima(aa$Y7, trace=TRUE, d=0, ic="aic") )

# --- Ex 3: Seasonality and ARIMA modeling -----------------------------

str(quart)

# --- Ex 3: a) ---
# Plot the M1NSA in level and first difference. Are these sequences stationary?

# M1NSA: not-seasonally-adjusted money stock, quarterly from 1960Q1.
M1NSA <- ts(quart$M1NSA, start=c(1960,1), freq=4)
M1NSA.d1 <- diff(M1NSA, lag=1)
par(mfrow=c(2,1))
plot(M1NSA); plot(M1NSA.d1)

# --- Ex 3: b) ---
# Take logs and first difference of M1NSA (the resulting variable is a measure of money
# growth). Plot again and compute ACF and PACF. What happens at lags 4, 8, 12, etc?

LM1NSA <- log(M1NSA)
LM1NSA.d1 <- diff(LM1NSA, lag=1)

plot(LM1NSA); plot(LM1NSA.d1)
acf(LM1NSA.d1); pacf(LM1NSA.d1)

# --- Ex 3: c) ---
# Take a seasonal difference (over the first differenced series) and compute again ACF and
# PACF, what can you observe now?

# lag=4 removes the quarterly seasonal pattern left in the first differences.
LM1NSA.d1.d4 <- diff(LM1NSA.d1, lag=4)
acf(LM1NSA.d1.d4); pacf(LM1NSA.d1.d4)

# --- Ex 3: d) ---
# What kind of model suggests the ACF and PACF?

# AR(1) plus a seasonal MA term at lag 4 (tseries::arma 'lag' interface).
arma3d <- arma(LM1NSA.d1.d4, lag=list(ar=c(1),ma=c(4)))
summary(arma3d)

# --- Ex 3: e) ---
# Estimate a SARIMA(1,1,0)(0,1,1) and a SARIMA(0,1,1)(0,1,1). What is the best model?
# NOTE(review): LM1NSA.d1.d4 is already first- AND seasonally-differenced, so the
# d=1 / D=1 orders below difference the data a second time; fitting these orders
# to the level series LM1NSA may be what was intended -- confirm.
# NOTE(review): coeftest() is from the lmtest package -- presumably attached via
# the sourced myfunctions.R; verify it is available.

## SARIMA(1,1,0)(0,1,1), AIC: -1465.47
arima3e <- arima(LM1NSA.d1.d4, order=c(1,1,0), seasonal=list(order=c(0,1,1), period=4))
summary(arima3e); coeftest(arima3e)

## SARIMA(0,1,1)(0,1,1), AIC: -1478.93
arima3e2 <- arima(LM1NSA.d1.d4, order=c(0,1,1), seasonal=list(order=c(0,1,1), period=4))
summary(arima3e2); coeftest(arima3e2)

## The second model is the better one (lower AIC), but:
auto.arima(LM1NSA.d1.d4, trace=T)

# --- Ex 4: GARCH model of US PPI -----------------------------

str(quart)

# --- Ex 4: a) ---
# Formally test for ARCH errors (using the residuals from the ARIMA model).

# Re-estimate the preferred mean equation ARMA(1,(1,4)) from Ex 1.
arma4a <- arma(LPPI.d1, lag=list(ar=1,ma=c(1,4))); summary(arma4a)
plot(arma4a$res)

## ARCH terms: ARCH LM test with H0: No autocorrelation in RESID^2 up to order 4
# We reject the hypothesis of no autocorrelation of the squared residuals:
library(FinTS)
ArchTest(c(arma4a$res), lags=4)

# ARCH test manually:
# regress the squared residuals on four of their own lags
u.sqr <- arma4a$res^2
library(dynlm)
arch4a <- dynlm(u.sqr ~ L(u.sqr,1) + L(u.sqr,2) + L(u.sqr,3) + L(u.sqr,4))
summary(arch4a)
lht(arch4a, c("L(u.sqr, 1)", "L(u.sqr, 2)", "L(u.sqr, 3)", "L(u.sqr, 4)"))

# ARCH LM test manually:
# Obs*R^2 is distributed chi-squared with 4 degrees of freedom
LM <- length(arch4a$res) * summary(arch4a)$r.sq
pchisq(LM, df=4, lower.tail=FALSE)
# We reject the null. Then proceed to estimate an ARCH model for the variance.
# We reject the null. Then proceed to estimate an ARCH model for the variance.

# --- Ex 4: b) ---
# Use the ACF and PACF of the squared residuals as an indication of the order of the GARCH
# process. How many ARCH terms seem to be needed? Estimate the model.

par(mfrow=c(2,1))
acf(arch4a\$res^2); pacf(arch4a\$res^2)

# manually install rgarch and dependencies as described on
# http://klein.uk/MPO1A.html
library(rgarch)

# Estimate ARCH(4) model with mean equation ARMA(1,(1,4))
# ?ugarchspec
spec <- ugarchspec(variance.model = list(model = "fGARCH", submodel="GARCH", garchOrder = c(4,0)),
mean.model = list(armaOrder = c(1,4), include.mean=T), fixed.pars=list(ma2=0,ma3=0))
# ?ugarchfit
sgarch.fit <- ugarchfit(data=c(LPPI.d1), spec = spec)
sgarch.fit

# --- Ex 4: c) ---
# Test again for remaining ARCH terms (and compute again ACF and PACF). What can you conclude?
# Observe carefully the estimated coefficients, what problems do you identify?

str(sgarch.fit)
# residuals live in the 'fit' slot of the rgarch fit object
ArchTest(sgarch.fit@fit$resid, lags=4)

# MA(1) term is not significant and ARCH effects still remain.

# --- Ex 4: d) ---
# Estimate now a GARCH(1,1), do you still have the same problems? Tabulate ACF and PACF and
# test for autocorrelation up to lag 4 in squared residuals.

# Estimate GARCH(1,1) model with mean equation ARMA(1,(1,4))
spec <- ugarchspec(variance.model = list(model = "fGARCH", submodel="GARCH", garchOrder = c(1,1)),
                   mean.model = list(armaOrder = c(1,4), include.mean=TRUE), fixed.pars=list(ma2=0,ma3=0))
sgarch.fit <- ugarchfit(data=c(LPPI.d1), spec = spec)
sgarch.fit

# Verify that the correlogram of squared residuals is clean. Do ARCH LM test again.

ArchTest(sgarch.fit@fit$resid, lags=4)

# ARCH test manually:
# regress the squared residuals on four of their own lags
u.sqr <- ts(sgarch.fit@fit$resid^2)
arch4d <- dynlm(u.sqr ~ L(u.sqr,1) + L(u.sqr,2) + L(u.sqr,3) + L(u.sqr,4))
summary(arch4d)
lht(arch4d, c("L(u.sqr, 1)", "L(u.sqr, 2)", "L(u.sqr, 3)", "L(u.sqr, 4)"))

# We reject the null of no autocorrelation in the squared residuals up to lag 4.
# We do not proceed further here.

# --- Ex 4: e) ---
# Produce one-step-ahead forecast with this model.

?ugarchforecast
# NOTE(review): 'forc' was used below but never created in the original script.
# n.ahead=12 shows the multi-step behaviour discussed in the comment below;
# the first element is the one-step-ahead forecast the exercise asks for.
forc <- ugarchforecast(sgarch.fit, n.ahead = 12)
forc
plot(forc,which="all")

# As time goes on, the forecast tends to the long run variance of the process.

# --- Ex 5:  -----------------------------

# --- Ex 5: a) ---
# Estimate an ARIMA model for the series ym (return on a portfolio) following
# Box-Jenkins methodology. Why might someone conclude that the residuals appear
# to be white noise?

str(arch)

# Box-Jenkins methodology
# -> Estimate a MA(3,6)

# MA terms at lags 3 and 6 only; 'fixed' order is (ma1, ..., ma6, intercept).
arma5a <- arima(arch$ym, order=c(0,0,6), fixed=c(0, 0, NA, 0, 0, NA, NA))
plot(arma5a$resid, type="l")

# --- Ex 5: b) ---
# Perform the LM test for ARCH errors.

ArchTest(arma5a$resid, lags=4)

# --- Ex 5: c) ---
# Estimate an ARCH-M process (using the standard deviation in the mean equation),
# with an ARCH(1) for the variance.

# garchOrder=c(1,0): ARCH(1). garchInMean + inMeanType=1 puts the conditional
# std. deviation in the mean equation; ma3 and ma6 remain free (MA(3,6) mean).
spec <- ugarchspec(variance.model = list(model = "fGARCH", submodel="GARCH", garchOrder = c(1,0)),
                   mean.model = list(armaOrder = c(0,6), garchInMean=TRUE, inMeanType=1),
                   fixed.pars=list(ma1=0,ma2=0,ma4=0,ma5=0))
sgarch.fit <- ugarchfit(data=arch$ym, spec = spec)
sgarch.fit

ArchTest(sgarch.fit@fit$resid, lags=4)

# --- Ex 5: d) ---
# Check the ACF and the PACF. Do they appear to be satisfactory? Try other formulations
# for the ARCH-M process. Interpret the coefficient of the std. dev. in the mean equation.

par(mfrow=c(2,1))
acf(sgarch.fit@fit$resid); pacf(sgarch.fit@fit$resid)

# Alternative formulation: GARCH(1,1)-M with only the ma6 term left free.
spec <- ugarchspec(variance.model = list(model = "fGARCH", submodel="GARCH", garchOrder = c(1,1)),
                   mean.model = list(armaOrder = c(0,6), garchInMean=TRUE, inMeanType=1),
                   fixed.pars=list(ma1=0,ma2=0,ma3=0,ma4=0,ma5=0))
sgarch.fit <- ugarchfit(data=arch$ym, spec = spec)
sgarch.fit

ArchTest(sgarch.fit@fit$resid, lags=4)

# --- Ex 7:  -----------------------------
# The variables, Ret, Inf, dtbill, are the S&P Composite index return, the US inflation
# rate, and the first difference of the three-month Treasury bill rate. The mean equation
# has the following form Ret = c + b1*Ret(-1) + b2*Inf(-1) + b3*dtbill(-1) + u.

str(garch)

# Monthly series starting January 1954.
ret <- ts(garch$ret, start=c(1954, 1), freq=12)
inf <- ts(garch$inf, start=c(1954, 1), freq=12)
dtbill <- ts(garch$dtbill, start=c(1954, 1), freq=12)
garch.ts <- ts.union(ret, inf, dtbill)

# Mean equation estimated by OLS with first lags of all three series.
lm7 <- dynlm(ret ~ L(ret,1) + L(inf,1) + L(dtbill,1), data=garch.ts)
summary(lm7)

# --- Ex 7: a) ---
# Test for ARCH terms in the squared residuals from this equation (and plot the ACF and PACF
# of the squared residual).

ArchTest(lm7$resid, lags=2)
par(mfrow=c(2,1))
# Correlogram of the SQUARED residuals (the original plotted the raw residuals,
# but the exercise asks for the squared residuals).
acf(lm7$resid^2); pacf(lm7$resid^2)

# --- Ex 7: b) ---
# Try to model the heteroskedasticity using: GARCH and TARCH models.

## ---
# GARCH(1,1) probably is enough to take into account all the autocorrelation on squared residuals.

# generate matrix of external regressors (first lags of ret, inf and dtbill);
# the first row is lost to lagging, hence the 2:n subsetting below.
n <- dim(garch)[1]
ret_1 <- c(NA, garch$ret[1:(n-1)])
inf_1 <- c(NA, garch$inf[1:(n-1)])
dtbill_1 <- c(NA, garch$dtbill[1:(n-1)])
ex.reg <- data.frame(ret_1, inf_1, dtbill_1)[2:n,]
ex.reg <- as.matrix(ex.reg)

# GARCH(1,1) with the lagged regressors entering the mean equation.
spec <- ugarchspec(variance.model = list(model = "fGARCH", submodel="GARCH", garchOrder = c(1,1)),
                   mean.model = list(armaOrder = c(0,0), external.regressors=ex.reg) )
sgarch.fit <- ugarchfit(data=garch$ret[2:n], spec = spec)
sgarch.fit

par(mfrow=c(2,1))
acf(sgarch.fit@fit$resid); pacf(sgarch.fit@fit$resid)
ArchTest(sgarch.fit@fit$resid, lags=2)

## ---
# TARCH: Even though the previous model seems to be good, we can try to see if there are
# some asymmetric responses to negative and positive shocks.

# Same mean equation as above; the TGARCH submodel adds a threshold (asymmetry) term.
spec <- ugarchspec(variance.model = list(model = "fGARCH", submodel="TGARCH", garchOrder = c(1,1)),
                   mean.model = list(armaOrder = c(0,0), external.regressors=ex.reg) )
sgarch.fit <- ugarchfit(data=garch$ret[2:n], spec = spec)
sgarch.fit

# -------------------------------------------------------------------
# --- End of Session ------------------------------------------------

# Persist the full workspace, then leave R (saving the session image again).
save.image("EndOfSession.RData")
quit(save = "yes")