ARCH and GARCH Models
NKE
First, let me use tidyquant to acquire the data.
library(tidyquant); library(tidyverse)
NKE <- tq_get("NKE", from="2019-01-01")
NKE
## # A tibble: 918 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NKE 2019-01-02 72.8 74.6 72.2 74.1 6762700 71.7
## 2 NKE 2019-01-03 73.2 73.3 71.2 72.8 8007400 70.4
## 3 NKE 2019-01-04 73.4 75.1 73.1 74.7 7844200 72.3
## 4 NKE 2019-01-07 74.7 76.4 74.3 75.7 8184800 73.3
## 5 NKE 2019-01-08 76.8 77.4 76.2 76.7 8809000 74.3
## 6 NKE 2019-01-09 77.0 77.2 76.1 76.6 8591000 74.2
## 7 NKE 2019-01-10 75.6 77.3 75.5 76.4 11148600 74.0
## 8 NKE 2019-01-11 76.3 76.9 75.8 76.0 10689900 73.6
## 9 NKE 2019-01-14 75.5 76.8 75.5 76.1 5677400 73.7
## 10 NKE 2019-01-15 76.2 78.0 76.1 77.9 6212400 75.4
## # … with 908 more rows
For a variety of reasons, equities are unlikely to be mean-reverting in level form. Let me apply some unit-root testing for Nike since 2019.
NKEAC <- NKE %>% select(date, adjusted)
summary(urca::ur.df(NKEAC$adjusted))
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression none
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.938 -1.044 0.067 1.168 20.568
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## z.lag.1 0.0001009 0.0006328 0.160 0.873
## z.diff.lag -0.0012145 0.0330807 -0.037 0.971
##
## Residual standard error: 2.269 on 914 degrees of freedom
## Multiple R-squared: 2.903e-05, Adjusted R-squared: -0.002159
## F-statistic: 0.01326 on 2 and 914 DF, p-value: 0.9868
##
##
## Value of test-statistic is: 0.1595
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau1 -2.58 -1.95 -1.62
summary(urca::ur.df(NKEAC$adjusted, type="trend"))
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression trend
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.8038 -1.0576 -0.0166 1.0902 20.6120
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.575e-01 3.425e-01 1.628 0.104
## z.lag.1 -4.600e-03 4.014e-03 -1.146 0.252
## tt 3.295e-05 4.383e-04 0.075 0.940
## z.diff.lag 1.013e-04 3.320e-02 0.003 0.998
##
## Residual standard error: 2.267 on 912 degrees of freedom
## Multiple R-squared: 0.003104, Adjusted R-squared: -0.0001755
## F-statistic: 0.9465 on 3 and 912 DF, p-value: 0.4175
##
##
## Value of test-statistic is: -1.1461 1.0607 1.4188
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau3 -3.96 -3.41 -3.12
## phi2 6.09 4.68 4.03
## phi3 8.27 6.25 5.34
summary(urca::ur.df(NKEAC$adjusted, type = "drift"))
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression drift
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.8025 -1.0509 -0.0196 1.0868 20.6135
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.5462463 0.3075563 1.776 0.0761 .
## z.lag.1 -0.0043702 0.0025955 -1.684 0.0926 .
## z.diff.lag -0.0001219 0.0330476 -0.004 0.9971
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.266 on 913 degrees of freedom
## Multiple R-squared: 0.003098, Adjusted R-squared: 0.0009138
## F-statistic: 1.418 on 2 and 913 DF, p-value: 0.2426
##
##
## Value of test-statistic is: -1.6837 1.59
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau2 -3.43 -2.86 -2.57
## phi1 6.43 4.59 3.78
summary(urca::ur.kpss(NKEAC$adjusted, type = "mu"))
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 6 lags.
##
## Value of test-statistic is: 9.2369
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
summary(urca::ur.kpss(NKEAC$adjusted, type = "tau"))
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: tau with 6 lags.
##
## Value of test-statistic is: 1.411
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.119 0.146 0.176 0.216
So the process seems to clearly contain a unit-root, statistically. Let’s have a look.
Visualizing Adjusted Close
library(fpp3)
NKE %>% mutate(tday = row_number()) %>% as_tsibble(index=tday) %>% autoplot(adjusted)
Returns
Let me work with returns.
NKER <- NKE %>% tq_transmute(select=adjusted, mutate_fun = periodReturn, period="daily") %>% mutate(tday = row_number())
NKER %>% as_tsibble(index=tday) %>% autoplot(daily.returns)
Unit roots and returns
How do the returns seem to behave?
summary(urca::ur.df(NKER$daily.returns))
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression none
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.118677 -0.008654 0.000554 0.011048 0.155177
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## z.lag.1 -0.97198 0.04690 -20.726 <2e-16 ***
## z.diff.lag -0.03425 0.03307 -1.036 0.301
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02064 on 914 degrees of freedom
## Multiple R-squared: 0.504, Adjusted R-squared: 0.5029
## F-statistic: 464.4 on 2 and 914 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -20.7258
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau1 -2.58 -1.95 -1.62
summary(urca::ur.df(NKER$daily.returns, type="trend"))
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression trend
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.119867 -0.009244 -0.000428 0.009998 0.155032
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.135e-03 1.370e-03 1.558 0.120
## z.lag.1 -9.774e-01 4.701e-02 -20.791 <2e-16 ***
## tt -3.148e-06 2.582e-06 -1.219 0.223
## z.diff.lag -3.158e-02 3.311e-02 -0.954 0.340
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02063 on 912 degrees of freedom
## Multiple R-squared: 0.5054, Adjusted R-squared: 0.5037
## F-statistic: 310.6 on 3 and 912 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -20.7914 144.0971 216.1456
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau3 -3.96 -3.41 -3.12
## phi2 6.09 4.68 4.03
## phi3 8.27 6.25 5.34
summary(urca::ur.df(NKER$daily.returns, type = "drift"))
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression drift
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.119360 -0.009343 -0.000130 0.010387 0.154500
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0006866 0.0006828 1.006 0.315
## z.lag.1 -0.9742361 0.0469504 -20.750 <2e-16 ***
## z.diff.lag -0.0331021 0.0330908 -1.000 0.317
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02064 on 913 degrees of freedom
## Multiple R-squared: 0.5045, Adjusted R-squared: 0.5035
## F-statistic: 464.9 on 2 and 913 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -20.7503 215.2877
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau2 -3.43 -2.86 -2.57
## phi1 6.43 4.59 3.78
summary(urca::ur.kpss(NKER$daily.returns, type = "mu"))
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 6 lags.
##
## Value of test-statistic is: 0.2047
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
summary(urca::ur.kpss(NKER$daily.returns, type = "tau"))
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: tau with 6 lags.
##
## Value of test-statistic is: 0.0648
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.119 0.146 0.176 0.216
So we can reject the null of a unit root and we fail to reject the null of stationarity of the returns. How about the correlation functions?
ACF and PACF of Returns
NKER %>% as_tsibble(index=tday) %>% gg_tsdisplay(daily.returns, plot_type = "partial")
They are nominally white noise.
Box.test(NKER$daily.returns, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: NKER$daily.returns
## X-squared = 0.052508, df = 1, p-value = 0.8188
NKER %>% as_tsibble(index=tday) %>% model(ARIMA(daily.returns)) %>% report()
## Series: daily.returns
## Model: ARIMA(0,0,0)
##
## sigma^2 estimated as 0.000425: log likelihood=2260.79
## AIC=-4519.57 AICc=-4519.57 BIC=-4514.75
So the returns have a slightly positive and marginally significant constant. Nike has averaged 0.15 percent returns per day over the period given this very simple representation.
Models
Let’s start with a representation of an AR(p) process.
\[R_{t} = \mu_{R} + \phi_{1}R_{t-1} + \phi_{2}R_{t-2} + \ldots + \phi_{p}R_{t-p} + \epsilon_{t} \]
where
\[ \mathbb{E}[\epsilon_{t}] = 0 \]
and
\[ \mathbb{E}[\epsilon \epsilon^{\prime}] = \sigma^{2} \cdot I_{T} \]
where \(R_{t}\) represent the returns at time \(t\), \(\mu\) is the mean of the stationary process generating \(R_{t}\) and \(\epsilon\) is a white noise error term. Note the stationarity of \(R_t\) is arises so long as the roots of \(1 - \phi_{1}z + \phi_{2}z^2 + \ldots + \phi_{p}z^{p} = 0\) and that the unconditional variance is constant.
ARCH(m)
The ARCH(m) process of Engel (1982) describes the square of \(\epsilon_{t}\) to follow an AR(m) process such that \[ \epsilon_{t}^{2} = \xi + a_{1}\epsilon_{t-1}^{2} + a_{2}\epsilon_{t-2}^{2} + \ldots + a_{m}\epsilon_{t-m}^{2} + \omega_{t} \] with \(\omega\) as white noise. The \(\epsilon\) are errors in forecasting \(R\); the same restriction on the roots will apply from before. The unconditional variance is still a constant described now by \(\frac{\xi}{1-a_{1}-a_{2}-\ldots-a_{m}}\).
There are a few implementations of the basic test for ARCH effects. Let’s have a look at one through six lags. I am not going to tidy this.
1:6 %>% map(function(x) {FinTS::ArchTest(NKER$daily.returns, lags = x)})
## [[1]]
##
## ARCH LM-test; Null hypothesis: no ARCH effects
##
## data: NKER$daily.returns
## Chi-squared = 37.705, df = 1, p-value = 8.23e-10
##
##
## [[2]]
##
## ARCH LM-test; Null hypothesis: no ARCH effects
##
## data: NKER$daily.returns
## Chi-squared = 66.435, df = 2, p-value = 3.747e-15
##
##
## [[3]]
##
## ARCH LM-test; Null hypothesis: no ARCH effects
##
## data: NKER$daily.returns
## Chi-squared = 66.504, df = 3, p-value = 2.392e-14
##
##
## [[4]]
##
## ARCH LM-test; Null hypothesis: no ARCH effects
##
## data: NKER$daily.returns
## Chi-squared = 66.651, df = 4, p-value = 1.155e-13
##
##
## [[5]]
##
## ARCH LM-test; Null hypothesis: no ARCH effects
##
## data: NKER$daily.returns
## Chi-squared = 72.462, df = 5, p-value = 3.147e-14
##
##
## [[6]]
##
## ARCH LM-test; Null hypothesis: no ARCH effects
##
## data: NKER$daily.returns
## Chi-squared = 97.817, df = 6, p-value < 2.2e-16
The evidence suggests the presence of ARCH effects. Let’s model them. For this, we need a way to represent these models in a consistent syntax.
ARCH
options(scipen=7)
NKER.arch <- tseries::garch(NKER$daily.returns,c(0,1))
##
## ***** ESTIMATION WITH ANALYTICAL GRADIENT *****
##
##
## I INITIAL X(I) D(I)
##
## 1 4.037780e-04 1.000e+00
## 2 5.000000e-02 1.000e+00
##
## IT NF F RELDF PRELDF RELDX STPPAR D*STEP NPRELDF
## 0 1 -3.126e+03
## 1 7 -3.127e+03 4.74e-04 7.64e-04 2.8e-04 3.0e+09 2.8e-05 1.16e+06
## 2 8 -3.128e+03 1.92e-05 2.28e-05 2.7e-04 2.0e+00 2.8e-05 1.60e+01
## 3 15 -3.144e+03 5.14e-03 8.32e-03 4.5e-01 2.0e+00 8.3e-02 1.59e+01
## 4 16 -3.149e+03 1.84e-03 1.44e-03 1.8e-01 0.0e+00 5.6e-02 1.44e-03
## 5 18 -3.154e+03 1.46e-03 1.16e-03 1.9e-01 0.0e+00 8.7e-02 1.16e-03
## 6 19 -3.155e+03 4.06e-04 2.89e-04 9.0e-02 0.0e+00 5.5e-02 2.89e-04
## 7 20 -3.156e+03 1.28e-04 1.04e-04 5.8e-02 0.0e+00 4.1e-02 1.04e-04
## 8 21 -3.156e+03 1.11e-05 9.76e-06 2.1e-02 0.0e+00 1.6e-02 9.76e-06
## 9 22 -3.156e+03 2.95e-07 2.83e-07 3.5e-03 0.0e+00 2.7e-03 2.83e-07
## 10 23 -3.156e+03 7.57e-10 7.66e-10 2.0e-04 0.0e+00 1.5e-04 7.66e-10
## 11 24 -3.156e+03 9.69e-13 1.02e-12 1.1e-06 0.0e+00 8.7e-07 1.02e-12
##
## ***** RELATIVE FUNCTION CONVERGENCE *****
##
## FUNCTION -3.155817e+03 RELDX 1.111e-06
## FUNC. EVALS 24 GRAD. EVALS 12
## PRELDF 1.018e-12 NPRELDF 1.018e-12
##
## I FINAL X(I) D(I) G(I)
##
## 1 2.798850e-04 1.000e+00 2.490e-01
## 2 3.900802e-01 1.000e+00 4.005e-05
summary(NKER.arch)
##
## Call:
## tseries::garch(x = NKER$daily.returns, order = c(0, 1))
##
## Model:
## GARCH(0,1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.71023 -0.45067 0.04028 0.59324 9.19373
##
## Coefficient(s):
## Estimate Std. Error t value Pr(>|t|)
## a0 0.000279885 0.000006364 43.981 <2e-16 ***
## a1 0.390080215 0.039840355 9.791 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Diagnostic Tests:
## Jarque Bera Test
##
## data: Residuals
## X-squared = 3656.1, df = 2, p-value < 2.2e-16
##
##
## Box-Ljung test
##
## data: Squared.Residuals
## X-squared = 0.6779, df = 1, p-value = 0.4103
So the model seems to capture the ARCH part in the \(a_{1}\) part as the squared residuals are white noise but the Jarque-Bera test suggests deviations from normality. What about a GARCH process?
NKER.garch <- tseries::garch(NKER$daily.returns,c(1,1))
##
## ***** ESTIMATION WITH ANALYTICAL GRADIENT *****
##
##
## I INITIAL X(I) D(I)
##
## 1 3.825265e-04 1.000e+00
## 2 5.000000e-02 1.000e+00
## 3 5.000000e-02 1.000e+00
##
## IT NF F RELDF PRELDF RELDX STPPAR D*STEP NPRELDF
## 0 1 -3.128e+03
## 1 7 -3.130e+03 5.71e-04 9.12e-04 2.9e-04 3.4e+09 2.9e-05 1.53e+06
## 2 8 -3.130e+03 2.39e-05 2.90e-05 2.8e-04 2.0e+00 2.9e-05 2.04e+01
## 3 15 -3.148e+03 5.89e-03 8.66e-03 4.3e-01 2.0e+00 7.7e-02 2.02e+01
## 4 16 -3.156e+03 2.38e-03 2.85e-03 2.8e-01 2.0e+00 7.7e-02 1.85e+00
## 5 17 -3.162e+03 1.97e-03 2.63e-03 2.2e-01 2.0e+00 7.7e-02 1.39e+00
## 6 18 -3.169e+03 2.19e-03 2.51e-03 1.5e-01 2.0e+00 7.7e-02 3.04e-01
## 7 21 -3.188e+03 5.94e-03 7.74e-03 3.5e-01 1.8e+00 3.1e-01 2.04e-01
## 8 32 -3.192e+03 1.29e-03 3.26e-03 1.8e-05 3.1e+00 2.1e-05 6.86e-03
## 9 33 -3.192e+03 1.24e-04 9.49e-05 1.8e-05 2.0e+00 2.1e-05 3.05e-03
## 10 34 -3.192e+03 1.09e-05 1.20e-05 1.8e-05 2.0e+00 2.1e-05 4.26e-03
## 11 41 -3.194e+03 5.24e-04 7.02e-04 3.9e-02 1.6e+00 4.7e-02 4.12e-03
## 12 42 -3.194e+03 5.88e-05 9.32e-05 2.5e-02 0.0e+00 3.2e-02 9.32e-05
## 13 43 -3.194e+03 2.42e-05 2.07e-05 5.5e-03 0.0e+00 8.0e-03 2.07e-05
## 14 44 -3.194e+03 1.45e-05 8.81e-06 4.0e-03 0.0e+00 5.8e-03 8.81e-06
## 15 45 -3.195e+03 2.06e-05 1.99e-05 9.2e-03 3.3e-01 1.6e-02 2.14e-05
## 16 46 -3.195e+03 4.27e-06 4.07e-06 6.0e-03 0.0e+00 9.8e-03 4.07e-06
## 17 47 -3.195e+03 2.23e-07 2.53e-07 7.2e-04 0.0e+00 9.8e-04 2.53e-07
## 18 48 -3.195e+03 1.61e-09 7.10e-09 8.6e-05 0.0e+00 1.2e-04 7.10e-09
## 19 49 -3.195e+03 1.43e-09 2.66e-10 1.4e-05 0.0e+00 2.0e-05 2.66e-10
## 20 50 -3.195e+03 -1.76e-11 4.81e-14 6.3e-07 0.0e+00 9.5e-07 4.81e-14
##
## ***** RELATIVE FUNCTION CONVERGENCE *****
##
## FUNCTION -3.194540e+03 RELDX 6.275e-07
## FUNC. EVALS 50 GRAD. EVALS 20
## PRELDF 4.805e-14 NPRELDF 4.805e-14
##
## I FINAL X(I) D(I) G(I)
##
## 1 5.442536e-05 1.000e+00 -5.739e-01
## 2 1.978270e-01 1.000e+00 -3.235e-04
## 3 6.802930e-01 1.000e+00 -6.299e-04
summary(NKER.garch)
##
## Call:
## tseries::garch(x = NKER$daily.returns, order = c(1, 1))
##
## Model:
## GARCH(1,1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.18910 -0.46830 0.04642 0.59649 10.37362
##
## Coefficient(s):
## Estimate Std. Error t value Pr(>|t|)
## a0 0.00005443 0.00000839 6.487 0.0000000000877 ***
## a1 0.19782696 0.03196108 6.190 0.0000000006031 ***
## b1 0.68029302 0.04434770 15.340 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Diagnostic Tests:
## Jarque Bera Test
##
## data: Residuals
## X-squared = 6926.6, df = 2, p-value < 2.2e-16
##
##
## Box-Ljung test
##
## data: Squared.Residuals
## X-squared = 0.23586, df = 1, p-value = 0.6272