---
title: "Var and ES estimation"
output: html_document
date: '2022-04-06'
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
We will use the **qrmtools** package By Marius Hofert (version 0.0-14):
```{r, message=FALSE}
library(qrmtools)
```
In what follows we first simulate losses from a given parametric distribution. In particular, we will consider a Pareto distribution, having CDF
$$
F(x) = 1-(1+x/\kappa)^{-\theta}, \quad x\geq0; \quad \theta,\, \kappa>0
$$
**Question 1** Why do we use a Pareto distribution?
```{r}
#densities
curve(dPar(x, shape = 2, scale=2), from=0, to=8, ylim=c(0,1))
curve(dPar(x, shape = 1/0.1, scale=1/0.1), lty=2, col=2, add = TRUE)
curve(dPar(x, shape = 1), lty=2, col="violet", add = TRUE)
```
```{r}
#cdf
curve(pPar(x, shape = 2, scale=2), from = 0, to= 8, ylim=c(0,1))
curve(pPar(x, shape = 1/0.1, scale=1/0.1), add=T, lty=2, col=2)
curve(pPar(x, shape = 1), lty=2, col="violet", add = TRUE)
```
```{r}
plot(pPar(rPar(1000, shape = 2, scale = 2),
shape = 2, scale=2), ylab="", xlab="") # should be U(0,1)
```
The following code will simulate a sample of size $2500$ from a Pareto distribution with parameters $\theta=2$ and $\kappa=1$ (default):
```{r}
th <- 2 # Pareto parameter (true underlying model)
set.seed(125) # set a seed (for reproducibility)
n <- 2500 # sample size (~= 10y of daily data)
L <- rPar(n, shape = th) # simulate losses with the 'inversion method'
```
#### Nonparametric estimates of risk measures for a fixed alpha
Let's start from the estimation of $VaR_\alpha$ and $ES_\alpha$ for $\alpha=0.99$
```{r, warning=FALSE}
alpha <- 0.99
## Value-at-risk
VaR.np <- VaR_np(L, level = alpha)
VaR.np
## Expected shortfall
ES.np <- ES_np(L, level = alpha)
ES.np
```
```{r, warning=FALSE}
#visualize on data
plot(L)
abline(h = c(VaR.np, ES.np), lty = 2, col=c(2,6))
```
**Question 2** How can we compute VaR and ES with R built-in functions?
```{r}
hist(L, freq = F, breaks = 30) #histogram of loss
abline(v = c(VaR.np, ES.np), lty = 2, col=c(2,6))
```
We study the behavior in alpha, that is, we plot the nonparametric $VaR_\alpha$ and $ES_\alpha$ estimators as functions of alpha:
```{r, warning=FALSE}
q<-seq(0.5, 5, by = 0.05) # alpha values concentrated near 1
alpha <- 1-10^-q
# estimate VaR for all alpha
VaR. <- VaR_np(L, level = alpha)
# estimate ES for all alpha
ES. <- ES_np(L, level = alpha, verbose = TRUE)
## Compare with theoretical VaR and ES values (under the Pareto model)
VaR.Par. <- VaR_Par(alpha, shape = th)
ES.Par. <- ES_Par(alpha, shape = th)
```
The code below can be used to plot the estimated risk measures with `true' $VaR_\alpha$ and $ES_\alpha$ values
```{r}
ran <- range(ES.Par., ES., VaR.Par., VaR., na.rm = TRUE)
# true VaR_alpha
plot(alpha, VaR.Par., type = "l", ylim = ran, xlab = expression(alpha),
ylab = " ")
title(expression("True"~VaR[alpha]*","~ES[alpha]~"and estimates"))
lines(alpha, ES.Par., type="l", lty=2) # true ES (Pareto)
lines(alpha, VaR., type="l", col=2) # non par. estimates of Var
lines(alpha, ES., type="l", col=5) # non par. estimates of ES
legend("topleft", bty = "n", lty=c(1,2,1,1), col=c(1,1,2,5),
legend = c(expression(VaR[alpha]), expression(ES[alpha]),
expression(widehat(VaR)[alpha]), expression(widehat(ES)[alpha])))
```
We can use logarithmic y-axis to improve visualization:
```{r}
ran <- range(ES.Par., ES., VaR.Par., VaR., na.rm = TRUE)
# true VaR_alpha
plot(alpha, VaR.Par., type = "l", ylim = ran, xlab = expression(alpha),
ylab = " ", log = "y")
title(expression("True"~VaR[alpha]*","~ES[alpha]~"and estimates"))
lines(alpha, ES.Par., type="l", lty=2) # true ES (Pareto)
lines(alpha, VaR., type="l", col=2) # non par. estimates of Var
lines(alpha, ES., type="l", col=5) # non par. estimates of ES
legend("topleft", bty = "n", lty=c(1,2,1,1), col=c(1,1,2,5),
legend = c(expression(VaR[alpha]), expression(ES[alpha]),
expression(widehat(VaR)[alpha]), expression(widehat(ES)[alpha])))
```
An alternative is to consider a log transformation on both x and y, and look at the functions in $1-\alpha$:
```{r}
ran <- range(ES.Par., ES., VaR.Par., VaR., na.rm = TRUE)
# true VaR_alpha
plot(1-alpha, VaR.Par., type = "l", ylim = ran, xlab = expression(1-alpha),
ylab = " ", log = "xy")
title(expression("True"~VaR[alpha]*","~ES[alpha]~"and estimates"))
lines(1-alpha, ES.Par., type="l", lty=2) # true ES (Pareto)
lines(1-alpha, VaR., type="l", col=2) # non par. estimates of Var
lines(1-alpha, ES., type="l", col=5) # non par. estimates of ES
legend("bottomleft", bty = "n", lty=c(1,2,1,1), col=c(1,1,2,5),
legend = c(expression(VaR[alpha]), expression(ES[alpha]),
expression(widehat(VaR)[alpha]), expression(widehat(ES)[alpha])))
```
#### Bootstrapping confidence intervals for $VaR_\alpha$, $ES_\alpha$
We first define the function **bootstrapRM** to create the matrix containing the bootstrapped estimators having dimensions $\mbox{(length(alpha))}\times B$, where B is the number of bootstrap replications:
```{r}
bootstrapRM<-function(x, B, level, method=c("VaR", "ES"))
{
method<-match.arg(method)
rm<-if(method=="VaR"){
VaR_np
}else{
function(x, level) ES_np(x, level=level, verbose = TRUE)
}
x.B<-matrix(sample(x, size=n*B, replace = TRUE), ncol = B) # nxB matrix
apply(x.B, 2, rm, level=level) #length(level)xB - matrix
}
```
In addition, the following function returns the empirical quantiles of order $v/2$ and $(1-v/2)$, for $v \in (0,1)$ (dafault is $v=0.05$):
```{r}
Clev<-function(x, v=0.05, na.rm=FALSE)
{
quantile(x, c(v/2, 1-v/2), na.rm=na.rm, names=FALSE)
}
```
The following code computes bootstrapped statistics and confidence intervals for VaR estimators as functions in $\alpha$:
```{r}
set.seed(125)
B<-1000
VaR.boot<-bootstrapRM(L, B=B, level=alpha)
str(VaR.boot)
#mean of VaR estimators
VaR.boot.<-rowMeans(VaR.boot)
#variance of VaR estimators
VaR.boot.var<- apply(VaR.boot, 1, var)
# CI 95%
VaR.boot.CI<- apply(VaR.boot, 1, Clev)
```
Similarly, we bootstrap the ES estimator, and obtain the bootstrapped mean, variance and 95\% confidence intervals for $ES_\alpha$.
```{r, warning=FALSE}
set.seed(125)
B<-1000
ES.boot<-bootstrapRM(L, B=B, level=alpha, method = "ES")
sum(is.na(ES.boot)) #check for NaNs
```
NaNs can be due to too few losses exceeding VaR. Let's check the proportion of NaNs:
```{r, warning=FALSE}
percNaN <- 100 * apply(is.na(ES.boot), 1, mean) # % of NaNs for all alpha
plot(1-alpha, percNaN, type = "l", log = "x",
xlab = expression(1-alpha), ylab = "% of NaN")
## => Becomes a serious issue for alpha >= 0.999
```
Hence, we compute statistics with NaNs removed
```{r}
ES.boot.<-rowMeans(ES.boot, na.rm = TRUE)
ES.boot.var<- apply(ES.boot, 1, var, na.rm = TRUE)
ES.boot.CI<- apply(ES.boot, 1, Clev, na.rm = TRUE)
```
Finally, we can plot as a function in alpha:
- true $VaR_\alpha$ and $ES_\alpha$
- the nonparametric estimates $\hat{VaR}_\alpha$ and $\hat{ES}_\alpha$
- the bootstrapped mean of $\hat{VaR}_\alpha$ and $\hat{ES}_\alpha$
- the bootstrapped 95\% confidence intervals for $VaR_\alpha$ and $ES_\alpha$
```{r, warning=FALSE}
ran <- range(VaR.Par., # true VaR
VaR., # nonparametric estimate
VaR.boot., # bootstrapped estimate
VaR.boot.CI, # bootstrapped confidence intervals
ES.Par., ES., ES.boot., ES.boot.var, ES.boot.CI, na.rm = TRUE)
# true VaR_alpha
plot(1-alpha, VaR.Par., type = "l", log = "xy",
ylim = ran, xlab = expression(1-alpha), ylab = "")
# nonparametrically estimated VaR_alpha
lines(1-alpha, VaR., lty = "dashed", lwd = 2, col = 2)
# bootstrapped nonparametric estimate of VaR_alpha
lines(1-alpha, VaR.boot., lty = "solid", col = 2)
# bootstrapped 95% CI
lines(1-alpha, VaR.boot.CI[1,], lty = "dotted", col = 2)
lines(1-alpha, VaR.boot.CI[2,], lty = "dotted", col = 2)
## ES_alpha
lines(1-alpha, ES.Par.) # true ES_alpha
# nonparametrically estimated ES_alpha
lines(1-alpha, ES., lty = "dashed", lwd = 2, col = 4)
# bootstrapped nonparametric estimate of ES_alpha
lines(1-alpha, ES.boot., lty = "solid", col = 4)
# bootstrapped 95% CI
lines(1-alpha, ES.boot.CI[1,], lty = "dotted", col = 4)
lines(1-alpha, ES.boot.CI[2,], lty = "dotted", col = 4)
legend("bottomleft", bty = "n",
lty = c("solid", rep(c("dashed", "solid", "dotted"),
times = 2), "dashed"),
col = c("black", rep(c(2, 4), each = 3), "black"),
legend = c(
## VaR_alpha
expression("True"~VaR[alpha]~"and"~ES[alpha]),
expression(widehat(VaR)[alpha]),
expression("Bootstrapped"~~widehat(VaR)[alpha]),
expression("Bootstrapped 95% CIs"~~widehat(VaR)[alpha]),
## ES_alpha
expression(widehat(ES)[alpha]),
expression("Bootstrapped"~~widehat(ES)[alpha]),
expression("Bootstrapped 95% CIs"~~widehat(ES)[alpha])))
```
#### Peaks-over-threshold
As an alternative estimation method, the peaks-over-threshold (POT) model for the occurrence of extremes in such data can be implemented. In particular, the GPD (Generalized Pareto Distribution) model for the excess losses over a threshold $u$ is used to estimate the tail of the underlying loss distribution $F$ and associated risk measures. Tail probabilities can be derived by means of the function
$$
\bar{F}(x)=P(X>x)=\bar{F}(u)\left(1+\xi\frac{x-u}{\beta}\right)^{-1/\xi}, \mbox{ for } x\geq u
$$
where $\bar{F}(u)=P(X>u)$ and
$$
\left(1+\xi\frac{x-u}{\beta}\right)^{-1/\xi}=\bar{F}_u(x-u)=P(X-u>x-u|X>u),
$$
provided that $x\geq u$ and $F_u(x)=P(X-u\leq x|X>u)$ is the excess distribution over threshold $u$, assumed to be $GPD(\xi, \beta)$.
Hence, if we know $F(u)$, tail probabilities can be obtained by using the formula above. The $VaR_\alpha=q_{\alpha}(F)$ can be derived by solving $1-\alpha=\bar{F}(x)$ for $x$, and $ES_\alpha$ can be calculated as $ES_\alpha=(1/(1-\alpha))\int_{\alpha}^1 q_x(F)\mbox{d}x$. We get
$$
VaR_{\alpha}=u+\frac{\beta}{\xi}\left(\left(\frac{1-\alpha}{\bar{F}(u)}\right)^{-\xi}-1\right); \quad ES_{\alpha}=\frac{VaR_{\alpha}}{1-\xi}+\frac{\beta-\xi u}{1-\xi}
$$
Estimation in practice considers the following main steps:
- For a fixed $u$ (e.g. a high quantile) we first estimate the parameters of a GPD model by fitting this distribution to the excess losses; denote these estimates with $\hat{\xi}$ (shape) and $\hat{\beta}$ (scale)
- $\bar{F}(u)$ is estimated by $\hat{\bar{F}}(u)=N_{u}/n$, where $N_u$ is the number of excess losses
- $\hat{\bar{F}}(u)$, $\hat{\xi}$ and $\hat{\beta}$ are replaced in the expressions for $VaR_{\alpha}$ and $ES_{\alpha}$, given $\alpha \in (0,1)$.
```{r}
u <- quantile(L, probs = 0.95, names = FALSE)
eL <- L[L > u] - u # compute the excesses over u
fit <- fit_GPD_MLE(eL) # fit a GPD to the excesses
xi <- fit$par[["shape"]] # fitted shape xi
beta <- fit$par[["scale"]] # fitted scale beta
Fu <- length(eL) / length(L) # number of excesses / number of losses = N_u / n
VaR.POT <- u + (beta/xi)*(((1-alpha)/Fu)^(-xi)-1)
ES.POT <- (VaR.POT + beta-xi*u) / (1-xi)
```
```{r, warning=FALSE}
plot(1-alpha, VaR.Par., type = "l", log = "xy",
ylim = ran, xlab = expression(1-alpha), ylab = "")
lines(1-alpha, ES.Par.) # true ES_alpha
## peaks-over-threshold-based estimators
lines(1-alpha, VaR.POT, lty = "dashed")
lines(1-alpha, ES.POT, lty = "dashed")
legend("bottomleft", bty = "n", lty = c("solid", "dashed"),
legend=c(expression("True"~VaR[alpha]~"and"~ES[alpha]),
expression("POT-based"~VaR[alpha]~"and"~ES[alpha]~"estimates")))
## note: CI can be constructed by means of the likelihood ratio test (not shown here)
```