---
title: "Var and ES estimation"
output: html_document
date: '2022-04-06'
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

We will use the **qrmtools** package By Marius Hofert (version 0.0-14):  

```{r, message=FALSE}
library(qrmtools)
```

In what follows we first simulate losses from a given parametric distribution. In particular, we will consider a Pareto distribution, having CDF

$$
F(x) = 1-(1+x/\kappa)^{-\theta}, \quad x\geq0; \quad \theta,\, \kappa>0 
$$

**Question 1** Why do we use a Pareto distribution?

```{r}
#densities
curve(dPar(x, shape = 2, scale=2), from=0, to=8, ylim=c(0,1))
curve(dPar(x, shape = 1/0.1, scale=1/0.1), lty=2, col=2, add = TRUE)
curve(dPar(x, shape = 1), lty=2, col="violet", add = TRUE)
```

```{r}
#cdf
curve(pPar(x, shape = 2, scale=2),  from = 0, to= 8, ylim=c(0,1))
curve(pPar(x, shape = 1/0.1, scale=1/0.1), add=T, lty=2, col=2)
curve(pPar(x, shape = 1), lty=2, col="violet", add = TRUE)
```

```{r}
plot(pPar(rPar(1000, shape = 2, scale = 2), 
      shape = 2, scale=2), ylab="", xlab="") # should be U(0,1)
```

The following code will simulate a sample of size $2500$ from a Pareto distribution with parameters $\theta=2$ and $\kappa=1$ (default):


```{r}
th <- 2                         # Pareto parameter (true underlying model)
set.seed(125)                   # set a seed (for reproducibility)
n <- 2500                       # sample size (~= 10y of daily data)
L <- rPar(n, shape = th)        # simulate losses with the 'inversion method' 
```

#### Nonparametric estimates of risk measures for a fixed alpha 

Let's start from the estimation of $VaR_\alpha$ and $ES_\alpha$ for $\alpha=0.99$ 

```{r, warning=FALSE}
alpha <- 0.99

## Value-at-risk
VaR.np <- VaR_np(L, level = alpha)
VaR.np

## Expected shortfall
ES.np <- ES_np(L, level = alpha)
ES.np
```

```{r, warning=FALSE}
#visualize on data
plot(L)
abline(h = c(VaR.np, ES.np), lty = 2, col=c(2,6))
```

**Question 2** How can we compute VaR and ES with R built-in functions?

```{r}
hist(L, freq = F, breaks = 30)  #histogram of loss
abline(v = c(VaR.np, ES.np), lty = 2, col=c(2,6))
```

We study the behavior in alpha, that is, we plot the nonparametric $VaR_\alpha$ and $ES_\alpha$ estimators as functions of alpha:

```{r, warning=FALSE}
q<-seq(0.5, 5, by = 0.05) # alpha values concentrated near 1
alpha <- 1-10^-q 

# estimate VaR for all alpha
VaR. <- VaR_np(L, level = alpha) 

# estimate ES for all alpha
ES. <- ES_np(L, level = alpha, verbose = TRUE)

## Compare with theoretical VaR and ES values (under the Pareto model)
VaR.Par. <- VaR_Par(alpha, shape = th)
ES.Par. <-  ES_Par(alpha, shape = th)
```

The code below can be used to plot the estimated risk measures with `true' $VaR_\alpha$ and $ES_\alpha$ values 

```{r}
ran <- range(ES.Par., ES., VaR.Par., VaR., na.rm = TRUE)
# true VaR_alpha
plot(alpha, VaR.Par., type = "l", ylim = ran, xlab = expression(alpha),
     ylab = " ") 
title(expression("True"~VaR[alpha]*","~ES[alpha]~"and estimates"))
lines(alpha, ES.Par., type="l", lty=2) # true ES (Pareto)
lines(alpha, VaR., type="l", col=2) # non par. estimates of Var
lines(alpha, ES., type="l", col=5) # non par. estimates of ES
legend("topleft", bty = "n", lty=c(1,2,1,1), col=c(1,1,2,5), 
       legend = c(expression(VaR[alpha]), expression(ES[alpha]), 
                  expression(widehat(VaR)[alpha]), expression(widehat(ES)[alpha])))

```

We can use logarithmic y-axis to improve visualization:

```{r}
ran <- range(ES.Par., ES., VaR.Par., VaR., na.rm = TRUE)
# true VaR_alpha
plot(alpha, VaR.Par., type = "l", ylim = ran, xlab = expression(alpha),
     ylab = " ", log = "y") 
title(expression("True"~VaR[alpha]*","~ES[alpha]~"and estimates"))
lines(alpha, ES.Par., type="l", lty=2) # true ES (Pareto)
lines(alpha, VaR., type="l", col=2) # non par. estimates of Var
lines(alpha, ES., type="l", col=5) # non par. estimates of ES
legend("topleft", bty = "n", lty=c(1,2,1,1), col=c(1,1,2,5), 
       legend = c(expression(VaR[alpha]), expression(ES[alpha]), 
                  expression(widehat(VaR)[alpha]), expression(widehat(ES)[alpha])))
```

An alternative is to consider a log transformation on both x and y, and look at the functions in  $1-\alpha$:

```{r}
ran <- range(ES.Par., ES., VaR.Par., VaR., na.rm = TRUE)
# true VaR_alpha
plot(1-alpha, VaR.Par., type = "l", ylim = ran, xlab = expression(1-alpha),
     ylab = " ", log = "xy") 
title(expression("True"~VaR[alpha]*","~ES[alpha]~"and estimates"))
lines(1-alpha, ES.Par., type="l", lty=2) # true ES (Pareto)
lines(1-alpha, VaR., type="l", col=2) # non par. estimates of Var
lines(1-alpha, ES., type="l", col=5) # non par. estimates of ES
legend("bottomleft", bty = "n", lty=c(1,2,1,1), col=c(1,1,2,5), 
       legend = c(expression(VaR[alpha]), expression(ES[alpha]), 
                  expression(widehat(VaR)[alpha]), expression(widehat(ES)[alpha])))

```

#### Bootstrapping confidence intervals for $VaR_\alpha$, $ES_\alpha$ 

We first define the function **bootstrapRM** to create the matrix containing the bootstrapped estimators having dimensions $\mbox{(length(alpha))}\times B$, where B is the number of bootstrap replications:

```{r}
bootstrapRM<-function(x, B, level, method=c("VaR", "ES"))
{
 method<-match.arg(method)
 rm<-if(method=="VaR"){
 VaR_np
 }else{
   function(x, level) ES_np(x, level=level, verbose = TRUE)
 }
  x.B<-matrix(sample(x, size=n*B, replace = TRUE), ncol = B) # nxB matrix
  apply(x.B, 2, rm, level=level)  #length(level)xB - matrix
 }
```

In addition, the following function returns the empirical quantiles of order $v/2$ and $(1-v/2)$, for $v \in (0,1)$ (dafault is $v=0.05$):

```{r}
Clev<-function(x, v=0.05, na.rm=FALSE)   
{
  quantile(x, c(v/2, 1-v/2), na.rm=na.rm, names=FALSE) 
}
```

The following code computes bootstrapped statistics and confidence intervals for VaR estimators as functions in $\alpha$:

```{r}
set.seed(125)
B<-1000
VaR.boot<-bootstrapRM(L, B=B, level=alpha)
str(VaR.boot)

#mean of VaR estimators
VaR.boot.<-rowMeans(VaR.boot) 

#variance of VaR estimators
VaR.boot.var<- apply(VaR.boot, 1, var)

# CI 95%
VaR.boot.CI<- apply(VaR.boot, 1, Clev)
```  

Similarly, we bootstrap the ES estimator, and obtain the bootstrapped mean, variance and 95\% confidence intervals for $ES_\alpha$.  

```{r, warning=FALSE}
set.seed(125)
B<-1000
ES.boot<-bootstrapRM(L, B=B, level=alpha, method = "ES")

sum(is.na(ES.boot))  #check for NaNs
```

NaNs can be due to too few losses exceeding VaR. Let's check the proportion of NaNs: 

```{r, warning=FALSE}
percNaN <- 100 * apply(is.na(ES.boot), 1, mean) # % of NaNs for all alpha

plot(1-alpha, percNaN, type = "l", log = "x",
     xlab = expression(1-alpha), ylab = "% of NaN")
## => Becomes a serious issue for alpha >= 0.999 
```
Hence,  we compute statistics with NaNs removed

```{r}
ES.boot.<-rowMeans(ES.boot, na.rm = TRUE) 
ES.boot.var<- apply(ES.boot, 1, var, na.rm = TRUE)
ES.boot.CI<- apply(ES.boot, 1, Clev, na.rm = TRUE)
```

Finally, we can plot as a function in alpha:

 - true $VaR_\alpha$ and $ES_\alpha$

 - the nonparametric estimates $\hat{VaR}_\alpha$ and $\hat{ES}_\alpha$

 - the bootstrapped mean of $\hat{VaR}_\alpha$ and $\hat{ES}_\alpha$

 - the bootstrapped 95\% confidence intervals for $VaR_\alpha$ and $ES_\alpha$


```{r, warning=FALSE}
ran <- range(VaR.Par., # true VaR
             VaR., # nonparametric estimate
             VaR.boot., # bootstrapped estimate 
             VaR.boot.CI, # bootstrapped confidence intervals
             ES.Par., ES., ES.boot., ES.boot.var, ES.boot.CI, na.rm = TRUE) 

# true VaR_alpha
plot(1-alpha, VaR.Par., type = "l", log = "xy",
     ylim = ran, xlab = expression(1-alpha), ylab = "") 
# nonparametrically estimated VaR_alpha
lines(1-alpha, VaR., lty = "dashed", lwd = 2, col = 2) 
# bootstrapped nonparametric estimate of VaR_alpha
lines(1-alpha, VaR.boot., lty = "solid", col = 2) 
# bootstrapped 95% CI
lines(1-alpha, VaR.boot.CI[1,], lty = "dotted", col = 2)
lines(1-alpha, VaR.boot.CI[2,], lty = "dotted", col = 2)

## ES_alpha
lines(1-alpha, ES.Par.) # true ES_alpha
# nonparametrically estimated ES_alpha
lines(1-alpha, ES., lty = "dashed", lwd = 2, col = 4) 
# bootstrapped nonparametric estimate of ES_alpha
lines(1-alpha, ES.boot., lty = "solid", col = 4) 
# bootstrapped 95% CI
lines(1-alpha, ES.boot.CI[1,], lty = "dotted", col = 4) 
lines(1-alpha, ES.boot.CI[2,], lty = "dotted", col = 4)

legend("bottomleft", bty = "n",
       lty = c("solid", rep(c("dashed", "solid", "dotted"), 
                            times = 2), "dashed"),
       col = c("black", rep(c(2, 4), each = 3), "black"),
       legend = c(
         ## VaR_alpha
         expression("True"~VaR[alpha]~"and"~ES[alpha]),
         expression(widehat(VaR)[alpha]),
         expression("Bootstrapped"~~widehat(VaR)[alpha]),
         expression("Bootstrapped 95% CIs"~~widehat(VaR)[alpha]),
         ## ES_alpha
         expression(widehat(ES)[alpha]),
         expression("Bootstrapped"~~widehat(ES)[alpha]),
         expression("Bootstrapped 95% CIs"~~widehat(ES)[alpha])))
```

#### Peaks-over-threshold

As an alternative estimation method, the peaks-over-threshold (POT) model for the occurrence of extremes in such data can be implemented. In particular, the GPD (Generalized Pareto Distribution) model for the excess losses over a threshold $u$ is used to estimate the tail of the underlying loss distribution $F$ and associated risk measures. Tail probabilities can be derived by means of the function 

$$
\bar{F}(x)=P(X>x)=\bar{F}(u)\left(1+\xi\frac{x-u}{\beta}\right)^{-1/\xi}, \mbox{ for } x\geq u
$$

where $\bar{F}(u)=P(X>u)$ and 
$$
\left(1+\xi\frac{x-u}{\beta}\right)^{-1/\xi}=\bar{F}_u(x-u)=P(X-u>x-u|X>u),
$$
provided that $x\geq u$ and $F_u(x)=P(X-u\leq x|X>u)$ is the excess distribution over threshold $u$, assumed to be $GPD(\xi, \beta)$.

Hence, if we know $F(u)$, tail probabilities can be obtained by using the formula above. The $VaR_\alpha=q_{\alpha}(F)$ can be derived by solving $1-\alpha=\bar{F}(x)$ for $x$, and $ES_\alpha$ can be calculated as $ES_\alpha=(1/(1-\alpha))\int_{\alpha}^1 q_x(F)\mbox{d}x$. We get

$$
VaR_{\alpha}=u+\frac{\beta}{\xi}\left(\left(\frac{1-\alpha}{\bar{F}(u)}\right)^{-\xi}-1\right); \quad ES_{\alpha}=\frac{VaR_{\alpha}}{1-\xi}+\frac{\beta-\xi u}{1-\xi}
$$


Estimation in practice considers the following main steps:

 - For a fixed $u$ (e.g. a high quantile) we first estimate the parameters of a GPD model by fitting this distribution to the excess losses; denote these estimates with $\hat{\xi}$ (shape) and $\hat{\beta}$ (scale)
 - $\bar{F}(u)$ is estimated by $\hat{\bar{F}}(u)=N_{u}/n$, where $N_u$ is the number of excess losses 
 - $\hat{\bar{F}}(u)$, $\hat{\xi}$ and $\hat{\beta}$ are replaced in the expressions for $VaR_{\alpha}$ and $ES_{\alpha}$, given $\alpha \in (0,1)$. 
  

```{r}
u <- quantile(L, probs = 0.95, names = FALSE)

eL <- L[L > u] - u            # compute the excesses over u
fit <- fit_GPD_MLE(eL)        # fit a GPD to the excesses
xi <- fit$par[["shape"]]      # fitted shape xi
beta <- fit$par[["scale"]]    # fitted scale beta
Fu <- length(eL) / length(L) # number of excesses / number of losses = N_u / n
VaR.POT <- u + (beta/xi)*(((1-alpha)/Fu)^(-xi)-1) 
ES.POT <- (VaR.POT + beta-xi*u) / (1-xi) 

```

```{r, warning=FALSE}
plot(1-alpha, VaR.Par., type = "l", log = "xy",
     ylim = ran, xlab = expression(1-alpha), ylab = "") 
lines(1-alpha, ES.Par.) # true ES_alpha
##  peaks-over-threshold-based estimators
lines(1-alpha, VaR.POT, lty = "dashed")
lines(1-alpha, ES.POT,  lty = "dashed")
legend("bottomleft", bty = "n", lty = c("solid", "dashed"),
       legend=c(expression("True"~VaR[alpha]~"and"~ES[alpha]),
                expression("POT-based"~VaR[alpha]~"and"~ES[alpha]~"estimates")))
## note: CI can be constructed by means of the likelihood ratio test (not shown here)
```