Data Science for Insurance: Var and ES estimation

We will use the qrmtools package By Marius Hofert (Hofert et al., 2022) :

library(qrmtools)

In what follows we first simulate losses from a given parametric distribution. In particular, we will consider a Pareto distribution, having CDF

\[ F(x) = 1-(1+x/\kappa)^{-\theta}, \quad x\geq0; \quad \theta,\, \kappa>0 \]

Question 1 Why do we use a Pareto distribution?

#densities
curve(dPar(x, shape = 2, scale=2), from=0, to=8, ylim=c(0,1))
curve(dPar(x, shape = 1/0.1, scale=1/0.1), lty=2, col=2, add = TRUE)
curve(dPar(x, shape = 1), lty=2, col="violet", add = TRUE)

#cdf
curve(pPar(x, shape = 2, scale=2),  from = 0, to= 8, ylim=c(0,1))
curve(pPar(x, shape = 1/0.1, scale=1/0.1), add=T, lty=2, col=2)
curve(pPar(x, shape = 1), lty=2, col="violet", add = TRUE)

plot(pPar(rPar(1000, shape = 2, scale = 2), 
      shape = 2, scale=2), ylab="", xlab="") # should be U(0,1)

The following code will simulate a sample of size \(2500\) from a Pareto distribution with parameters \(\theta=2\) and \(\kappa=1\) (default):

th <- 2                         # Pareto parameter (true underlying model)
set.seed(125)                   # set a seed (for reproducibility)
n <- 2500                       # sample size (~= 10y of daily data)
L <- rPar(n, shape = th)        # simulate losses with the 'inversion method'

Nonparametric estimates of risk measures for a fixed alpha

Let’s start from the estimation of \(VaR_\alpha\) and \(ES_\alpha\) for \(\alpha=0.99\)

alpha <- 0.99

## Value-at-risk
VaR.np <- VaR_np(L, level = alpha)
VaR.np

## [1] 9.947986

## Expected shortfall
ES.np <- ES_np(L, level = alpha)
ES.np

## [1] 17.19785

#visualize on data
plot(L)
abline(h = c(VaR.np, ES.np), lty = 2, col=c(2,6))

Question 2 How can we compute VaR and ES with R built-in functions?

hist(L, freq = F, breaks = 30)  #histogram of loss
abline(v = c(VaR.np, ES.np), lty = 2, col=c(2,6))

We study the behavior in alpha, that is, we plot the nonparametric \(VaR_\alpha\) and \(ES_\alpha\) estimators as functions of alpha:

q<-seq(0.5, 5, by = 0.05) # alpha values concentrated near 1
alpha <- 1-10^-q 

# estimate VaR for all alpha
VaR. <- VaR_np(L, level = alpha) 

# estimate ES for all alpha
ES. <- ES_np(L, level = alpha, verbose = TRUE)

## Compare with theoretical VaR and ES values (under the Pareto model)
VaR.Par. <- VaR_Par(alpha, shape = th)
ES.Par. <-  ES_Par(alpha, shape = th)

The code below can be used to plot the estimated risk measures with `true’ \(VaR_\alpha\) and \(ES_\alpha\) values

ran <- range(ES.Par., ES., VaR.Par., VaR., na.rm = TRUE)
# true VaR_alpha
plot(alpha, VaR.Par., type = "l", ylim = ran, xlab = expression(alpha),
     ylab = " ") 
title(expression("True"~VaR[alpha]*","~ES[alpha]~"and estimates"))
lines(alpha, ES.Par., type="l", lty=2) # true ES (Pareto)
lines(alpha, VaR., type="l", col=2) # non par. estimates of Var
lines(alpha, ES., type="l", col=5) # non par. estimates of ES
legend("topleft", bty = "n", lty=c(1,2,1,1), col=c(1,1,2,5), 
       legend = c(expression(VaR[alpha]), expression(ES[alpha]), 
                  expression(widehat(VaR)[alpha]), expression(widehat(ES)[alpha])))

We can use logarithmic y-axis to improve visualization:

ran <- range(ES.Par., ES., VaR.Par., VaR., na.rm = TRUE)
# true VaR_alpha
plot(alpha, VaR.Par., type = "l", ylim = ran, xlab = expression(alpha),
     ylab = " ", log = "y") 
title(expression("True"~VaR[alpha]*","~ES[alpha]~"and estimates"))
lines(alpha, ES.Par., type="l", lty=2) # true ES (Pareto)
lines(alpha, VaR., type="l", col=2) # non par. estimates of Var
lines(alpha, ES., type="l", col=5) # non par. estimates of ES
legend("topleft", bty = "n", lty=c(1,2,1,1), col=c(1,1,2,5), 
       legend = c(expression(VaR[alpha]), expression(ES[alpha]), 
                  expression(widehat(VaR)[alpha]), expression(widehat(ES)[alpha])))

An alternative is to consider a log transformation on both x and y, and look at the functions in \(1-\alpha\):

ran <- range(ES.Par., ES., VaR.Par., VaR., na.rm = TRUE)
# true VaR_alpha
plot(1-alpha, VaR.Par., type = "l", ylim = ran, xlab = expression(1-alpha),
     ylab = " ", log = "xy") 
title(expression("True"~VaR[alpha]*","~ES[alpha]~"and estimates"))
lines(1-alpha, ES.Par., type="l", lty=2) # true ES (Pareto)
lines(1-alpha, VaR., type="l", col=2) # non par. estimates of Var
lines(1-alpha, ES., type="l", col=5) # non par. estimates of ES
legend("bottomleft", bty = "n", lty=c(1,2,1,1), col=c(1,1,2,5), 
       legend = c(expression(VaR[alpha]), expression(ES[alpha]), 
                  expression(widehat(VaR)[alpha]), expression(widehat(ES)[alpha])))

Bootstrapping confidence intervals for \(VaR_\alpha\), \(ES_\alpha\)

We first define the function bootstrapRM to create the matrix containing the bootstrapped estimators having dimensions \(\mbox{(length(alpha))}\times B\), where B is the number of bootstrap replications:

bootstrapRM<-function(x, B, level, method=c("VaR", "ES"))
{
 method<-match.arg(method)
 rm<-if(method=="VaR"){
 VaR_np
 }else{
   function(x, level) ES_np(x, level=level, verbose = TRUE)
 }
  x.B<-matrix(sample(x, size=n*B, replace = TRUE), ncol = B) # nxB matrix
  apply(x.B, 2, rm, level=level)  #length(level)xB - matrix
 }

In addition, the following function returns the empirical quantiles of order \(v/2\) and \((1-v/2)\), for \(v \in (0,1)\) (dafault is \(v=0.05\)):

Clev<-function(x, v=0.05, na.rm=FALSE)   
{
  quantile(x, c(v/2, 1-v/2), na.rm=na.rm, names=FALSE) 
}

The following code computes bootstrapped statistics and confidence intervals for VaR estimators as functions in \(\alpha\):

set.seed(125)
B<-1000
VaR.boot<-bootstrapRM(L, B=B, level=alpha)
str(VaR.boot)

##  num [1:91, 1:1000] 0.758 0.866 1.001 1.071 1.181 ...

#mean of VaR estimators
VaR.boot.<-rowMeans(VaR.boot) 

#variance of VaR estimators
VaR.boot.var<- apply(VaR.boot, 1, var)

# CI 95%
VaR.boot.CI<- apply(VaR.boot, 1, Clev)

Similarly, we bootstrap the ES estimator, and obtain the bootstrapped mean, variance and 95% confidence intervals for \(ES_\alpha\).

set.seed(125)
B<-1000
ES.boot<-bootstrapRM(L, B=B, level=alpha, method = "ES")

sum(is.na(ES.boot))  #check for NaNs

## [1] 36057

NaNs can be due to too few losses exceeding VaR. Let’s check the proportion of NaNs:

percNaN <- 100 * apply(is.na(ES.boot), 1, mean) # % of NaNs for all alpha

plot(1-alpha, percNaN, type = "l", log = "x",
     xlab = expression(1-alpha), ylab = "% of NaN")

## => Becomes a serious issue for alpha >= 0.999

Hence, we compute statistics with NaNs removed

ES.boot.<-rowMeans(ES.boot, na.rm = TRUE) 
ES.boot.var<- apply(ES.boot, 1, var, na.rm = TRUE)
ES.boot.CI<- apply(ES.boot, 1, Clev, na.rm = TRUE)

Finally, we can plot as a function in alpha:

true \(VaR_\alpha\) and \(ES_\alpha\)
the nonparametric estimates \(\hat{VaR}_\alpha\) and \(\hat{ES}_\alpha\)
the bootstrapped mean of \(\hat{VaR}_\alpha\) and \(\hat{ES}_\alpha\)
the bootstrapped 95% confidence intervals for \(VaR_\alpha\) and \(ES_\alpha\)

ran <- range(VaR.Par., # true VaR
             VaR., # nonparametric estimate
             VaR.boot., # bootstrapped estimate 
             VaR.boot.CI, # bootstrapped confidence intervals
             ES.Par., ES., ES.boot., ES.boot.var, ES.boot.CI, na.rm = TRUE) 

# true VaR_alpha
plot(1-alpha, VaR.Par., type = "l", log = "xy",
     ylim = ran, xlab = expression(1-alpha), ylab = "") 
# nonparametrically estimated VaR_alpha
lines(1-alpha, VaR., lty = "dashed", lwd = 2, col = 2) 
# bootstrapped nonparametric estimate of VaR_alpha
lines(1-alpha, VaR.boot., lty = "solid", col = 2) 
# bootstrapped 95% CI
lines(1-alpha, VaR.boot.CI[1,], lty = "dotted", col = 2)
lines(1-alpha, VaR.boot.CI[2,], lty = "dotted", col = 2)

## ES_alpha
lines(1-alpha, ES.Par.) # true ES_alpha
# nonparametrically estimated ES_alpha
lines(1-alpha, ES., lty = "dashed", lwd = 2, col = 4) 
# bootstrapped nonparametric estimate of ES_alpha
lines(1-alpha, ES.boot., lty = "solid", col = 4) 
# bootstrapped 95% CI
lines(1-alpha, ES.boot.CI[1,], lty = "dotted", col = 4) 
lines(1-alpha, ES.boot.CI[2,], lty = "dotted", col = 4)

legend("bottomleft", bty = "n",
       lty = c("solid", rep(c("dashed", "solid", "dotted"), 
                            times = 2), "dashed"),
       col = c("black", rep(c(2, 4), each = 3), "black"),
       legend = c(
         ## VaR_alpha
         expression("True"~VaR[alpha]~"and"~ES[alpha]),
         expression(widehat(VaR)[alpha]),
         expression("Bootstrapped"~~widehat(VaR)[alpha]),
         expression("Bootstrapped 95% CIs"~~widehat(VaR)[alpha]),
         ## ES_alpha
         expression(widehat(ES)[alpha]),
         expression("Bootstrapped"~~widehat(ES)[alpha]),
         expression("Bootstrapped 95% CIs"~~widehat(ES)[alpha])))

Peaks-over-threshold

As an alternative estimation method, the peaks-over-threshold (POT) model for the occurrence of extremes in such data can be implemented. In particular, the GPD (Generalized Pareto Distribution) model for the excess losses over a threshold \(u\) is used to estimate the tail of the underlying loss distribution \(F\) and associated risk measures. Tail probabilities can be derived by means of the function

\[ \bar{F}(x)=P(X>x)=\bar{F}(u)\left(1+\xi\frac{x-u}{\beta}\right)^{-1/\xi}, \mbox{ for } x\geq u \]

where \(\bar{F}(u)=P(X>u)\) and \[ \left(1+\xi\frac{x-u}{\beta}\right)^{-1/\xi}=\bar{F}_u(x-u)=P(X-u>x-u|X>u), \] provided that \(x\geq u\) and \(F_u(x)=P(X-u\leq x|X>u)\) is the excess distribution over threshold \(u\), assumed to be \(GPD(\xi, \beta)\).

Hence, if we know \(F(u)\), tail probabilities can be obtained by using the formula above. The \(VaR_\alpha=q_{\alpha}(F)\) can be derived by solving \(1-\alpha=\bar{F}(x)\) for \(x\), and \(ES_\alpha\) can be calculated as \(ES_\alpha=(1/(1-\alpha))\int_{\alpha}^1 q_x(F)\mbox{d}x\). We get

\[ VaR_{\alpha}=u+\frac{\beta}{\xi}\left(\left(\frac{1-\alpha}{\bar{F}(u)}\right)^{-\xi}-1\right); \quad ES_{\alpha}=\frac{VaR_{\alpha}}{1-\xi}+\frac{\beta-\xi u}{1-\xi} \]

Estimation in practice considers the following main steps:

For a fixed \(u\) (e.g. a high quantile) we first estimate the parameters of a GPD model by fitting this distribution to the excess losses; denote these estimates with \(\hat{\xi}\) (shape) and \(\hat{\beta}\) (scale)
\(\bar{F}(u)\) is estimated by \(\hat{\bar{F}}(u)=N_{u}/n\), where \(N_u\) is the number of excess losses
\(\hat{\bar{F}}(u)\), \(\hat{\xi}\) and \(\hat{\beta}\) are replaced in the expressions for \(VaR_{\alpha}\) and \(ES_{\alpha}\), given \(\alpha \in (0,1)\).

u <- quantile(L, probs = 0.95, names = FALSE)

eL <- L[L > u] - u            # compute the excesses over u
fit <- fit_GPD_MLE(eL)        # fit a GPD to the excesses
xi <- fit$par[["shape"]]      # fitted shape xi
beta <- fit$par[["scale"]]    # fitted scale beta
Fu <- length(eL) / length(L) # number of excesses / number of losses = N_u / n
VaR.POT <- u + (beta/xi)*(((1-alpha)/Fu)^(-xi)-1) 
ES.POT <- (VaR.POT + beta-xi*u) / (1-xi)

plot(1-alpha, VaR.Par., type = "l", log = "xy",
     ylim = ran, xlab = expression(1-alpha), ylab = "") 
lines(1-alpha, ES.Par.) # true ES_alpha
##  peaks-over-threshold-based estimators
lines(1-alpha, VaR.POT, lty = "dashed")
lines(1-alpha, ES.POT,  lty = "dashed")
legend("bottomleft", bty = "n", lty = c("solid", "dashed"),
       legend=c(expression("True"~VaR[alpha]~"and"~ES[alpha]),
                expression("POT-based"~VaR[alpha]~"and"~ES[alpha]~"estimates")))

## note: CI can be constructed by means of the likelihood ratio test (not shown here)

Hofert M, Hornik K, McNeil A (2022). qrmtools: Tools for Quantitative Risk Management. R package version 0.0-16, https://CRAN.R-project.org/package=qrmtools.

Data Science for Insurance: Var and ES estimation

March 2024

Nonparametric estimates of risk measures for a fixed alpha

Bootstrapping confidence intervals for \(VaR_\alpha\), \(ES_\alpha\)

Peaks-over-threshold