# cambiare directory # setwd("inserire directory qui") # caricare il data set da un file csv # file "ripulito" per semplicita' # "header = TRUE" cosi' R sa che la prima riga contiene i nomi delle variabili data <- read.csv("perdite.csv", header = TRUE) # cosa c'e' in "data"? str(data) head(data) tail(data) # estraiamo i sinistri (eg, in milioni di euro) claims <- data[, 2] n <- length(claims) # statistiche di base: min, 25% percentile, mediana, 75% percentile, max fivenum(claims) # summary: come fivenum, piu' la media summary(claims) # media, sd, asimmetria, curtosi install.packages("moments") library(moments) c(mean(claims), sd(claims), skewness(claims), kurtosis(claims)) # istogrammi e densita' (nonparametrica) hist(x = claims, breaks = 50, freq = FALSE, ylim = c(0, 1)) lines(x = density(claims), col = "red") # fdr empirica e funzione di sopravvivenza FX <- ecdf(x = claims) SX <- function(x)1 - FX(x) curve(FX, from = 0, to = 15) curve(SX, from = 0, to = 15) # anche, piu' semplice plot(FX) plot(SX) plot(SX, 0, 5) ##################################################### # metodo storico per il calcolo del VaR e ES (TVaR) # ##################################################### # funzione "quantile"; l'opzione "type" permette di scegliere il tipo di quantile # "type = 1" per il quantile sinistro # "type = 4" per la versione continua vista in classe VaR.HS <- function(x, alpha)quantile(x = x, probs = alpha, type = 1) alpha <- 0.95 (VaR <- VaR.HS(x = claims, alpha = alpha)) # quantile e' vettorizzato rispetto a p VaR.HS(x = claims, alpha = c(0.9, 0.95, 0.99, 0.995)) # oppure direttamente # statistica d'ordine s.claims <- sort(claims) s.claims[ceiling(alpha * n)] # plot p <- seq(0.8, 1, by = 0.01) plot(p, VaR.HS(x = claims, alpha = p), type = "b", ylim = range(claims)) # ES ES.HS <- function(x, alpha){ VaR <- VaR.HS(x = x, alpha = alpha) n <- length(x) res <- (sum(x[x > VaR]) + (n * alpha - floor(n * alpha)) * VaR) / (n * (1 - alpha)) ifelse(alpha == 1, max(x), res) } ES.HS(x = claims, alpha = alpha) # ES non e' vettorizzato rispetto alpha! ES.HS(x = claims, alpha = c(0.9, 0.95, 0.99, 0.995)) # sbagliato! # corretto, con sapply sapply(X = c(0.9, 0.95, 0.99, 0.995), FUN = ES.HS, x = claims) lines(p, sapply(X = p, FUN = ES.HS, x = claims), col = "red") # usando la definizione # integrate(f = function(s)quantile(x = claims, probs = s, type = 1), lower = p, upper = 1, subdivisions = 1000)$value / (1 - p) # non funziona install.packages("cubature") library(cubature) hcubature(f = function(beta)quantile(x = claims, probs = beta, type = 1), lowerLimit = alpha, upperLimit = 1, tol = 1E-10, maxEval = 10000)$integral / (1 - alpha) # TVaR (TVaR_l.HS <- mean(claims[claims >= VaR])) (TVaR_u.HS <- mean(claims[claims > VaR])) # quanti elementi nella media? sum(claims > VaR) # intervalli di confidenza per VaR e TVaR? bootstrap nonparametrico # dimensione bootstrap m <- 10000 # prepara vettori VaR.bootstrap <- ES.bootstrap <- vector(mode = "numeric", length = m) # ciclo di bootstrap set.seed(0) for(i in 1:m){ temp <- sample(x = claims, size = n, replace = TRUE) VaR.bootstrap[i] <- VaR.HS(x = temp, alpha = alpha) ES.bootstrap[i] <- ES.HS(x = temp, alpha = alpha) } # stima di bootstrap di VaR e ES (VaR.bootstrap.c <- mean(x = VaR.bootstrap)) (ES.bootstrap.c <- mean(x = ES.bootstrap)) # 95% confidence intervals (VaR.bootstrap.ci <- quantile(x = VaR.bootstrap, probs = c(0.025, 0.975), type = 1)) (ES.bootstrap.ci <- quantile(x = ES.bootstrap, probs = c(0.025, 0.975), type = 1)) # distribuzione dei VaR e ES: istogramma hist(x = VaR.bootstrap, breaks = 50, freq = FALSE) abline(v = VaR.bootstrap.c, col = "red") abline(v = VaR.bootstrap.ci, col = "red", lty = 2) hist(x = ES.bootstrap, breaks = 50, freq = FALSE) abline(v = ES.bootstrap.c, col = "red") abline(v = ES.bootstrap.ci, col = "red", lty = 2) # alternativa: pacchetto "boot" # install.packages("boot") library(boot) VaR_ES <- function(dati, indici = 1:length(dati), alpha = 0.95){ selezione <- dati[indici] VaR <- VaR.HS(x = selezione, alpha = alpha) ES <- ES.HS(x = selezione, alpha = alpha) return(c(VaR, ES)) } VaR_ES_bootstrap <- boot(data = claims, statistic = VaR_ES, R = m, sim = "ordinary", alpha = alpha) boot.ci(boot.out = VaR_ES_bootstrap, conf = 0.99, type = "perc", index = 1) boot.ci(boot.out = VaR_ES_bootstrap, conf = 0.99, type = "perc", index = 2) ################################################# # metodo parametrico per il calcolo di VaR e ES # # fitting di una distribuzione # ################################################# install.packages(c("fitdistrplus", "actuar")) library(fitdistrplus) library(actuar) plotdist(data = claims) # fitdistrplus permette il fit di distribuzioni per cui e' definita la densita', funzione di ripartizione e la sua inversa # predefinite: eg dgamma, pgamma, qgamma, (rgamma) # altre: "norm", "lnorm", "pois", "exp", "gamma", "nbinom", "geom", "beta", "unif", "logis" # si possono usare altre distribuzioni purche' siano definite le corrispondenti funzioni e bisogna inserire i valori di partenza dell'algoritmo in "start" # actuar include molte distribuzioni aggiuntive per la perdita spesso usate in GI # Burr, GeneralizedBeta, GeneralizedPareto,Gumbel, InverseBurr, InverseExponential, InverseGamma, InverseGaussian, InverseParalogistic, InversePareto, InverseTransformedGamma, InverseWeibull, Logarithmic, Loggamma, Loglogistic, Paralogistic, Pareto, PoissonInverseGaussian, SingleParameterPareto, TransformedBeta, TransformedGamma, ZeroModifiedBinomia, ZeroModifiedGeometric, ZeroModifiedLogarithmic, ZeroModifiedNegativeBinomial, ZeroModifiedPoisson, ZeroTruncatedBinomial, ZeroTruncatedGeometric, ZeroTruncatedNegativeBinomial, ZeroTruncatedPoisson, # method = "mle": massima verosimiglianza, "mme": metodo dei momenti, "qme": metodo dei quantili # gamma fit1 <- fitdist(data = claims, distr = "gamma", method = "mle") # lognormale fit2 <- fitdist(data = claims, distr = "lnorm", method = "mle") # weibull fit3 <- fitdist(data = claims, distr = "weibull", method = "mle") # pareto (dal pacchetto "actuar") fit4 <- fitdist(data = claims, distr = "pareto", method = "mle") # gamma inversa (dal pacchetto "actuar") fit5 <- fitdist(data = claims, distr = "invgamma", method = "mle") plot(fit1) plot(fit2) plot(fit3) plot(fit4) plot(fit5) # analisi e confronto modelli <- list(fit1, fit2, fit3, fit4, fit5) leg <- c("gamma", "lognormale", "weibull", "pareto", "gamma inversa") par(mfrow = c(1, 1)) # confronto delle densita' e istogramma denscomp(ft = modelli, legendtext = leg, breaks = 50, plotstyle = "ggplot") denscomp(ft = modelli, legendtext = leg, breaks = 50, plotstyle = "ggplot", xlim = c(0, 5)) denscomp(ft = modelli, legendtext = leg, breaks = 50, plotstyle = "ggplot", xlim = c(5, 10), ylim = c(0, 0.025)) # confronto delle fdr cdfcomp(ft = modelli, legendtext = leg) cdfcomp(ft = modelli, legendtext = leg, xlim = c(0, 5)) cdfcomp(ft = modelli, legendtext = leg, xlim = c(0.1, 10), xlogscale = TRUE) # qq e pp plot qqcomp(ft = modelli, legendtext = leg, line01lty = 1) ppcomp(ft = modelli, legendtext = leg, line01lty = 1) # goodness-of-fit statistiche gofstat(f = modelli) # scegliamo la gamma inversa # calcolo di VaR e ES=TVaR # (VaR.par.gamma <- qgamma(p = alpha, shape = fit1$estimate[["shape"]], rate = fit1$estimate[["rate"]])) # (VaR.par.lnorm <- qlnorm(p = alpha, meanlog = fit2$estimate[["meanlog"]], sdlog = fit2$estimate[["sdlog"]])) # (VaR.par.weibull <- qweibull(p = alpha, shape = fit3$estimate[["shape"]], scale = fit3$estimate[["scale"]])) # (VaR.par.pareto <- qpareto(p = alpha, shape = fit4$estimate[["shape"]], scale = fit4$estimate[["scale"]])) ?dinvgamma sh <- fit5$estimate[["shape"]] sc <- fit5$estimate[["scale"]] VaR.par.invgamma <- qinvgamma(p = alpha, shape = sh, scale = sc) # calcolo del ES analitico via integrazione numerica ES.par.invgamma <- integrate(f = function(beta)qinvgamma(p = beta, shape = sh, scale = sc), lower = alpha, upper = 1)$value / (1 - alpha) confint(fit5) # con il pacchetto cvar # install.packages("cvar") library(help = "cvar") ?cvar::VaR -cvar::VaR(dist = "qinvgamma", x = alpha, shape = sh, scale = sc) cvar::ES(dist = function(z)-qinvgamma(p = 1 - z, shape = sh, scale = sc), x = 1 - alpha) # uso del bootstrap parametrico per ricavare l'intervalli di confidenza per VaR e TVaR nel caso della gamma inversa # dimensione bootstrap m <- 1000 VaR.par.bootstrap <- ES.par.bootstrap <- vector(mode = "numeric", length = m) set.seed(0) system.time(for (i in 1 : m) { # genera campione di bootstrap dalla distribuzione fittata con stessa numerosita' temp <- rinvgamma(n = n, shape = sh, scale = sc) fit.temp <- fitdist(data = temp, distr = "invgamma", method = "mle") sh.temp <- fit.temp$estimate[["shape"]] sc.temp <- fit.temp$estimate[["scale"]] VaR.par.bootstrap[i] <- qinvgamma(p = alpha, shape = sh.temp, scale = sc.temp) ES.par.bootstrap[i] <- integrate(f = function(beta)qinvgamma(p = beta, shape = sh.temp, scale = sc.temp), lower = alpha, upper = 1)$value / (1 - alpha) # oppure # calcola VaR dal campione di bootstrap via HS # VaR.par.bootstrap[i] <- VAR.HS(x = temp, alpha = alpha) # calcola ES dal campione di bootstrap via HS # ES.par.bootstrap[i] <- ES.HS(x = temp, alpha = alpha) }) # stima di bootstrap di VaR e ES (VaR.par.bootstrap.c <- mean(x = VaR.par.bootstrap)) (ES.par.bootstrap.c <- mean(x = ES.par.bootstrap)) # 95% confidence intervals (VaR.par.bootstrap.ci <- quantile(x = VaR.par.bootstrap, probs = c(0.025, 0.975), type = 1)) (ES.par.bootstrap.ci <- quantile(x = ES.par.bootstrap, probs = c(0.025, 0.975), type = 1)) # distribuzione dei VaR e TVaR: istogramma hist(x = VaR.par.bootstrap, breaks = 50, freq = FALSE) abline(v = VaR.par.bootstrap.c, col = "red") abline(v = VaR.par.bootstrap.ci, col = "red", lty = 2) hist(x = ES.par.bootstrap, breaks = 50, freq = FALSE) abline(v = ES.par.bootstrap.c, col = "red") abline(v = ES.par.bootstrap.ci, col = "red", lty = 2) ############################################# # use extreme value theory (POT) to fit the # # tail and calculate VaR and ES # ############################################# # install and load the evd library # install.packages("evd") library(evd) # plot mean excess function # whole range mrlplot(data = claims) mrlplot(data = claims, tlim = c(0, 3.5)) # 0 < u < 3.5 mrlplot(claims, tlim = c(1.4, 3)) # 2 < u < 3.5 # plot estimates of modified scale and extremal index for different thresholds # if Y = GPD(scale, shape) then (Y - u | Y > u) = GPD(scale + shape * threshold, shape) # therefore, if one reparametrizes the GPD using # modified scale = scale - shape * threshold # the above property becomes # if Y = GPD(modified scale, shape) then (Y - u| Y > u) = GPD(modified scale, shape) # strategy: estimate a GPD on excesses above a range of thresholds and choose the threshold such that parameters are (approximately) constant from there par(mfrow = c(1, 2)) tcplot(claims, tlim = c(0, 3)) tcplot(claims, tlim = c(1, 3)) tcplot(claims, tlim = c(2, 3)) tcplot(claims, tlim = c(1.4, 3)) par(mfrow = c(1, 1)) # set the threshold u <- 1.4 # number of excesses sum(claims > u) / n # one more time tcplot(claims, tlim = c(u, 2)) tcplot(claims, tlim = c(u, 3)) # fit a gpd model to the dataset using ML and threshold = u ?fpot fit.evd <- fpot(x = claims, threshold = u) # explore the result summary(fit.evd) fit.evd # parameters estimates fit.evd$estimate # standard errors fit.evd$std.err # deviance = - 2 * loglikelihood fit.evd$deviance # variance covariance matrix of estimators (normal approximation) fit.evd$var.cov # confidence intervals confint(object = fit.evd, level = 0.99) # goodness of fit par(mfrow = c(1, 1)) # plot one at a time plot(fit.evd, 1) # PP plot plot(fit.evd, 2, xlim = c(2, 15), ylim = c(0, 40)) # QQ plot plot(fit.evd, 2, xlim = c(2, 10), ylim = c(0, 20)) # QQ plot, change range plot(fit.evd, 2, xlim = c(4, 14)) # QQ plot, change range plot(fit.evd, 3) # kernel density vs model plot plot(fit.evd, 3, xlim = c(1, 10)) # change range plot(fit.evd, 4, ylim = c(0, 40)) # return level plot # how to use the estimated model: compute P(Y>x) where Y = (X - u|X > u) sigma <- as.numeric(fit.evd$estimate["scale"]) # scale parameter xi <- as.numeric(fit.evd$estimate["shape"]) # extremal index # survival of GPD with given parameters pgpd(q = 5, scale = sigma, shape = xi, lower.tail = FALSE) # proportion of excesses Su <- fit.evd$pat # also # SX(u) # sum(claims > u) / length(claims) # VaR VaR.evt <- u + (sigma / xi) * (((1 - alpha)/Su) ^ (-xi) -1) # ES ES.evt <- VaR.evt + (sigma + xi * (VaR.evt - u)) / (1 - xi)