# cambiare directory
# setwd("inserire directory qui")

# caricare il data set da un file csv
# file "ripulito" per semplicita'
# "header = TRUE" cosi' R sa che la prima riga contiene i nomi delle variabili
data <- read.csv("perdite.csv", header = TRUE)

# cosa c'e' in "data"?
str(data)
head(data)
tail(data)

# estraiamo i sinistri (eg, in milioni di euro)
claims <- data[, 2]
n <- length(claims)

# statistiche di base: min, 25% percentile, mediana, 75% percentile, max
fivenum(claims)
# summary: come fivenum, piu' la media
summary(claims)

# media, sd, asimmetria, curtosi
install.packages("moments")
library(moments)
c(mean(claims), sd(claims), skewness(claims), kurtosis(claims))


# istogrammi e densita' (nonparametrica)
hist(x = claims, breaks = 50, freq = FALSE, ylim = c(0, 1))
lines(x = density(claims), col = "red")

# fdr empirica e funzione di sopravvivenza
FX <- ecdf(x = claims)
SX <- function(x)1 - FX(x)

curve(FX, from = 0, to = 15)
curve(SX, from = 0, to = 15)
# anche, piu' semplice
plot(FX)
plot(SX)
plot(SX, 0, 5)

#####################################################
# metodo storico per il calcolo del VaR e ES (TVaR) #
#####################################################

# funzione "quantile"; l'opzione "type" permette di scegliere il tipo di quantile
# "type = 1" per il quantile sinistro
# "type = 4" per la versione continua vista in classe
VaR.HS <- function(x, alpha)quantile(x = x, probs = alpha, type = 1)

alpha <- 0.95
(VaR <- VaR.HS(x = claims, alpha = alpha))

# quantile e' vettorizzato rispetto a p
VaR.HS(x = claims, alpha = c(0.9, 0.95, 0.99, 0.995))

# oppure direttamente
# statistica d'ordine
s.claims <- sort(claims)
s.claims[ceiling(alpha * n)]

# plot
p <- seq(0.8, 1, by = 0.01)
plot(p, VaR.HS(x = claims, alpha = p), type = "b", ylim = range(claims))

# ES
ES.HS <- function(x, alpha){
  VaR <- VaR.HS(x = x, alpha = alpha)
  n <- length(x)
  res <- (sum(x[x > VaR]) + (n * alpha - floor(n * alpha)) * VaR) / (n * (1 - alpha))
  ifelse(alpha == 1, max(x), res)
  }

ES.HS(x = claims, alpha = alpha)

# ES non e' vettorizzato rispetto alpha!
ES.HS(x = claims, alpha = c(0.9, 0.95, 0.99, 0.995)) # sbagliato!
# corretto, con sapply
sapply(X = c(0.9, 0.95, 0.99, 0.995), FUN = ES.HS, x = claims)

lines(p, sapply(X = p, FUN = ES.HS, x = claims), col = "red")

# usando la definizione
# integrate(f = function(s)quantile(x = claims, probs = s, type = 1), lower = p, upper = 1, subdivisions = 1000)$value / (1 - p) # non funziona
install.packages("cubature")
library(cubature)
hcubature(f = function(beta)quantile(x = claims, probs = beta, type = 1), lowerLimit = alpha, upperLimit = 1, tol = 1E-10, maxEval = 10000)$integral / (1 - alpha)

# TVaR
(TVaR_l.HS <- mean(claims[claims >= VaR]))
(TVaR_u.HS <- mean(claims[claims > VaR]))
# quanti elementi nella media?
sum(claims > VaR)

# intervalli di confidenza per VaR e TVaR? bootstrap nonparametrico
# dimensione bootstrap
m <- 10000

# prepara vettori
VaR.bootstrap <- ES.bootstrap <- vector(mode = "numeric", length = m)

# ciclo di bootstrap
set.seed(0)
for(i in 1:m){
  temp <- sample(x = claims, size = n, replace = TRUE)
  VaR.bootstrap[i] <- VaR.HS(x = temp, alpha = alpha)
  ES.bootstrap[i] <- ES.HS(x = temp, alpha = alpha)
  }

# stima di bootstrap di VaR e ES
(VaR.bootstrap.c <- mean(x = VaR.bootstrap))
(ES.bootstrap.c <- mean(x = ES.bootstrap))

# 95% confidence intervals
(VaR.bootstrap.ci <- quantile(x = VaR.bootstrap, probs = c(0.025, 0.975), type = 1))
(ES.bootstrap.ci <- quantile(x = ES.bootstrap, probs = c(0.025, 0.975), type = 1))

# distribuzione dei VaR e ES: istogramma
hist(x = VaR.bootstrap, breaks = 50, freq = FALSE)
abline(v = VaR.bootstrap.c, col = "red")
abline(v = VaR.bootstrap.ci, col = "red", lty = 2)

hist(x = ES.bootstrap, breaks = 50, freq = FALSE)
abline(v = ES.bootstrap.c, col = "red")
abline(v = ES.bootstrap.ci, col = "red", lty = 2)

# alternativa: pacchetto "boot"
# install.packages("boot")
library(boot)

VaR_ES <- function(dati, indici = 1:length(dati), alpha = 0.95){
  selezione <- dati[indici]
  VaR <- VaR.HS(x = selezione, alpha = alpha)
  ES <- ES.HS(x = selezione, alpha = alpha)
  return(c(VaR, ES))
}

VaR_ES_bootstrap <- boot(data = claims, statistic = VaR_ES, R = m, sim = "ordinary", alpha = alpha)
boot.ci(boot.out = VaR_ES_bootstrap, conf = 0.99, type = "perc", index = 1)
boot.ci(boot.out = VaR_ES_bootstrap, conf = 0.99, type = "perc", index = 2)



#################################################
# metodo parametrico per il calcolo di VaR e ES #
# fitting di una distribuzione                  #
#################################################

install.packages(c("fitdistrplus", "actuar"))
library(fitdistrplus)
library(actuar)

plotdist(data = claims)

# fitdistrplus permette il fit di distribuzioni per cui e' definita la densita', funzione di ripartizione e la sua inversa
# predefinite: eg dgamma, pgamma, qgamma, (rgamma)
# altre: "norm", "lnorm", "pois", "exp", "gamma", "nbinom", "geom", "beta", "unif", "logis"
# si possono usare altre distribuzioni purche' siano definite le corrispondenti funzioni e bisogna inserire i valori di partenza dell'algoritmo in "start"

# actuar include molte distribuzioni aggiuntive per la perdita spesso usate in GI
# Burr, GeneralizedBeta, GeneralizedPareto,Gumbel, InverseBurr, InverseExponential, InverseGamma, InverseGaussian, InverseParalogistic, InversePareto, InverseTransformedGamma, InverseWeibull, Logarithmic, Loggamma, Loglogistic, Paralogistic, Pareto, PoissonInverseGaussian, SingleParameterPareto, TransformedBeta, TransformedGamma, ZeroModifiedBinomia, ZeroModifiedGeometric, ZeroModifiedLogarithmic, ZeroModifiedNegativeBinomial, ZeroModifiedPoisson, ZeroTruncatedBinomial, ZeroTruncatedGeometric, ZeroTruncatedNegativeBinomial, ZeroTruncatedPoisson,


# method = "mle": massima verosimiglianza, "mme": metodo dei momenti, "qme": metodo dei quantili

# gamma
fit1 <- fitdist(data = claims, distr = "gamma", method = "mle")


# lognormale
fit2 <- fitdist(data = claims, distr = "lnorm", method = "mle")

# weibull
fit3 <- fitdist(data = claims, distr = "weibull", method = "mle")

# pareto (dal pacchetto "actuar")
fit4 <- fitdist(data = claims, distr = "pareto", method = "mle")

# gamma inversa (dal pacchetto "actuar")
fit5 <- fitdist(data = claims, distr = "invgamma", method = "mle")

plot(fit1)
plot(fit2)
plot(fit3)
plot(fit4)
plot(fit5)

# analisi e confronto
modelli <- list(fit1, fit2, fit3, fit4, fit5)
leg <- c("gamma", "lognormale", "weibull", "pareto", "gamma inversa")


par(mfrow = c(1, 1))
# confronto delle densita' e istogramma

denscomp(ft = modelli, legendtext = leg, breaks = 50, plotstyle = "ggplot")
denscomp(ft = modelli, legendtext = leg, breaks = 50, plotstyle = "ggplot", xlim = c(0, 5))
denscomp(ft = modelli, legendtext = leg, breaks = 50, plotstyle = "ggplot", xlim = c(5, 10), ylim = c(0, 0.025))

# confronto delle fdr
cdfcomp(ft = modelli, legendtext = leg)
cdfcomp(ft = modelli, legendtext = leg, xlim = c(0, 5))
cdfcomp(ft = modelli, legendtext = leg, xlim = c(0.1, 10), xlogscale = TRUE)

# qq e pp plot
qqcomp(ft = modelli, legendtext = leg, line01lty = 1)
ppcomp(ft = modelli, legendtext = leg, line01lty = 1)

# goodness-of-fit statistiche
gofstat(f = modelli)

# scegliamo la gamma inversa

# calcolo di VaR e ES=TVaR
# (VaR.par.gamma <- qgamma(p = alpha, shape = fit1$estimate[["shape"]], rate = fit1$estimate[["rate"]]))

# (VaR.par.lnorm <- qlnorm(p = alpha, meanlog = fit2$estimate[["meanlog"]], sdlog = fit2$estimate[["sdlog"]]))

# (VaR.par.weibull <- qweibull(p = alpha, shape = fit3$estimate[["shape"]], scale = fit3$estimate[["scale"]]))

# (VaR.par.pareto <- qpareto(p = alpha, shape = fit4$estimate[["shape"]], scale = fit4$estimate[["scale"]]))

?dinvgamma
sh <-  fit5$estimate[["shape"]]
sc <- fit5$estimate[["scale"]]
VaR.par.invgamma <- qinvgamma(p = alpha, shape = sh, scale = sc)

# calcolo del ES analitico via integrazione numerica
ES.par.invgamma <- integrate(f = function(beta)qinvgamma(p = beta, shape = sh, scale = sc), lower = alpha, upper = 1)$value / (1 - alpha)

confint(fit5)

# con il pacchetto cvar
# install.packages("cvar")
library(help = "cvar")
?cvar::VaR
-cvar::VaR(dist = "qinvgamma", x = alpha, shape = sh, scale = sc)

cvar::ES(dist = function(z)-qinvgamma(p = 1 - z, shape = sh, scale = sc), x = 1 - alpha)

# uso del bootstrap parametrico per ricavare l'intervalli di confidenza per VaR e TVaR nel caso della gamma inversa
# dimensione bootstrap
m <- 1000

VaR.par.bootstrap <- ES.par.bootstrap <- vector(mode = "numeric", length = m)

set.seed(0)
system.time(for (i in 1 : m)
{
  # genera campione di bootstrap dalla distribuzione fittata con stessa numerosita'
  temp <- rinvgamma(n = n, shape = sh, scale = sc)

  fit.temp <- fitdist(data = temp, distr = "invgamma", method = "mle")

  sh.temp <- fit.temp$estimate[["shape"]]
  sc.temp <- fit.temp$estimate[["scale"]]

  VaR.par.bootstrap[i] <- qinvgamma(p = alpha, shape = sh.temp, scale = sc.temp)

  ES.par.bootstrap[i] <- integrate(f = function(beta)qinvgamma(p = beta, shape = sh.temp, scale = sc.temp), lower = alpha, upper = 1)$value / (1 - alpha)

  # oppure
  # calcola VaR dal campione di bootstrap via HS
  # VaR.par.bootstrap[i] <- VAR.HS(x = temp, alpha = alpha)
  # calcola ES dal campione di bootstrap via HS
  # ES.par.bootstrap[i] <- ES.HS(x = temp, alpha = alpha)
})

# stima di bootstrap di VaR e ES
(VaR.par.bootstrap.c <- mean(x = VaR.par.bootstrap))
(ES.par.bootstrap.c <- mean(x = ES.par.bootstrap))

# 95% confidence intervals
(VaR.par.bootstrap.ci <- quantile(x = VaR.par.bootstrap, probs = c(0.025, 0.975), type = 1))
(ES.par.bootstrap.ci <- quantile(x = ES.par.bootstrap, probs = c(0.025, 0.975), type = 1))

# distribuzione dei VaR e TVaR: istogramma
hist(x = VaR.par.bootstrap, breaks = 50, freq = FALSE)
abline(v = VaR.par.bootstrap.c, col = "red")
abline(v = VaR.par.bootstrap.ci, col = "red", lty = 2)

hist(x = ES.par.bootstrap, breaks = 50, freq = FALSE)
abline(v = ES.par.bootstrap.c, col = "red")
abline(v = ES.par.bootstrap.ci, col = "red", lty = 2)

#############################################
# use extreme value theory (POT) to fit the #
# tail and calculate VaR and ES             #
#############################################


# install and load the evd library
# install.packages("evd")
library(evd)

# plot mean excess function
# whole range
mrlplot(data = claims)
mrlplot(data = claims, tlim = c(0, 3.5)) # 0 < u < 3.5
mrlplot(claims, tlim = c(1.4, 3)) # 2 < u < 3.5


# plot estimates of modified scale and extremal index for different thresholds
# if Y = GPD(scale, shape) then (Y - u | Y > u) = GPD(scale + shape * threshold, shape)
# therefore, if one reparametrizes the GPD using
# modified scale = scale - shape * threshold
# the above property becomes
# if Y = GPD(modified scale, shape) then (Y - u| Y > u) = GPD(modified scale, shape)
# strategy: estimate a GPD on excesses above a range of thresholds and choose the threshold such that parameters are (approximately) constant from there
par(mfrow = c(1, 2))

tcplot(claims, tlim = c(0, 3))
tcplot(claims, tlim = c(1, 3))
tcplot(claims, tlim = c(2, 3))
tcplot(claims, tlim = c(1.4, 3))
par(mfrow = c(1, 1))


# set the threshold
u <- 1.4

# number of excesses
sum(claims > u) / n

# one more time
tcplot(claims, tlim = c(u, 2))
tcplot(claims, tlim = c(u, 3))

# fit a gpd model to the dataset using ML and threshold = u

?fpot
fit.evd <- fpot(x = claims, threshold = u)


# explore the result
summary(fit.evd)
fit.evd

# parameters estimates
fit.evd$estimate

# standard errors
fit.evd$std.err

# deviance = - 2 * loglikelihood
fit.evd$deviance

# variance covariance matrix of estimators (normal approximation)
fit.evd$var.cov

# confidence intervals
confint(object = fit.evd, level = 0.99)

# goodness of fit
par(mfrow = c(1, 1)) # plot one at a time
plot(fit.evd, 1) # PP plot

plot(fit.evd, 2, xlim = c(2, 15), ylim = c(0, 40)) # QQ plot
plot(fit.evd, 2, xlim = c(2, 10), ylim = c(0, 20)) # QQ plot, change range
plot(fit.evd, 2, xlim = c(4, 14)) # QQ plot, change range

plot(fit.evd, 3) # kernel density vs model plot
plot(fit.evd, 3, xlim = c(1, 10)) # change range

plot(fit.evd, 4, ylim = c(0, 40)) # return level plot

# how to use the estimated model: compute P(Y>x) where Y = (X - u|X > u)
sigma <- as.numeric(fit.evd$estimate["scale"]) # scale parameter
xi <- as.numeric(fit.evd$estimate["shape"]) # extremal index

# survival of GPD with given parameters
pgpd(q = 5, scale = sigma, shape = xi, lower.tail = FALSE)

# proportion of excesses
Su <- fit.evd$pat
# also
# SX(u)
# sum(claims > u) / length(claims)

# VaR

VaR.evt <- u + (sigma / xi) * (((1 - alpha)/Su) ^ (-xi) -1)

# ES

ES.evt <- VaR.evt + (sigma + xi * (VaR.evt - u)) / (1 - xi)