# cambiare directory
setwd("inserire directory qui")

# caricare il data set da un file csv
# file "ripulito" per semplicita'
# "header = TRUE" cosi' R sa che la prima riga contiene i nomi delle variabili
data <- read.csv("perdite.csv", header = TRUE)

# cosa c'e' in "data"?
str(data)
head(data)

# estraiamo i sinistri (eg, in milioni di euro)
claims <- data[, 2]
n <- length(claims)

# statistiche di base: min, 25% percentile, mediana, 75% percentile, max
fivenum(claims)

# media, sd, asimmetria, curtosi
# install.packages(moments)
library(moments)
c(mean(claims), sd(claims), skewness(claims), kurtosis(claims))


# istogrammi e densita' (nonparametrica)
hist(x = claims, breaks = 50, freq = FALSE)
lines(x = density(claims), col = "red")

# fdr empirica e funzione di sopravvivenza
FX <- ecdf(x = claims)
SX <- function(x)1 - FX(x)
curve(FX, from = 0, to = 15)
curve(SX, from = 0, to = 15)

#####################################################
# metodo storico per il calcolo del VaR e TVaR (ES) #
#####################################################

# funzione "quantile"; l'opzione "type" permette di scegliere il tipo di quantile
# "type = 1" per il quantile sinistro
p <- 0.95
VaR.HS <- quantile(claims, probs = p, type = 1)

# oppure
s.claims <- sort(claims)
s.claims[p * length(claims)]

# TVaR
TVaR.HS <- mean(claims[claims >= VaR.HS])
TVaR1.HS <- mean(claims[claims > VaR.HS])

# intervalli di confidenza per VaR e TVaR? bootstrap nonparametrico
# dimensione bootstrap
m <- 10000

# prepara vettori
VaR.bootstrap <- TVaR.bootstrap <- vector(mode = "numeric", length = m)

# ciclo di bootstrap
set.seed(0)
for (i in 1 : m)
{
  # genera campione di bootstrap dalle perdite simulando con rimpiazzo
  temp <- sample(x = claims, replace = TRUE, size = length(claims))
  # calcola VaR dal campione di bootstrap
  VaR.bootstrap[i] <- quantile(temp, probs = p, type = 1)
  # calcola TVaR dal campione di bootstrap
  TVaR.bootstrap[i] <- mean(temp[temp >= VaR.bootstrap[i]])
}

# stima di bootstrap di VaR e TVaR
VaR.bootstrap.c <- mean(x = VaR.bootstrap)
TVaR.bootstrap.c <- mean(x = TVaR.bootstrap)

# 95% confidence intervals
VaR.bootstrap.ci <- quantile(x = VaR.bootstrap, probs = c(0.025, 0.975), type = 1)
TVaR.bootstrap.ci <- quantile(x = TVaR.bootstrap, probs = c(0.025, 0.975), type = 1)

# distribuzione dei VaR e TVaR: istogramma
hist(x = VaR.bootstrap, breaks = 50, freq = FALSE)
abline(v = VaR.bootstrap.c, col = "red")
abline(v = VaR.bootstrap.ci, col = "red", lty = 2)

hist(x = TVaR.bootstrap, breaks = 50, freq = FALSE)
abline(v = TVaR.bootstrap.c, col = "red")
abline(v = TVaR.bootstrap.ci, col = "red", lty = 2)

# alternativa: pacchetto "boot"
install.packages(boot)
library(boot)
VaR_TVaR <- function(dati, indici = 1 : length(data), type = 1, p = 0.95)
{
  selezione <- dati[indici]
  VaR <- quantile(selezione, p = p, type = type)
  TVaR <- mean(selezione[selezione > VaR])
  return(c(VaR, TVaR))
}
VaR_TVaR_bootstrap <- boot(data = claims, statistic = VaR_TVaR, R = m, sim = "ordinary", p = p, type = 1)
boot.ci(VaR_TVaR_bootstrap, conf = 0.95)

###################################################
# metodo parametrico per il calcolo di VaR e TVaR #
# fitting di una distribuzione                    #
###################################################

install.packages(c("fitdistrplus", "actuar"))
library(fitdistrplus)
library(actuar)

plotdist(data = claims)

# fitdistr permette il fit di distribuzioni per cui e' definita la densita', funzione di ripartizione e la sua inversa
# predefinite: eg dgamma, pgamma, qgamma, (rgamma)
# altre: "norm", "lnorm", "pois", "exp", "gamma", "nbinom", "geom", "beta", "unif", "logis"
# si possono usare altre distribuzioni purche' siano definite le corrispondenti funzioni e bisogna inserire i valori di partenza dell'algoritmo in "start"

# method = "mle": massima verosimiglianza, "mme": metodo dei momenti, "qme": metodo dei quantili

# gamma
fit1 <- fitdist(data = claims, distr = "gamma", method = "mle")

# lognormale
fit2 <- fitdist(data = claims, distr = "lnorm", method = "mle")

# weibull
fit3 <- fitdist(data = claims, distr = "weibull", method = "mle")

# pareto (need package "actuar")
fit4 <- fitdist(data = claims, distr = "pareto", method = "mle")

# gamma inversa (dal pacchetto "actuar")
fit5 <- fitdist(data = claims, distr = "invgamma", method = "mle")

plot(fit1)
plot(fit2)
plot(fit3)
plot(fit4)
plot(fit5)


# analisi e confronto
modelli <- list(fit1, fit2, fit3, fit4, fit5)
leg <- c("gamma", "lognormal", "weibull", "pareto", "inverse gamma")
par(mfrow = c(1, 1))

# confronto delle densita' e istogramma
denscomp(ft = modelli, legendtext = leg, breaks = 50, plotstyle = "ggplot")
denscomp(ft = modelli, legendtext = leg, breaks = 50, plotstyle = "ggplot", xlim = c(0, 5))
denscomp(ft = modelli, legendtext = leg, breaks = 50, plotstyle = "ggplot", xlim = c(5, 10), ylim = c(0, 0.025))

# confronto delle fdr
cdfcomp(ft = modelli, legendtext = leg)
cdfcomp(ft = modelli, legendtext = leg, xlim = c(0, 5))

# qq e pp plot
qqcomp(ft = modelli, legendtext = leg, line01lty = 2)
ppcomp(ft = modelli, legendtext = leg, line01lty = 2)

gofstat(f = modelli)

# calcolo di VaR e TVaR = ES
VaR.par.gamma <- qgamma(p = p, shape = fit1$estimate[["shape"]], rate = fit1$estimate[["rate"]])

VaR.par.lnorm <- qlnorm(p = p, meanlog = fit2$estimate[["meanlog"]], sdlog = fit2$estimate[["sdlog"]])

VaR.par.weibull <- qweibull(p = p, shape = fit3$estimate[["shape"]], scale = fit3$estimate[["scale"]])

VaR.par.pareto <- qpareto(p = p, shape = fit4$estimate[["shape"]], scale = fit4$estimate[["scale"]])

VaR.par.Igamma <- qinvgamma(p = p, shape = fit5$estimate[["shape"]], scale = fit5$estimate[["scale"]])

# calcolo del TVaR analitico via integrazione numerica
TVaR.par.Igamma <- integrate(f = function(z)qinvgamma(p = z, shape = fit5$estimate[["shape"]], scale = fit5$estimate[["scale"]]), lower = p, upper = 1)$value / (1 - p)


# uso del bootstrap parametrico per ricavare l'intervalli di confidenza per VaR e TVaR nel caso della gamma inversa 
# dimensione bootstrap
m <- 10000

VaR.par.bootstrap <- TVaR.par.bootstrap <- vector(mode = "numeric", length = m)

set.seed(0)
for (i in 1 : m)
{
  # genera campione di bootstrap dalla distribuzione fittata
  temp <- rinvgamma(n = length(claims), shape = fit5$estimate[["shape"]], scale = fit5$estimate[["scale"]])
  # calcola VaR dal campione di bootstrap via HS
  VaR.par.bootstrap[i] <- quantile(temp, probs = p, type = 1)
  # calcola TVaR dal campione di bootstrap via HS
  TVaR.par.bootstrap[i] <- mean(temp[temp >= VaR.par.bootstrap[i]])
}
# alternativa: generare campioni dalle perdite con rimpiazzo, rifittare la distribuzione gamma inversa e calcolare VaR e TVaR analiticamente

# stima di bootstrap di VaR e TVaR
VaR.par.bootstrap.c <- mean(x = VaR.par.bootstrap)
TVaR.par.bootstrap.c <- mean(x = TVaR.par.bootstrap)

# 95% confidence intervals
VaR.par.bootstrap.ci <- quantile(x = VaR.par.bootstrap, probs = c(0.025, 0.975), type = 1)
TVaR.par.bootstrap.ci <- quantile(x = TVaR.par.bootstrap, probs = c(0.025, 0.975), type = 1)

# distribuzione dei VaR e TVaR: istogramma
hist(x = VaR.par.bootstrap, breaks = 50, freq = FALSE)
abline(v = VaR.par.bootstrap.c, col = "red")
abline(v = VaR.par.bootstrap.ci, col = "red", lty = 2)

hist(x = TVaR.par.bootstrap, breaks = 50, freq = FALSE)
hist(x = TVaR.par.bootstrap, breaks = 50, freq = FALSE, xlim = c(2, 20))
abline(v = TVaR.par.bootstrap.c, col = "red")
abline(v = TVaR.par.bootstrap.ci, col = "red", lty = 2)



#############################################
# use extreme value theory (POT) to fit the #
# tail and calculate VaR and ES             #
#############################################


# install and load the evd library
# install.packages("evd")
library(evd)


# plot mean excess function
mrlplot(claims) # whole range
mrlplot(claims, tlim = c(0, 3.5)) # 0 < u < 3.5
mrlplot(claims, tlim = c(2, 3.5)) # 2 < u < 3.5

# plot estimates of modified scale and extremal index for different thresholds
# if Y = GPD(scale, shape) then (Y - u | Y > u) = GPD(scale + shape * threshold, shape)
# therefore, if one reparametrizes the GPD using
# modified scale = scale - shape * threshold
# the above property becomes
# if Y = GPD(modified scale, shape) then (Y - u| Y > u) = GPD(modified scale, shape)
# strategy: estimate a GPD on excesses above a range of thresholds and choose the threshold such that parameters are (approximately) constant from there
par(mfrow = c(1, 2))
tcplot(claims, tlim = c(0, 4))
tcplot(claims, tlim = c(1, 3))
tcplot(claims, tlim = c(2, 3))
tcplot(claims, tlim = c(1, 2))
par(mfrow = c(1, 1))


# set the threshold
u <- 1.4

# number of excesses
sum(claims > u)

# one more time
tcplot(claims, tlim = c(u, 2))
tcplot(claims, tlim = c(u, 3))

# fit a gpd model to the dataset using ML and threshold = u
fit.evd <- fpot(x = claims, threshold = u)
?fpot

# explore the result
summary(fit.evd)
fit.evd

# parameters estimates
fit.evd$estimate

# standard errors
fit.evd$std.err

# deviance = - 2 * loglikelihood
fit.evd$deviance

# variance covariance matrix of estimators (normal approximation)
fit.evd$var.cov

# confidence intervals
confint(object = fit.evd, level = 0.99)

# goodness of fit
par(mfrow = c(1, 1)) # plot one at a time
plot(fit.evd, 1) # PP plot

plot(fit.evd, 2, xlim = c(2, 15), ylim = c(0, 40)) # QQ plot
plot(fit.evd, 2, xlim = c(2, 10), ylim = c(0, 20)) # QQ plot, change range
plot(fit.evd, 2, xlim = c(4, 14)) # QQ plot, change range

plot(fit.evd, 3) # kernel density vs model plot
plot(fit.evd, 3, xlim = c(1, 10)) # change range

plot(fit.evd, 4, ylim = c(0, 40)) # return level plot

# how to use the estimated model: compute P(Y>x) where Y = (X - u|X > u)
sigma <- as.numeric(fit.evd$estimate["scale"]) # scale parameter
xi <- as.numeric(fit.evd$estimate["shape"]) # extremal index

# survival of GPD with given parameters
pgpd(q = 5, scale = sigma, shape = xi, lower.tail = FALSE)

# proportion of excesses
Su <- fit.evd$pat
# also
# SX(u)
# sum(claims > u) / length(claims)

# VaR

VaR.evd <- u + evd::qgpd(p = 1 - (1 - p) / Su, scale = sigma, shape = xi)

# ES

mef <- function(u, scale, shape)(scale + shape * u) / (1 - shape)
ES.evd <- VaR.evd + mef(VaR.evd - u, scale = sigma, shape = xi)