# R lab copulas (part 2)
library(copula)
library(mnormt)
library(lattice)
library(mvtnorm)
library(VGAM)

data(loss)
View(loss)
# ?loss
LOSS<-loss$loss
ALAE<-loss$alae
plot(log(LOSS), log(ALAE))
plot(LOSS,ALAE)

#PIT U1=(n/(n+1))*F_n(LOSS),
# U2=(n/(n+1))*F_n(ALAE) 
#pobs: scaled ranks
# nx2 dimensional dataset (LOSS, ALAE)
Z<-pobs(cbind(LOSS, ALAE))
plot(Z, main="pseudo-copula observations")

par(mfrow=c(1,2))
hist(LOSS, breaks = 30)
hist(ALAE, breaks = 30)

# 1. Fit marginal Pareto distribution 
layout(1:1)
library(VGAM)
# no covariates
# Pareto II distr.: loc=0; scale and shape in log scale   
fit.ALAE<-vglm(ALAE~1, paretoII(location=0, 
                  lscale="loglink",               
                  lshape="loglink"))

coef(fit.ALAE, matrix=TRUE)

theta.ALAE<-Coef(fit.ALAE)[1]
alpha.ALAE<-Coef(fit.ALAE)[2]
theta.ALAE
alpha.ALAE

# PIT using the fitted Pareto distribution 
#U=F(ALAE)
# F(x)=1-(1+x/theta)^{-alpha}
u1=1-(1+(ALAE/theta.ALAE))^(-alpha.ALAE)
hist(u1)

## loss
fit.LOSS<-vglm(LOSS~1, paretoII(location=0, 
                                lscale="loglink",               
                                lshape="loglink"))

coef(fit.LOSS, matrix=TRUE)
theta.LOSS<-Coef(fit.LOSS)[1]
alpha.LOSS<-Coef(fit.LOSS)[2]
theta.LOSS
alpha.LOSS

# PIT using the fitted Pareto distribution 
# U=F(LOSS)
# F(x)=1-(1+x/theta)^{-alpha}
u2=1-(1+(LOSS/theta.LOSS))^(-alpha.LOSS)
length(u2)
hist(u2)
qqplot(qunif(ppoints(length(u2))), sort(u2))
abline(0,1)

# studying dependence between LOSS and ALAE
# testing independence
C.data<-cbind(LOSS, ALAE)
dim(C.data)
L<-dim(C.data)[1]

empsamp<-indepTestSim(L, p=2, N=500)
indepTest(C.data, empsamp)
# scatter plot of (u1,u2)
plot(u1, u2, xlab=quote(U[1]), ylab=quote(U[2]))

# PIT observations (using Pareto margins)
U.<-cbind(u1, u2)
cor(U., method = "kendall")
# IFM: fitted margins and U. as input 
cop_name<-c("clayton", "frank", 
            "gumbel", "normal", "t", "joe")
cop.fam<-c(claytonCopula(dim=2), 
           frankCopula(dim=2), 
           gumbelCopula(dim=2),
           normalCopula(dim=2),
           tCopula(dim=2, df.fixed = FALSE), 
           joeCopula(dim=2)) 
fit.copula.l<-list()
 
for(i in 1:length(cop.fam)){
  fit.copula.l[[i]]<-fitCopula(cop.fam[[i]], U., 
                               method="ml")
} 

fit.estimate<-vector()
fit.loglink<-vector()
aic.result<-vector()
for(i in 1:length(cop.fam)){
  fit.estimate[i]<-coef(fit.copula.l[[i]])[1]
  fit.loglink[i]<-fit.copula.l[[i]]@loglik
  aic.result[i]<-AIC(fit.copula.l[[i]])
}

fit.estimate
fit.loglink
aic.result

# Gumbel copula 
fit.gc<-fitCopula(gumbelCopula(dim=2), data=U., 
                  method="ml")
summary(fit.gc)
gc<-fit.gc@copula
param<-fit.gc@estimate

# GOF test
set.seed(270)
N<-1000
gofCopula(gc, x=U., N=N, simulation="mult", ties.method="max")

par(mfrow=c(1,2))
par(mar=c(3.2, 3, 0.2, 0.2))
persp(gc, pCopula, 
      theta=50, 
      cex.axis=0.5, col=2)
persp(gc, dCopula, 
      theta=0.6, 
      cex.axis=0.5, col=3)

## graphical comparison of the selected 
# copula and the empirical data

#dev.off()
layout(1:1)
n<-nrow(U.)

plot(U.)
points(rCopula(n, gumbelCopula(param = param, 
                               dim=2)), col=2)

Udex<- (1:n)/(n+1)
Cn<- C.n(u=cbind(rep(Udex, n), rep(Udex, 
         each=n)), 
         X=U.)
EmpCop<- expression(contour(Udex, Udex, 
                      matrix(Cn, n, n), 
                      col=2, add=T))
contour(gumbelCopula(param=param, dim=2), 
        pCopula, main=expression(hat(C)), 
        xlab=expression(hat(U)[1]), 
        ylab=expression(hat(U)[2]))
eval(EmpCop)

### VaR and ES 

## F1^{-1}(u1), F2^{-1}(u2)
simLOSS<-theta.LOSS*((1-U.[,1])^(-1/alpha.LOSS)-1) 
simALAE<-theta.ALAE*((1-U.[,2])^(-1/alpha.ALAE)-1)
TOTLOSS<-simLOSS+simALAE
## VaR (95%)
quantile(TOTLOSS, 0.95)
## ES (95%)
mean(TOTLOSS[TOTLOSS>quantile(TOTLOSS, 0.95)])