---
title: "Introduction to copulas"
output:
  pdf_document: default
  html_document:
    df_print: paged
editor_options:
  chunk_output_type: console
---


```{r, }
library(copula)
library(mnormt)               
library(lattice)               
library(mvtnorm)
```

Simulation from copula models

```{r, fig.width=4, fig.height=4}
# sampling from independence copula 
ic<-indepCopula(dim=2)
wireframe2(ic, FUN=pCopula)
contourplot2(ic, FUN=pCopula)

set.seed(332)
n<-1000
U<-rCopula(n, copula=ic)
plot(U, xlab=quote(U[1]), ylab=quote(U[2]))

# sampling from bivariate normal distribution
d<-2
rho<-0.7
P<-matrix(rho, nrow=d, ncol=d) # correlation matrix
diag(P)<-1
X<-rmvnorm(1000, sigma = P)  # bivariate normal obs.
plot(X, xlim=c(-4, 4), ylim=c(-4, 4), 
     xlab=quote(X[1]), ylab=quote(X[2]))
# transform to copula observations via PIT 
U<-pnorm(X) 

### same as
#set.seed(332)
#U.<-rCopula(1000, normalCopula(rho, dim=2))

plot(U, xlim=c(0, 1), ylim=c(0, 1), 
     xlab=quote(U[1]), ylab=quote(U[2]))

#transform U to exp(2) margins	
Y<-qexp(U, 2)  
plot(Y, xlim=c(0, 4), ylim=c(0, 4), 
     xlab=quote(Y[1]), ylab=quote(Y[2]))

## Copula wire frame and level curves
nc<-normalCopula(rho, dim=2)
par(mfrow=c(1,3))
wireframe2(nc, FUN = pCopula, draw.4.pCoplines=FALSE)
contourplot2(nc, FUN = pCopula)
contourplot2(nc, FUN = dCopula) # density
```

```{r, fig.width=8, fig.height=4}
#Sampling from Clayton and survival Clayton copulas

cop<-claytonCopula(2)
U<-rCopula(1000, copula=cop)
V<-1-U   #sample from the survival Clayton copula
par(mfrow=c(1,2))
plot(U, xlab = quote(U[1]),  ylab = quote(U[2]))
plot(V, xlab = quote(V[1]), ylab = quote(V[2]))

#wireframe2(rotCopula(cop), FUN=dCopula, delta=0.025)

# Student's t copula
nu <- 4 
param<-0.8
tc <- tCopula(param, df = nu)
#contourplot2(tc, FUN = pCopula) # copula
contourplot2(tc, FUN = dCopula) # density

# inversion of Kendall's tau
param1<-iTau(tCopula(df = nu), tau = 0.6)
nu <- 4 
tc1 <- tCopula(param, df = nu)
tc1
# sample from the t copula
set.seed(271)
par(mfrow=c(1,2))
U <- rCopula(1000, copula = tc) 
plot(U, xlab = quote(U[1]), ylab = quote(U[2])) 
param2<-iTau(tCopula(df = nu), tau = 0.8)
tc2 <- tCopula(param2, df = nu)
V <- rCopula(1000, copula = tc2) 
plot(V, xlab = quote(V[1]), ylab = quote(V[2])) 
```

Copula estimation and goodness-of-fit: Application to losses and expenses data LOSS/ALAE

```{r,  fig.width=8, fig.height=4}
# LOSS/ALAE data: n=1500  obs., two variables: losses and expenses.
#LOSS: general liability claims from the Insurance Services Office
# ALAE: specifically attributable to the settlement of individual claims 
# (e.g. lawyer’s fees, claims investigation expenses)

data(loss) # loss data from copula library
dim(loss)

LOSS <- loss$loss
ALAE <- loss$alae
par(mfrow=c(1, 2))
plot(LOSS, ALAE, cex=.5) # dollar scale
plot(log(LOSS), log(ALAE),cex=.5) # log scale

#pseudo-copula observations
Z<-pobs(cbind(LOSS, ALAE))
plot(Z,  xlab = quote(U[1]), ylab = quote(U[2])) 

```

1. Fit marginal Pareto distributions

```{r,  fig.width=8, fig.height=5, message=FALSE, warning=FALSE}
layout(1:1)

library(VGAM)
#ALAE∼Pareto II(0,theta,alpha) 
#F(x)=1−(1+x/theta)^{-alpha}

# estimating a distribution for ALAE (no covariates)
# location param = 0; scale parameter is modeled on the log scale (positive); shape parameter also uses a log link to ensure positivity

fit.ALAE <- vglm(ALAE ~ 1, paretoII(location=0, lscale="loglink", lshape="loglink")) 

# scale and shape estimates
coef(fit.ALAE, matrix=TRUE) 
# extract fitted model coefficients, matrix=TRUE gives logarithm of estimated parameters instead of default normal scale estimates
#parameters on the natural scale
theta.ALAE <- Coef(fit.ALAE)[1]
alpha.ALAE <- Coef(fit.ALAE)[2]
theta.ALAE
alpha.ALAE

# PIT using your fitted Pareto II model
u1 = 1 - (1 + (ALAE/theta.ALAE))^(-alpha.ALAE)   
## F_1(ALAE)~U(0,1)
hist(u1, main="", xlab="")
#hist(qnorm(u1), main="", xlab="")

## LOSS distribution
fit.LOSS <- vglm(LOSS ~ 1, paretoII, trace=TRUE)

theta.LOSS <- Coef(fit.LOSS)[1]
alpha.LOSS <- Coef(fit.LOSS)[2]
theta.LOSS
alpha.LOSS

u2 = 1 - (1 + (LOSS/theta.LOSS))^(-alpha.LOSS) 
qqplot(qunif(ppoints(length(u2))), sort(u2))
abline(0,1)
```

```{r,  fig.width=8, fig.height=4.5}
#testing independence
#L<-nrow(cbind(LOSS, ALAE))
#empsamp <- indepTestSim(L, p = 2, N = 500)
#indepTest(cbind(LOSS, ALAE), empsamp)  

par(mfrow = c(1, 2))
#Scatter plot for transformed variables
plot(u1, u2, xlab = quote(U[1]), ylab = quote(U[2]))
plot(qnorm(u1), qnorm(u2), 
  xlab = expression("qnorm"(u[1])), ylab=expression("qnorm"(u[2])))
cor(u1, u2, method = "kendall")

```

2. Joint Modeling with Copulas

```{r,  fig.width=8, fig.height=4.5}

U.<-cbind(u1, u2)

# IFM: use the fitted marginal distributions,  u1  and  u2 , as inputs 

cop_name <- c('clayton','frank','gumbel','normal','t','joe')
cop.fam <- c(claytonCopula(dim=2),
           frankCopula(dim=2),
           gumbelCopula(dim=2),
           normalCopula(dim=2),
           tCopula(dim = 2, df.fixed=FALSE), 
           joeCopula(dim=2))
fit.copula.l <- list()

#
for (i in 1:length(cop.fam)){
  fit.copula.l[[i]] <- fitCopula(cop.fam[[i]], U., method="ml")
}
fit.estimate2D <- vector()
fit.loglik2D      <- vector()
aic.result2D     <- vector()

for (i in 1:length(cop.fam)){
  fit.estimate2D[i] <- coef(fit.copula.l[[i]])[1]
  fit.loglik2D[i]   <- fit.copula.l[[i]]@loglik
  aic.result2D[i]   <- AIC(fit.copula.l[[i]])
}
WIN_AIC <- which.min(cbind(aic.result2D))
cop_name[WIN_AIC]

best_cop <- fit.copula.l[[WIN_AIC]]
coef(best_cop)

# the winner is the Gumbel copula
fit.gc <- fitCopula(gumbelCopula(dim = 2),data = U., method = "ml")  
summary(fit.gc)
fit.gc@estimate # estimated copula parameter
gc <- fit.gc@copula # fitted copula

set.seed(270) # for reproducibility
N <- 1000
gofCopula(gc, x = U., N = N, simulation="mult", 
                    ties.method = "max") 

par(mfrow=c(1,2))
par(mar=c(3.2,3,.2,.2),mfrow=c(1,2))
persp(gc, pCopula, theta=50, zlab="C(u,v)", xlab ="u", ylab="v", cex.lab=0.9, cex.axis = 0.5)
persp(gc, dCopula, theta=0, zlab="c(u,v)",  xlab ="u", ylab="v", cex.lab=0.9, cex.axis = 0.5)
```

```{r,  fig.width=4.5, fig.height=4.5}

## comparison between data and the fitted model

n<-nrow(U.)
plot(U.)
points(rCopula(n, gumbelCopula(param=fit.gc@estimate, dim = 2)), col=3)

Udex = (1:n)/(n+1)
Cn <- C.n(u = cbind(rep(Udex, n), rep(Udex, each=n)) , X = U.)
EmpCop <- expression(contour(Udex,Udex,matrix(Cn,n,n), col=3, add=T))

contour(gumbelCopula(param=fit.gc@estimate, dim = 2), 
        pCopula, main = expression(hat(C)[G]),
        xlab = expression(hat(U)[1]), ylab = expression(hat(U)[2]))
eval(EmpCop)

##
dev.off()
layout(1:1)
theta_hat <- fit.gc@estimate   # theta estimated 
tau_hat <- 1 - 1/theta_hat     # Kendall's tau
tau_hat
set.seed(345)
simU <- rCopula(1000, gc)
plot(simU, pch=16, col=4, main="Gumbel copula simulation")
points(simU[simU[,1] > 0.8 & simU[,2] > 0.8, ], pch=16, col=2)

```

```{r}
# Inverse transformation in LOSS e ALAE
simLOSS <- theta.LOSS * ((1 - simU[,1])^(-1/alpha.LOSS) - 1)
simALAE <- theta.ALAE * ((1 - simU[,2])^(-1/alpha.ALAE) - 1)
TOTLOSS<-simLOSS+simALAE
VaR95  <- quantile(TOTLOSS, 0.95)
ES95  <- mean(TOTLOSS[TOTLOSS > VaR95])
VaR95
ES95
#hist(TOTLOSS, breaks=100, main="", xlab="")
# abline(v = c(VaR95, ES95),col=c("blue","red"),
# lwd=2, lty=2)

```