library(copula)
library(mnormt)               
library(lattice)               
library(mvtnorm)

## Simulation from copula models ----

## Independence copula: CDF surface, its level curves, and a scatter plot
## of a pseudo-random sample (points uniform on the unit square).
ic <- indepCopula(dim = 2)

# perspective plot of C(u1, u2) = u1 * u2
wireframe2(ic, FUN = pCopula)

# level curves of the same CDF
contourplot2(ic, FUN = pCopula)

# draw n observations from the independence copula
set.seed(332)
n <- 1000
U <- rCopula(n, copula = ic)
plot(U, xlab = quote(U[1]), ylab = quote(U[2]))

## Bivariate normal with correlation rho: sample, map to the copula scale
## via the probability integral transform (PIT), then to Exp(2) margins.
d <- 2
rho <- 0.7                            # rho is reused later for normalCopula()
Sigma <- matrix(rho, nrow = d, ncol = d)
diag(Sigma) <- 1                      # equicorrelation matrix, unit diagonal
X <- rmvnorm(1000, sigma = Sigma)     # bivariate normal observations
plot(X, xlim = c(-4, 4), ylim = c(-4, 4),
     xlab = quote(X[1]), ylab = quote(X[2]))

# PIT: apply the standard normal CDF margin-wise -> copula observations
U <- pnorm(X)

### same as
#set.seed(332)
#U.<-rCopula(1000, normalCopula(rho, dim=2))

plot(U, xlim = c(0, 1), ylim = c(0, 1),
     xlab = quote(U[1]), ylab = quote(U[2]))

# quantile transform: give the sample Exp(rate = 2) margins
Y <- qexp(U, 2)
plot(Y, xlim = c(0, 4), ylim = c(0, 4),
     xlab = quote(Y[1]), ylab = quote(Y[2]))

## Normal-copula wireframe and level curves
nc <- normalCopula(rho, dim = 2)
# NOTE(review): par(mfrow) does not affect lattice-based graphics
# (wireframe2/contourplot2); kept for compatibility with the original script.
par(mfrow=c(1,3))
# FIX: the wireframe2 argument is 'draw.4.pCzlines' (suppress the four
# z-axis lines drawn for pCopula surfaces); 'draw.4.pCoplines' was a typo.
wireframe2(nc, FUN = pCopula, draw.4.pCzlines = FALSE)

contourplot2(nc, FUN = pCopula)

contourplot2(nc, FUN = dCopula) # density

## Clayton copula and its survival (180-degree rotated) counterpart
cop <- claytonCopula(2)           # Clayton copula with parameter theta = 2
U <- rCopula(1000, copula = cop)
V <- 1 - U                        # (1 - U1, 1 - U2): survival Clayton sample
par(mfrow = c(1, 2))
plot(U, xlab = quote(U[1]), ylab = quote(U[2]))
plot(V, xlab = quote(V[1]), ylab = quote(V[2]))

#wireframe2(rotCopula(cop), FUN=dCopula, delta=0.025)

## Student's t copula: density level curves
nu <- 4                           # degrees of freedom (reused below)
param <- 0.8                      # correlation parameter rho (reused below)
tc <- tCopula(param, df = nu)
#contourplot2(tc, FUN = pCopula) # copula
contourplot2(tc, FUN = dCopula) # density

# Calibrate the t-copula correlation by inverting Kendall's tau:
# for elliptical copulas, rho = sin(pi * tau / 2).
param1 <- iTau(tCopula(df = nu), tau = 0.6)
nu <- 4
# FIX: use the inverted parameter 'param1'; the original passed 'param' (0.8),
# so the iTau result above was computed but never used.
tc1 <- tCopula(param1, df = nu)
tc1
## t-copula, dim. d = 2 
## Dimension:  2 
## Parameters:
##   rho.1   = 0.809    (= sin(pi * 0.6 / 2))
##   df      = 4.0
# sample from the t copula fitted earlier (rho = 0.8)
set.seed(271)
par(mfrow = c(1, 2))
U <- rCopula(1000, copula = tc)
plot(U, xlab = quote(U[1]), ylab = quote(U[2]))
# stronger dependence: rho matching Kendall's tau = 0.8
param2 <- iTau(tCopula(df = nu), tau = 0.8)
tc2 <- tCopula(param2, df = nu)
V <- rCopula(1000, copula = tc2)
plot(V, xlab = quote(V[1]), ylab = quote(V[2]))

## Copula estimation and goodness-of-fit: application to the losses and expenses (LOSS/ALAE) data ----

# LOSS/ALAE data: n=1500 obs., two variables: losses and expenses.
# LOSS: general liability claims from the Insurance Services Office
# ALAE: allocated loss adjustment expenses, specifically attributable to the
# settlement of individual claims (e.g. lawyer's fees, claims investigation
# expenses)

data(loss) # loss data from copula library
dim(loss)
## [1] 1500    4
# keep only the two variables of interest
LOSS <- loss$loss
ALAE <- loss$alae
par(mfrow=c(1, 2))
plot(LOSS, ALAE, cex=.5) # dollar scale
plot(log(LOSS), log(ALAE),cex=.5) # log scale

# pseudo-copula observations: component-wise ranks rescaled to (0, 1),
# i.e. nonparametric estimates of the marginal CDF values
Z<-pobs(cbind(LOSS, ALAE))
plot(Z,  xlab = quote(U[1]), ylab = quote(U[2])) 

## 1. Fit marginal Pareto distributions ----
layout(1:1)

library(VGAM)
# ALAE ~ Pareto II(location = 0, scale = theta, shape = alpha)
# F(x) = 1 - (1 + x/theta)^(-alpha)

# estimating a distribution for ALAE (no covariates)
# location param = 0; scale parameter is modeled on the log scale (positive);
# shape parameter also uses a log link to ensure positivity

fit.ALAE <- vglm(ALAE ~ 1, paretoII(location=0, lscale="loglink", lshape="loglink")) 

# scale and shape estimates (on the log-link scale)
coef(fit.ALAE, matrix=TRUE) 
##             loglink(scale) loglink(shape)
## (Intercept)       9.624673      0.7988753
# extract fitted model coefficients; matrix=TRUE gives the logarithm of the
# estimated parameters instead of the default natural-scale estimates
# parameters on the natural scale (Coef() back-transforms the log links)
theta.ALAE <- Coef(fit.ALAE)[1]
alpha.ALAE <- Coef(fit.ALAE)[2]
theta.ALAE
##   scale 
## 15133.6
alpha.ALAE
##    shape 
## 2.223039
# PIT using the fitted Pareto II model: F(ALAE) should be ~ U(0,1)
u1 = 1 - (1 + (ALAE/theta.ALAE))^(-alpha.ALAE)   
## F_1(ALAE)~U(0,1)
# histogram should look approximately flat if the marginal fit is adequate
hist(u1, main="", xlab="")

#hist(qnorm(u1), main="", xlab="")

## LOSS distribution: Pareto II with default log links for scale and shape;
## trace=TRUE prints the log-likelihood at each IRLS iteration
fit.LOSS <- vglm(LOSS ~ 1, paretoII, trace=TRUE)
## Iteration 1: loglikelihood = -16961.9685
## Iteration 2: loglikelihood = -16935.9218
## Iteration 3: loglikelihood = -16933.9178
## Iteration 4: loglikelihood = -16933.8858
## Iteration 5: loglikelihood = -16933.8856
## Iteration 6: loglikelihood = -16933.8856
# natural-scale parameter estimates
theta.LOSS <- Coef(fit.LOSS)[1]
alpha.LOSS <- Coef(fit.LOSS)[2]
theta.LOSS
##    scale 
## 16228.15
alpha.LOSS
##   shape 
## 1.23766
# PIT for LOSS: u2 ~ U(0,1) if the marginal model fits
u2 = 1 - (1 + (LOSS/theta.LOSS))^(-alpha.LOSS) 
# uniform Q-Q plot as a visual goodness-of-fit check for the margin
qqplot(qunif(ppoints(length(u2))), sort(u2))
abline(0,1)

# testing independence (left commented out: indepTestSim's simulation of the
# null distribution is computationally heavy)
#L<-nrow(cbind(LOSS, ALAE))
#empsamp <- indepTestSim(L, p = 2, N = 500)
#indepTest(cbind(LOSS, ALAE), empsamp)  

par(mfrow = c(1, 2))
# scatter plots of the PIT-transformed variables: copula scale (left)
# and normal scores (right)
plot(u1, u2, xlab = quote(U[1]), ylab = quote(U[2]))
plot(qnorm(u1), qnorm(u2), 
  xlab = expression("qnorm"(u[1])), ylab=expression("qnorm"(u[2])))

# Kendall's tau: rank-based, invariant under the monotone marginal transforms
cor(u1, u2, method = "kendall")
## [1] 0.3154175
## [1] 0.3154175
## 2. Joint modeling with copulas ----
## IFM / two-stage estimation: with the fitted marginal PIT values (u1, u2)
## as data, fit several parametric copula families by ML and select by AIC.
U. <- cbind(u1, u2)

cop_name <- c('clayton','frank','gumbel','normal','t','joe')
# list() instead of c(): clearer for a collection of S4 copula objects
cop.fam <- list(claytonCopula(dim = 2),
                frankCopula(dim = 2),
                gumbelCopula(dim = 2),
                normalCopula(dim = 2),
                tCopula(dim = 2, df.fixed = FALSE),
                joeCopula(dim = 2))
# preallocate instead of growing inside the loop
fit.copula.l <- vector("list", length(cop.fam))

# maximum-likelihood fit for each candidate family
for (i in seq_along(cop.fam)) {
  fit.copula.l[[i]] <- fitCopula(cop.fam[[i]], U., method = "ml")
}
fit.estimate2D <- numeric(length(cop.fam))  # first copula parameter
fit.loglik2D   <- numeric(length(cop.fam))  # maximized log-likelihood
aic.result2D   <- numeric(length(cop.fam))  # AIC per family

for (i in seq_along(cop.fam)) {
  fit.estimate2D[i] <- coef(fit.copula.l[[i]])[1]
  fit.loglik2D[i]   <- fit.copula.l[[i]]@loglik
  aic.result2D[i]   <- AIC(fit.copula.l[[i]])
}
# smallest AIC wins (the cbind() in the original was redundant)
WIN_AIC <- which.min(aic.result2D)
cop_name[WIN_AIC]
## [1] "gumbel"
best_cop <- fit.copula.l[[WIN_AIC]]
coef(best_cop)
##    alpha 
## 1.444397
# the winner is the Gumbel copula
# the winner is the Gumbel copula
# refit the AIC winner (Gumbel) explicitly
fit.gc <- fitCopula(gumbelCopula(dim = 2),data = U., method = "ml")  
summary(fit.gc)
## Call: fitCopula(gumbelCopula(dim = 2), data = U., method = "ml")
## Fit based on "maximum likelihood" and 1500 2-dimensional observations.
## Gumbel copula, dim. d = 2 
##       Estimate Std. Error
## alpha    1.444      0.029
## The maximized loglikelihood is 204.9 
## Optimization converged
## Number of loglikelihood evaluations:
## function gradient 
##        6        6
fit.gc@estimate # estimated copula parameter
## [1] 1.444397
gc <- fit.gc@copula # fitted copula (Gumbel with the estimated alpha)

set.seed(270) # for reproducibility
N <- 1000
# parametric goodness-of-fit test: multiplier bootstrap with N replications,
# Cramer-von Mises statistic Sn; ties handled via the "max" rank method
gofCopula(gc, x = U., N = N, simulation="mult", 
                    ties.method = "max") 
## 
##  Multiplier bootstrap-based goodness-of-fit test of Gumbel copula, dim.
##  d = 2, with 'method'="Sn", 'estim.method'="mpl":
## 
## data:  x
## statistic = 0.026645, parameter = 1.4417, p-value = 0.2463
# p-value ~ 0.25: no evidence against the Gumbel copula
par(mfrow=c(1,2))
par(mar=c(3.2,3,.2,.2),mfrow=c(1,2))
# perspective plots of the fitted copula CDF (left) and density (right)
persp(gc, pCopula, theta=50, zlab="C(u,v)", xlab ="u", ylab="v", cex.lab=0.9, cex.axis = 0.5)
persp(gc, dCopula, theta=0, zlab="c(u,v)",  xlab ="u", ylab="v", cex.lab=0.9, cex.axis = 0.5)

## comparison between data and the fitted model

n <- nrow(U.)
plot(U.)
# overlay a simulated sample from the fitted Gumbel copula (green)
points(rCopula(n, gumbelCopula(param = fit.gc@estimate, dim = 2)), col = 3)

# empirical copula C_n evaluated on an n x n grid over the unit square
Udex <- (1:n) / (n + 1)   # '<-' instead of '=' for top-level assignment
Cn <- C.n(u = cbind(rep(Udex, n), rep(Udex, each = n)), X = U.)
# helper replaces the expression()/eval() anti-pattern; TRUE instead of T
add_emp_contour <- function() {
  contour(Udex, Udex, matrix(Cn, n, n), col = 3, add = TRUE)
}

# level curves of the fitted Gumbel copula ...
contour(gumbelCopula(param = fit.gc@estimate, dim = 2), 
        pCopula, main = expression(hat(C)[G]),
        xlab = expression(hat(U)[1]), ylab = expression(hat(U)[2]))
# ... with the empirical copula's level curves overlaid (green)
add_emp_contour()

##
dev.off()   # close the current graphics device (resets par/layout)
## null device 
##           1
layout(1:1)
# implied Kendall's tau of the fitted Gumbel copula: tau = 1 - 1/theta
theta_hat <- fit.gc@estimate   # theta estimated 
tau_hat <- 1 - 1/theta_hat     # Kendall's tau
tau_hat
## [1] 0.3076696
set.seed(345)
simU <- rCopula(1000, gc)
plot(simU, pch=16, col=4, main="Gumbel copula simulation")
# highlight joint upper-tail events: both components above 0.8 (red)
points(simU[simU[,1] > 0.8 & simU[,2] > 0.8, ], pch=16, col=2)
# Inverse transformation to the LOSS and ALAE scale via the Pareto II
# quantile function: F^{-1}(u) = theta * ((1 - u)^(-1/alpha) - 1)
simLOSS <- theta.LOSS * ((1 - simU[,1])^(-1/alpha.LOSS) - 1)
simALAE <- theta.ALAE * ((1 - simU[,2])^(-1/alpha.ALAE) - 1)
TOTLOSS<-simLOSS+simALAE
# risk measures of the simulated total loss at the 95% level
VaR95  <- quantile(TOTLOSS, 0.95)
ES95  <- mean(TOTLOSS[TOTLOSS > VaR95])   # expected shortfall beyond VaR95
VaR95
##      95% 
## 165823.6
ES95
## [1] 536647.6
#hist(TOTLOSS, breaks=100, main="", xlab="")
# abline(v = c(VaR95, ES95),col=c("blue","red"),
# lwd=2, lty=2)