library(copula)
library(mnormt)
library(lattice)
library(mvtnorm)
Simulation from copula models
# sampling from independence copula
ic<-indepCopula(dim=2)
wireframe2(ic, FUN=pCopula)
contourplot2(ic, FUN=pCopula)
set.seed(332)
n<-1000
U<-rCopula(n, copula=ic)
plot(U, xlab=quote(U[1]), ylab=quote(U[2]))
# sampling from bivariate normal distribution
d<-2
rho<-0.7
P<-matrix(rho, nrow=d, ncol=d) # correlation matrix
diag(P)<-1
X<-rmvnorm(1000, sigma = P) # bivariate normal obs.
plot(X, xlim=c(-4, 4), ylim=c(-4, 4),
xlab=quote(X[1]), ylab=quote(X[2]))
# transform to copula observations via PIT
U<-pnorm(X)
### same as
#set.seed(332)
#U.<-rCopula(1000, normalCopula(rho, dim=2))
plot(U, xlim=c(0, 1), ylim=c(0, 1),
xlab=quote(U[1]), ylab=quote(U[2]))
#transform U to exp(2) margins
Y<-qexp(U, 2)
plot(Y, xlim=c(0, 4), ylim=c(0, 4),
xlab=quote(Y[1]), ylab=quote(Y[2]))
## Copula wire frame and level curves
nc<-normalCopula(rho, dim=2)
par(mfrow=c(1,3))
wireframe2(nc, FUN = pCopula, draw.4.pCoplines=FALSE)
contourplot2(nc, FUN = pCopula)
contourplot2(nc, FUN = dCopula) # density
#Sampling from Clayton and survival Clayton copulas
cop<-claytonCopula(2)
U<-rCopula(1000, copula=cop)
V<-1-U #sample from the survival Clayton copula
par(mfrow=c(1,2))
plot(U, xlab = quote(U[1]), ylab = quote(U[2]))
plot(V, xlab = quote(V[1]), ylab = quote(V[2]))
#wireframe2(rotCopula(cop), FUN=dCopula, delta=0.025)
# Student's t copula
nu <- 4
param<-0.8
tc <- tCopula(param, df = nu)
#contourplot2(tc, FUN = pCopula) # copula
contourplot2(tc, FUN = dCopula) # density
# inversion of Kendall's tau
param1<-iTau(tCopula(df = nu), tau = 0.6)
nu <- 4
tc1 <- tCopula(param, df = nu)
tc1
## t-copula, dim. d = 2
## Dimension: 2
## Parameters:
## rho.1 = 0.8
## df = 4.0
# sample from the t copula
set.seed(271)
par(mfrow=c(1,2))
U <- rCopula(1000, copula = tc)
plot(U, xlab = quote(U[1]), ylab = quote(U[2]))
param2<-iTau(tCopula(df = nu), tau = 0.8)
tc2 <- tCopula(param2, df = nu)
V <- rCopula(1000, copula = tc2)
plot(V, xlab = quote(V[1]), ylab = quote(V[2]))
Copula estimation and goodness-of-fit: Application to losses and expenses data LOSS/ALAE
# LOSS/ALAE data: n=1500 obs., two variables: losses and expenses.
#LOSS: general liability claims from the Insurance Services Office
# ALAE: specifically attributable to the settlement of individual claims
# (e.g. lawyer’s fees, claims investigation expenses)
data(loss) # loss data from copula library
dim(loss)
## [1] 1500 4
LOSS <- loss$loss
ALAE <- loss$alae
par(mfrow=c(1, 2))
plot(LOSS, ALAE, cex=.5) # dollar scale
plot(log(LOSS), log(ALAE),cex=.5) # log scale
#pseudo-copula observations
Z<-pobs(cbind(LOSS, ALAE))
plot(Z, xlab = quote(U[1]), ylab = quote(U[2]))
layout(1:1)
library(VGAM)
#ALAE∼Pareto II(0,theta,alpha)
#F(x)=1−(1+x/theta)^{-alpha}
# estimating a distribution for ALAE (no covariates)
# location param = 0; scale parameter is modeled on the log scale (positive); shape parameter also uses a log link to ensure positivity
fit.ALAE <- vglm(ALAE ~ 1, paretoII(location=0, lscale="loglink", lshape="loglink"))
# scale and shape estimates
coef(fit.ALAE, matrix=TRUE)
## loglink(scale) loglink(shape)
## (Intercept) 9.624673 0.7988753
# extract fitted model coefficients, matrix=TRUE gives logarithm of estimated parameters instead of default normal scale estimates
#parameters on the natural scale
theta.ALAE <- Coef(fit.ALAE)[1]
alpha.ALAE <- Coef(fit.ALAE)[2]
theta.ALAE
## scale
## 15133.6
alpha.ALAE
## shape
## 2.223039
# PIT using your fitted Pareto II model
u1 = 1 - (1 + (ALAE/theta.ALAE))^(-alpha.ALAE)
## F_1(ALAE)~U(0,1)
hist(u1, main="", xlab="")
#hist(qnorm(u1), main="", xlab="")
## LOSS distribution
fit.LOSS <- vglm(LOSS ~ 1, paretoII, trace=TRUE)
## Iteration 1: loglikelihood = -16961.9685
## Iteration 2: loglikelihood = -16935.9218
## Iteration 3: loglikelihood = -16933.9178
## Iteration 4: loglikelihood = -16933.8858
## Iteration 5: loglikelihood = -16933.8856
## Iteration 6: loglikelihood = -16933.8856
theta.LOSS <- Coef(fit.LOSS)[1]
alpha.LOSS <- Coef(fit.LOSS)[2]
theta.LOSS
## scale
## 16228.15
alpha.LOSS
## shape
## 1.23766
u2 = 1 - (1 + (LOSS/theta.LOSS))^(-alpha.LOSS)
qqplot(qunif(ppoints(length(u2))), sort(u2))
abline(0,1)
#testing independence
#L<-nrow(cbind(LOSS, ALAE))
#empsamp <- indepTestSim(L, p = 2, N = 500)
#indepTest(cbind(LOSS, ALAE), empsamp)
par(mfrow = c(1, 2))
#Scatter plot for transformed variables
plot(u1, u2, xlab = quote(U[1]), ylab = quote(U[2]))
plot(qnorm(u1), qnorm(u2),
xlab = expression("qnorm"(u[1])), ylab=expression("qnorm"(u[2])))
cor(u1, u2, method = "kendall")
## [1] 0.3154175
U.<-cbind(u1, u2)
# IFM: use the fitted marginal distributions, u1 and u2 , as inputs
cop_name <- c('clayton','frank','gumbel','normal','t','joe')
cop.fam <- c(claytonCopula(dim=2),
frankCopula(dim=2),
gumbelCopula(dim=2),
normalCopula(dim=2),
tCopula(dim = 2, df.fixed=FALSE),
joeCopula(dim=2))
fit.copula.l <- list()
#
for (i in 1:length(cop.fam)){
fit.copula.l[[i]] <- fitCopula(cop.fam[[i]], U., method="ml")
}
fit.estimate2D <- vector()
fit.loglik2D <- vector()
aic.result2D <- vector()
for (i in 1:length(cop.fam)){
fit.estimate2D[i] <- coef(fit.copula.l[[i]])[1]
fit.loglik2D[i] <- fit.copula.l[[i]]@loglik
aic.result2D[i] <- AIC(fit.copula.l[[i]])
}
WIN_AIC <- which.min(cbind(aic.result2D))
cop_name[WIN_AIC]
## [1] "gumbel"
best_cop <- fit.copula.l[[WIN_AIC]]
coef(best_cop)
## alpha
## 1.444397
# the winner is the Gumbel copula
fit.gc <- fitCopula(gumbelCopula(dim = 2),data = U., method = "ml")
summary(fit.gc)
## Call: fitCopula(gumbelCopula(dim = 2), data = U., method = "ml")
## Fit based on "maximum likelihood" and 1500 2-dimensional observations.
## Gumbel copula, dim. d = 2
## Estimate Std. Error
## alpha 1.444 0.029
## The maximized loglikelihood is 204.9
## Optimization converged
## Number of loglikelihood evaluations:
## function gradient
## 6 6
fit.gc@estimate # estimated copula parameter
## [1] 1.444397
gc <- fit.gc@copula # fitted copula
set.seed(270) # for reproducibility
N <- 1000
gofCopula(gc, x = U., N = N, simulation="mult",
ties.method = "max")
##
## Multiplier bootstrap-based goodness-of-fit test of Gumbel copula, dim.
## d = 2, with 'method'="Sn", 'estim.method'="mpl":
##
## data: x
## statistic = 0.026645, parameter = 1.4417, p-value = 0.2463
par(mfrow=c(1,2))
par(mar=c(3.2,3,.2,.2),mfrow=c(1,2))
persp(gc, pCopula, theta=50, zlab="C(u,v)", xlab ="u", ylab="v", cex.lab=0.9, cex.axis = 0.5)
persp(gc, dCopula, theta=0, zlab="c(u,v)", xlab ="u", ylab="v", cex.lab=0.9, cex.axis = 0.5)
## comparison between data and the fitted model
n<-nrow(U.)
plot(U.)
points(rCopula(n, gumbelCopula(param=fit.gc@estimate, dim = 2)), col=3)
Udex = (1:n)/(n+1)
Cn <- C.n(u = cbind(rep(Udex, n), rep(Udex, each=n)) , X = U.)
EmpCop <- expression(contour(Udex,Udex,matrix(Cn,n,n), col=3, add=T))
contour(gumbelCopula(param=fit.gc@estimate, dim = 2),
pCopula, main = expression(hat(C)[G]),
xlab = expression(hat(U)[1]), ylab = expression(hat(U)[2]))
eval(EmpCop)
##
dev.off()
## null device
## 1
layout(1:1)
theta_hat <- fit.gc@estimate # theta estimated
tau_hat <- 1 - 1/theta_hat # Kendall's tau
tau_hat
## [1] 0.3076696
set.seed(345)
simU <- rCopula(1000, gc)
plot(simU, pch=16, col=4, main="Gumbel copula simulation")
points(simU[simU[,1] > 0.8 & simU[,2] > 0.8, ], pch=16, col=2)
# Inverse transformation in LOSS e ALAE
simLOSS <- theta.LOSS * ((1 - simU[,1])^(-1/alpha.LOSS) - 1)
simALAE <- theta.ALAE * ((1 - simU[,2])^(-1/alpha.ALAE) - 1)
TOTLOSS<-simLOSS+simALAE
VaR95 <- quantile(TOTLOSS, 0.95)
ES95 <- mean(TOTLOSS[TOTLOSS > VaR95])
VaR95
## 95%
## 165823.6
ES95
## [1] 536647.6
#hist(TOTLOSS, breaks=100, main="", xlab="")
# abline(v = c(VaR95, ES95),col=c("blue","red"),
# lwd=2, lty=2)