--- title: "Introduction to copulas" output: pdf_document: default html_document: df_print: paged editor_options: chunk_output_type: console --- ```{r, } library(copula) library(mnormt) library(lattice) library(mvtnorm) ``` Simulation from copula models ```{r, fig.width=4, fig.height=4} # sampling from independence copula ic<-indepCopula(dim=2) wireframe2(ic, FUN=pCopula) contourplot2(ic, FUN=pCopula) set.seed(332) n<-1000 U<-rCopula(n, copula=ic) plot(U, xlab=quote(U[1]), ylab=quote(U[2])) # sampling from bivariate normal distribution d<-2 rho<-0.7 P<-matrix(rho, nrow=d, ncol=d) # correlation matrix diag(P)<-1 X<-rmvnorm(1000, sigma = P) # bivariate normal obs. plot(X, xlim=c(-4, 4), ylim=c(-4, 4), xlab=quote(X[1]), ylab=quote(X[2])) # transform to copula observations via PIT U<-pnorm(X) ### same as #set.seed(332) #U.<-rCopula(1000, normalCopula(rho, dim=2)) plot(U, xlim=c(0, 1), ylim=c(0, 1), xlab=quote(U[1]), ylab=quote(U[2])) #transform U to exp(2) margins Y<-qexp(U, 2) plot(Y, xlim=c(0, 4), ylim=c(0, 4), xlab=quote(Y[1]), ylab=quote(Y[2])) ## Copula wire frame and level curves nc<-normalCopula(rho, dim=2) par(mfrow=c(1,3)) wireframe2(nc, FUN = pCopula, draw.4.pCoplines=FALSE) contourplot2(nc, FUN = pCopula) contourplot2(nc, FUN = dCopula) # density ``` ```{r, fig.width=8, fig.height=4} #Sampling from Clayton and survival Clayton copulas cop<-claytonCopula(2) U<-rCopula(1000, copula=cop) V<-1-U #sample from the survival Clayton copula par(mfrow=c(1,2)) plot(U, xlab = quote(U[1]), ylab = quote(U[2])) plot(V, xlab = quote(V[1]), ylab = quote(V[2])) #wireframe2(rotCopula(cop), FUN=dCopula, delta=0.025) # Student's t copula nu <- 4 param<-0.8 tc <- tCopula(param, df = nu) #contourplot2(tc, FUN = pCopula) # copula contourplot2(tc, FUN = dCopula) # density # inversion of Kendall's tau param1<-iTau(tCopula(df = nu), tau = 0.6) nu <- 4 tc1 <- tCopula(param, df = nu) tc1 # sample from the t copula set.seed(271) par(mfrow=c(1,2)) U <- rCopula(1000, copula = tc) plot(U, xlab = quote(U[1]), ylab = quote(U[2])) param2<-iTau(tCopula(df = nu), tau = 0.8) tc2 <- tCopula(param2, df = nu) V <- rCopula(1000, copula = tc2) plot(V, xlab = quote(V[1]), ylab = quote(V[2])) ``` Copula estimation and goodness-of-fit: Application to losses and expenses data LOSS/ALAE ```{r, fig.width=8, fig.height=4} # LOSS/ALAE data: n=1500 obs., two variables: losses and expenses. #LOSS: general liability claims from the Insurance Services Office # ALAE: specifically attributable to the settlement of individual claims # (e.g. lawyer’s fees, claims investigation expenses) data(loss) # loss data from copula library dim(loss) LOSS <- loss$loss ALAE <- loss$alae par(mfrow=c(1, 2)) plot(LOSS, ALAE, cex=.5) # dollar scale plot(log(LOSS), log(ALAE),cex=.5) # log scale #pseudo-copula observations Z<-pobs(cbind(LOSS, ALAE)) plot(Z, xlab = quote(U[1]), ylab = quote(U[2])) ``` 1. Fit marginal Pareto distributions ```{r, fig.width=8, fig.height=5, message=FALSE, warning=FALSE} layout(1:1) library(VGAM) #ALAE∼Pareto II(0,theta,alpha) #F(x)=1−(1+x/theta)^{-alpha} # estimating a distribution for ALAE (no covariates) # location param = 0; scale parameter is modeled on the log scale (positive); shape parameter also uses a log link to ensure positivity fit.ALAE <- vglm(ALAE ~ 1, paretoII(location=0, lscale="loglink", lshape="loglink")) # scale and shape estimates coef(fit.ALAE, matrix=TRUE) # extract fitted model coefficients, matrix=TRUE gives logarithm of estimated parameters instead of default normal scale estimates #parameters on the natural scale theta.ALAE <- Coef(fit.ALAE)[1] alpha.ALAE <- Coef(fit.ALAE)[2] theta.ALAE alpha.ALAE # PIT using your fitted Pareto II model u1 = 1 - (1 + (ALAE/theta.ALAE))^(-alpha.ALAE) ## F_1(ALAE)~U(0,1) hist(u1, main="", xlab="") #hist(qnorm(u1), main="", xlab="") ## LOSS distribution fit.LOSS <- vglm(LOSS ~ 1, paretoII, trace=TRUE) theta.LOSS <- Coef(fit.LOSS)[1] alpha.LOSS <- Coef(fit.LOSS)[2] theta.LOSS alpha.LOSS u2 = 1 - (1 + (LOSS/theta.LOSS))^(-alpha.LOSS) qqplot(qunif(ppoints(length(u2))), sort(u2)) abline(0,1) ``` ```{r, fig.width=8, fig.height=4.5} #testing independence #L<-nrow(cbind(LOSS, ALAE)) #empsamp <- indepTestSim(L, p = 2, N = 500) #indepTest(cbind(LOSS, ALAE), empsamp) par(mfrow = c(1, 2)) #Scatter plot for transformed variables plot(u1, u2, xlab = quote(U[1]), ylab = quote(U[2])) plot(qnorm(u1), qnorm(u2), xlab = expression("qnorm"(u[1])), ylab=expression("qnorm"(u[2]))) cor(u1, u2, method = "kendall") ``` 2. Joint Modeling with Copulas ```{r, fig.width=8, fig.height=4.5} U.<-cbind(u1, u2) # IFM: use the fitted marginal distributions, u1 and u2 , as inputs cop_name <- c('clayton','frank','gumbel','normal','t','joe') cop.fam <- c(claytonCopula(dim=2), frankCopula(dim=2), gumbelCopula(dim=2), normalCopula(dim=2), tCopula(dim = 2, df.fixed=FALSE), joeCopula(dim=2)) fit.copula.l <- list() # for (i in 1:length(cop.fam)){ fit.copula.l[[i]] <- fitCopula(cop.fam[[i]], U., method="ml") } fit.estimate2D <- vector() fit.loglik2D <- vector() aic.result2D <- vector() for (i in 1:length(cop.fam)){ fit.estimate2D[i] <- coef(fit.copula.l[[i]])[1] fit.loglik2D[i] <- fit.copula.l[[i]]@loglik aic.result2D[i] <- AIC(fit.copula.l[[i]]) } WIN_AIC <- which.min(cbind(aic.result2D)) cop_name[WIN_AIC] best_cop <- fit.copula.l[[WIN_AIC]] coef(best_cop) # the winner is the Gumbel copula fit.gc <- fitCopula(gumbelCopula(dim = 2),data = U., method = "ml") summary(fit.gc) fit.gc@estimate # estimated copula parameter gc <- fit.gc@copula # fitted copula set.seed(270) # for reproducibility N <- 1000 gofCopula(gc, x = U., N = N, simulation="mult", ties.method = "max") par(mfrow=c(1,2)) par(mar=c(3.2,3,.2,.2),mfrow=c(1,2)) persp(gc, pCopula, theta=50, zlab="C(u,v)", xlab ="u", ylab="v", cex.lab=0.9, cex.axis = 0.5) persp(gc, dCopula, theta=0, zlab="c(u,v)", xlab ="u", ylab="v", cex.lab=0.9, cex.axis = 0.5) ``` ```{r, fig.width=4.5, fig.height=4.5} ## comparison between data and the fitted model n<-nrow(U.) plot(U.) points(rCopula(n, gumbelCopula(param=fit.gc@estimate, dim = 2)), col=3) Udex = (1:n)/(n+1) Cn <- C.n(u = cbind(rep(Udex, n), rep(Udex, each=n)) , X = U.) EmpCop <- expression(contour(Udex,Udex,matrix(Cn,n,n), col=3, add=T)) contour(gumbelCopula(param=fit.gc@estimate, dim = 2), pCopula, main = expression(hat(C)[G]), xlab = expression(hat(U)[1]), ylab = expression(hat(U)[2])) eval(EmpCop) ## dev.off() layout(1:1) theta_hat <- fit.gc@estimate # theta estimated tau_hat <- 1 - 1/theta_hat # Kendall's tau tau_hat set.seed(345) simU <- rCopula(1000, gc) plot(simU, pch=16, col=4, main="Gumbel copula simulation") points(simU[simU[,1] > 0.8 & simU[,2] > 0.8, ], pch=16, col=2) ``` ```{r} # Inverse transformation in LOSS e ALAE simLOSS <- theta.LOSS * ((1 - simU[,1])^(-1/alpha.LOSS) - 1) simALAE <- theta.ALAE * ((1 - simU[,2])^(-1/alpha.ALAE) - 1) TOTLOSS<-simLOSS+simALAE VaR95 <- quantile(TOTLOSS, 0.95) ES95 <- mean(TOTLOSS[TOTLOSS > VaR95]) VaR95 ES95 #hist(TOTLOSS, breaks=100, main="", xlab="") # abline(v = c(VaR95, ES95),col=c("blue","red"), # lwd=2, lty=2) ```