#####################################
# Bayesian Statistics: Laboratory 3 #
# 28/04/2023                        #
#####################################

##############################################################
## Load the following packages                               #
##############################################################
library(mvtnorm)        # For the multivariate normal distribution
library(truncnorm)      # For the truncated normal distribution
library(microbenchmark) # For computational time comparisons
library(gclus)          # For the dataset bank

set.seed(123)

#############################################################
## Load the bank dataset and explore it
data(bank)
help(bank)

# Extract the outcome
y <- bank$Status
# equivalently
# y <- bank[,1]

# Build the model matrix
X <- model.matrix(~ Length + Left + Right + Bottom, data = bank)
# equivalently
# X <- cbind(1, bank[,2:5])

# Sample size
n <- dim(bank)[1]

###########################
# Probit regression model #
###########################

## Obtain the MLE of beta:
## -) Use `glm()` (with the suitable arguments)
## -) extract $\hat\beta$, as well as the estimated covariance matrix of $\hat\beta$ ($\hat\Sigma_{\beta}$)

# Fit the model
fit_glm <- glm(y ~ Length + Left + Right + Bottom,
               family = binomial(link = "probit"), data = bank)
# Alternatively
# fit_glm <- glm(Status ~ X - 1, family = binomial(link = "probit"), data = bank)

# MLE
beta_mle <- coef(fit_glm)
round(beta_mle, 2)

# Obtain the estimated covariance matrix of beta
sigma.asymp <- summary(fit_glm)$cov.unscaled
sigma.asymp

# Alternatively, sigma.asymp can be obtained as
eta_hat <- predict(fit_glm)
p_hat <- predict(fit_glm, type = "response")
Wii <- dnorm(eta_hat)^2 / (p_hat * (1 - p_hat))
sigma.asymp2 <- solve(t(X) %*% diag(Wii) %*% X)
round(sigma.asymp2, 2)

## A computational note: the previous implementation of the matrix product $X^\top W X$ is not efficient.
## Consider the following implementations, which
## -) first explicitly take into account the diagonal structure of $W$
## -) and then make use of the efficient matrix product via `crossprod()`
## We then compare the computational performance by means of the `microbenchmark()` function of the **microbenchmark** package
PREC1 <- t(X) %*% diag(Wii) %*% X
PREC2 <- t(X) %*% (X * Wii)
PREC3 <- crossprod(X * sqrt(Wii))
c(max(abs(PREC1 - PREC2)), max(abs(PREC1 - PREC3)))

microbenchmark(
  PREC1 = t(X) %*% diag(Wii) %*% X,
  PREC2 = t(X) %*% (X * Wii),
  PREC3 = crossprod(X * sqrt(Wii)))

####################################
# Bayesian Probit regression model #
####################################

# With a flat (uniform) prior, write the posterior function
post <- function(b, y, X){
  pi <- pnorm(X %*% as.vector(b))
  return(prod(pi ^ y * (1 - pi) ^ (1 - y)))
}

## RW Metropolis-Hastings
# Implement the following random walk Metropolis-Hastings algorithm:
# -) Initialize $\beta^{(0)} = \hat{\beta}$
# -) For S-1 iterations repeat the following steps:
#    -) Generate $\beta^* \sim N_{p}(\beta^{(s-1)}, \tau^2 \hat\Sigma)$
#    -) Compute the acceptance probability as
#       $$\rho(\beta^{(s-1)}, \beta^*) = \min\left(1, \frac{\pi(\beta^*|y)}{\pi(\beta^{(s-1)}|y)}\right)$$
#    -) Generate $U \sim U(0,1)$: if $U < \rho(\beta^{(s-1)}, \beta^*)$, then $\beta^{(s)} = \beta^*$,
#       otherwise $\beta^{(s)} = \beta^{(s-1)}$.
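# A numerical side note (optional, not part of the lab implementation below): post()
# multiplies n Bernoulli terms, so for larger data sets or poorly located proposals it
# can underflow to 0 and make the acceptance ratio ill-defined. A minimal sketch of a
# log-scale variant (the name log_post is only illustrative; the sampler below keeps
# using post()):
log_post <- function(b, y, X){
  eta <- X %*% as.vector(b)
  # log-likelihood under a flat prior: sum of y*log(Phi(eta)) + (1-y)*log(1-Phi(eta))
  sum(y * pnorm(eta, log.p = TRUE) +
        (1 - y) * pnorm(eta, lower.tail = FALSE, log.p = TRUE))
}
# On the log scale one would accept when
# log(runif(1)) < log_post(proposal, y, X) - log_post(current, y, X)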
# Note: beta_mle is the MLE, while sigma_asymp is the estimated covariance matrix of $\hat\beta$
mh <- function(Nsim, tau, y, X, beta_mle, sigma_asymp){
  b <- matrix(0, nrow = Nsim, ncol = ncol(X))
  b[1, ] <- beta_mle
  for(s in 2:Nsim){
    set.seed(s)
    proposal <- rmvnorm(1, b[s - 1, ], tau ^ 2 * sigma_asymp)
    rho <- min(1, post(proposal, y, X) / post(b[s - 1, ], y, X))
    if (runif(1) < rho) {
      b[s, ] <- proposal
    } else {
      b[s, ] <- b[s - 1, ]
    }
  }
  return(b)
}

# Simulate 10000 iterations using different values of $\tau = 0.1, 1, 4$
tau <- c(0.1, 1, 4)
K <- length(tau)
nsim <- 1e4
b_sim <- lapply(1:K, function(.x) mh(nsim, tau[.x], y, X, beta_mle, sigma.asymp))

# Plot the trace of the parameter $\beta_2$, the autocorrelation and
# the histogram, adding a vertical line for the posterior estimate of the mean.
par(mfrow = c(1, 3))
for(j in 1:K) plot(b_sim[[j]][, 2], type = "l", ylab = "Trace", xlab = "Iteration", main = "")
for(j in 1:K) acf(b_sim[[j]][, 2], main = "")
for(j in 1:K) {
  hist(b_sim[[j]][, 2], breaks = 50, border = "gray40", freq = FALSE, main = "")
  abline(v = mean(b_sim[[j]][, 2]), col = "firebrick3", lty = 2)
}

# Which value of $\tau$ seems the best?

# Using the selected simulations, give a Monte Carlo estimate
# of the posterior mean and variance of $\beta$ (burn-in = 1000 and thin = 10).
post_sample <- b_sim[[2]][seq(1e3, 1e4, by = 10), ]

# Posterior mean
beta_post_mean <- round(colMeans(post_sample), 2)
beta_post_mean

# Posterior variance
beta_post_var <- round(apply(post_sample, 2, var), 2)
beta_post_var

# Comparison between posterior mean and MLE
rbind(beta_post_mean, round(beta_mle, 2))

# Comparison between posterior variance and estimated variance of the MLE of beta
rbind(beta_post_var, round(diag(sigma.asymp), 2))

##################
# GIBBS SAMPLING #
##################

# Implement a Gibbs sampler to generate $\beta$ from the posterior distribution:
# - Initialize $\beta^{(1)} = \hat\beta$ (to be precise, we should also initialise $y^{*(1)}$)
# - for s in $2, \ldots, S$
#   - generate $y^{*(s)}|\beta^{(s-1)}, X, y \sim \begin{cases}
#       \mathcal{TN}(x_i\beta^{(s-1)}, 1, 0, +\infty) \quad \text{if } y_i = 1\\
#       \mathcal{TN}(x_i\beta^{(s-1)}, 1, -\infty, 0) \quad \text{if } y_i = 0
#     \end{cases}$
#   - generate $\beta^{(s)}|X, y^{*(s)} \sim N_{p}((X^\top X)^{-1}X^\top y^{*(s)}, (X^\top X)^{-1})$
gs <- function(Nsim, y, X, beta_mle){
  b <- matrix(0, nrow = Nsim, ncol = ncol(X))
  y_star <- rep(NA, nrow(X))
  b[1, ] <- beta_mle
  S <- solve(crossprod(X))
  SX <- S %*% t(X)
  for(i in 2:Nsim){
    mu <- X %*% b[i - 1, ]
    y_star[y == 1] <- rtruncnorm(sum(y == 1), a = 0, b = Inf, mean = mu[y == 1], sd = 1)
    y_star[y == 0] <- rtruncnorm(sum(y == 0), a = -Inf, b = 0, mean = mu[y == 0], sd = 1)
    b[i, ] <- rmvnorm(1, SX %*% y_star, S)
  }
  return(b)
}

# Simulate a sample of 10000 draws of $\beta$
bgs <- gs(1e4, y, X, beta_mle)
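# Optional efficiency note, in the same spirit as the crossprod() comparison above:
# rmvnorm() factorises its covariance matrix at every call, while gs() always uses
# the same S = (X'X)^{-1}. A sketch (the objects S_gs, R_gs and mu0 are introduced
# here only for this illustration) comparing a single rmvnorm() draw with a draw
# built from a Cholesky factor computed once in advance:
S_gs <- solve(crossprod(X))
R_gs <- chol(S_gs)            # S_gs = t(R_gs) %*% R_gs
mu0  <- rep(0, ncol(X))
microbenchmark(
  rmvnorm = rmvnorm(1, mu0, S_gs),
  chol    = mu0 + drop(t(R_gs) %*% rnorm(ncol(X))))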
# Visualise the results for $\beta_2$ (traceplot, acf, histogram).
par(mfrow = c(1, 3))
plot(bgs[, 2], type = "l", ylab = "Trace", xlab = "Iteration")
acf(bgs[, 2], main = "")
hist(bgs[, 2], breaks = 50, border = "gray40", freq = FALSE, main = "")
abline(v = mean(bgs[, 2]), col = "firebrick3", lty = 2)

# Then extract the posterior mean and variance of $\beta$
# (considering a burn-in of 1000 and a thinning of 10, as above)
post_sample <- bgs[seq(1e3, 1e4, by = 10), ]
beta_post_mean <- round(colMeans(post_sample), 2)
beta_post_mean
beta_post_var <- round(apply(post_sample, 2, var), 2)
beta_post_var

# Compare the posterior mean and variance estimates with the MLE
# and the estimated asymptotic variance
rbind(round(colMeans(post_sample), 2), round(beta_mle, 2))
rbind(round(apply(post_sample, 2, var), 2), round(diag(sigma.asymp), 2))

# Compare the MH and GS algorithms via microbenchmark().
# Consider `times = 10` as argument
microbenchmark(
  MH = mh(nsim, tau = 1, y, X, beta_mle, sigma.asymp),
  GS = gs(nsim, y, X, beta_mle),
  times = 10L)
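# microbenchmark() only compares run time. A fuller comparison would also consider
# how informative the draws are, e.g. via the effective sample size; a minimal
# sketch, assuming the coda package is installed (it is not loaded at the top of
# this script):
library(coda)                              # provides as.mcmc() and effectiveSize()
effectiveSize(as.mcmc(b_sim[[2]][, 2]))    # RW-MH chain (tau = 1), beta_2
effectiveSize(as.mcmc(bgs[, 2]))           # Gibbs chain, beta_2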