############################## # Bayesian Statistics: Lab 6 # # 19/05/2023 # ############################## # Some useful package library(rstudioapi) setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) library(rstan) library(bayesplot) library(loo) theme_set(bayesplot::theme_default()) set.seed(123) ######################################################################################## # From the last Lab: Negative Binomial hierarchical model: Varying intercept and slope # ######################################################################################## # Load the extended dataset stan_dat_hier_long <- readRDS('pest_data_long.RDS') str(stan_dat_hier_long) # Compile the model comp_model_NB_hier_slopes <- stan_model('hier_NB_regression_ncp_slopes.stan') # Sampling (the default of adapt_delta is 0.8) fit_NB_hier_slopes <- sampling(comp_model_NB_hier_slopes, data = stan_dat_hier_long, control = list(adapt_delta = 0.95)) # PPCs: means by month y_rep <- as.matrix(fit_NB_hier_slopes, pars = "y_rep") sel_1year <- which(stan_dat_hier_long$mo_idx %in% 1 : 12) with(stan_dat_hier_long, ppc_stat_grouped( y = complaints[sel_1year], yrep = y_rep[, sel_1year], group = mo_idx[sel_1year], stat = 'mean' ) + xlim(0, 11)) ############################################################################################# # Negative Binomial hierarchical model with varying intercept, slope and time-varying effect# ############################################################################################# # Compile comp_model_NB_hier_mos <- stan_model('hier_NB_regression_ncp_slopes_mos.stan') # Sampling fit_NB_hier_mos <- sampling(comp_model_NB_hier_mos, data = stan_dat_hier_long, control = list(adapt_delta = 0.95)) # PPCs y_rep2 <- as.matrix(fit_NB_hier_mos, pars = "y_rep") # Compare the distribution (simulated vs observed) ppc_dens_overlay( y = stan_dat_hier_long$complaints, yrep = y_rep2[1:200,]) # means by month sel_1year <- which(stan_dat_hier_long$mo_idx %in% 1:12) with(stan_dat_hier_long, ppc_stat_grouped( y = complaints[sel_1year], yrep = y_rep2[, sel_1year], group = mo_idx[sel_1year], stat = 'mean') + xlim(0, 11)) # Comparison between prior and posterior draws rho_draws <- cbind( 2 * rbeta(4000, 10, 5) - 1, # draw from prior as.matrix(fit_NB_hier_mos, pars = "rho") ) colnames(rho_draws) <- c("prior", "posterior") mcmc_hist(rho_draws, freq = FALSE, binwidth = 0.025, facet_args = list(nrow = 2)) + xlim(-1, 1) # Prediction + uncertainty intervals with(stan_dat_hier_long, ppc_intervals( y = complaints, yrep = y_rep2, x = traps) + labs(x = "Number of traps", y = "Number of complaints")) # Standardised residuals mean_y_rep2 <- colMeans(y_rep2) mean_inv_phi2 <- mean(as.matrix(fit_NB_hier_slopes, pars = "inv_phi")) std_resid2 <- (stan_dat_hier_long$complaints - mean_y_rep2) / sqrt(mean_y_rep2 + mean_y_rep2^2*mean_inv_phi2) ggplot() + geom_point(mapping = aes(x = mean_y_rep2, y = std_resid2)) + geom_hline(yintercept = c(-2,2)) #################### # Model comparison # #################### # Get the super variable and include in the extended dataset pest_data <- readRDS('pest_data.RDS') lis <- pest_data$live_in_super[seq(1,120, by=12)] stan_dat_hier_long$super <- rep(lis, each = 36) # Compile and Sampling stan_model <- c("simple_poisson_regression.stan", "multiple_poisson_regression.stan", "multiple_NB_regression.stan", "hier_NB_regression.stan", "hier_NB_regression_ncp.stan", "hier_NB_regression_ncp_slopes.stan", "hier_NB_regression_ncp_slopes_mos.stan" ) no_model <- length(stan_model) comp_model <- fit <- list() set.seed(2) for(j in 1 : no_model){ comp_model[[j]] <- stan_model(stan_model[[j]]) fit[[j]] <- sampling(comp_model[[j]], data = stan_dat_hier_long, cores = 4, control = list(adapt_delta = 0.95)) # Here we are using parallel computing (1 chain for each core) print(j) } ## Extract pointwise log-likelihood and compute loo and waic log_lik <- loo_mod <- waic_mod <- list() for(j in 1 : no_model){ log_lik[[j]] <- extract_log_lik(fit[[j]]) loo_mod[[j]] <- loo(log_lik[[j]]) waic_mod[[j]] <- waic(log_lik[[j]]) } # Compare the models loo_compare(loo_mod) loo_compare(waic_mod) # For single model loo_mod[[7]] waic_mod[[7]] # Extract LOO and WAIC and compare graphically looic <- waic <- list() for(j in 1 : no_model){ looic[[j]] <- loo_mod[[j]]$estimates[3,1] waic[[j]] <- waic_mod[[j]]$estimates[3,1] } looics <- unlist(looic) waics <- unlist(waic) mod_names <- c("P1", "P2", "NB", "HNB1", "HNB1NCP", "HNB2","HNB3" ) par(xaxt="n", mfrow=c(1,2)) plot(looics, type="b", xlab="", ylab="LOOIC") par(xaxt="s") axis(1, 1:7, mod_names, las=2) par(xaxt="n") plot(waics, type="b", xlab="", ylab="WAIC") par(xaxt="s") axis(1, c(1:7), mod_names, las=2) ############################################################################ # A useful way to explore several aspects of your fitting: a shiny app library(shinystan) launch_shinystan(fit[[4]])