###################################### # Lab 3 - Part 2: Bayesian Inference # ###################################### # Save the stan files on a folder # Set the path of the folder with stan files setwd("") ################################################### # Normal case with sigma2 known - Conjugate Prior # ################################################### #Input values #True mean theta_sample <- 2 #Likelihood variance sigma2 <- 2 #Sample size n <- 10 #Prior mean mu <- 7 #Prior variance tau2 <- 2 #Data generation set.seed(123) y <- rnorm(n,theta_sample, sqrt(sigma2)) #Posterior mean mu_star <- ((1/tau2)*mu+(n/sigma2)*mean(y))/( (1/tau2)+(n/sigma2)) mu_star #Posterior standard deviation sd_star <- sqrt(1/( (1/tau2)+(n/sigma2))) sd_star # Plot of likelihood, prior and posterior distribution curve(dnorm(x, theta_sample, sqrt(sigma2/n)),xlim=c(-4,15), lty=2, lwd=1, col="black", ylim=c(0,1.4), ylab="density", xlab=expression(theta)) curve(dnorm(x, mu, sqrt(tau2) ), xlim=c(-4,15), col="red", lty=1,lwd=2, add =T) curve(dnorm(x, mu_star, sd_star), xlab=expression(theta), ylab="", col="blue", lwd=2, add=T) legend(8.5, 0.7, c("Prior", "Likelihood", "Posterior"), c("red", "black", "blue", "grey" ), lty=c(1,2,1),lwd=c(1,1,2), cex=1) library(rstan) # Launch the Stan Model: see the code normal.stan # It is composed by: # -) Data block # -) Parameter block # -) Model block #launch Stan model: # Data specification data<- list(N=n, y=y, sigma =sqrt(sigma2), mu = mu, tau = sqrt(tau2)) # Fit a Stan model fit <- stan(file="normal.stan", data = data, chains = 4, iter=2000) #extract Stan output sim <- extract(fit) #draw the true analytical posterior and the Stan simulated posterior par(mfrow=c(1,2), pty ="m", oma=c(0,0,0,0)) curve(dnorm(x, theta_sample, sqrt(sigma2/n)),xlim=c(-4,15), lty=2, lwd=1, col="black", ylim=c(0,1.2), ylab="density", xlab=expression(theta), cex.lab=2) curve(dnorm(x, mu, sqrt(tau2) ), xlim=c(-4,15), col="red", lty=1,lwd=2, add =T) curve(dnorm(x, mu_star, sd_star), xlab=expression(theta), ylab="", col="blue", lwd=2, cex.lab=2, add=T) legend(5, 1, c("Prior", "Likelihood", "True Posterior"), c("red", "black", "blue", "blue" ), lty=c(1,2,1),lwd=c(1,1,2), cex=0.8) curve(dnorm(x, theta_sample, sqrt(sigma2/n)),xlim=c(-4,15), lty=2, lwd=1, col="black", ylim=c(0,1.2), ylab="density", xlab=expression(theta), cex.lab=2) curve(dnorm(x, mu, sqrt(tau2) ), xlim=c(-4,15), col="red", lty=1,lwd=2, add =T) lines(density(sim$theta, adj=2), col ="blue", lwd=2, lty =1) legend(5, 1, c("Prior", "Likelihood", "Stan Posterior"), c("red", "black", "blue", "blue" ), lty=c(1,2,1),lwd=c(1,1,2), cex=0.8) ####################################################### # Normal case with sigma2 known - Non-Conjugate Prior # ####################################################### #launch Stan model with the uniform prior in place of normal prior data2<- list(N=n, y=y, sigma =sqrt(sigma2), a=-10, b=10) fit2 <- stan(file="normal_uniform.stan", data = data2, chains = 4, iter=2000, refresh=-1) #extract the Stan output sim2 <- extract(fit2) #plot the Stan posterior curve(dnorm(x, theta_sample, sqrt(sigma2/n)),xlim=c(-12,12), lty=2, lwd=1, col="black", ylim=c(0,1.2), ylab="density", xlab=expression(theta)) curve(dunif(x, -10,10 ), xlim=c(-12,12), col="red", lty=1,lwd=2, add =T) lines(density(sim2$theta, adj=2), col ="blue", lwd=2, lty =1) legend(5, 1, c("Prior", "Likelihood", "Stan Posterior"), c("red", "black", "blue", "blue" ), lty=c(1,2,1),lwd=c(1,1,2), cex=0.8) #traceplot traceplot(fit2, pars ="theta") # posterior mean theta_est <- mean(sim2$theta) library("bayesplot") library("rstanarm") library("ggplot2") #MCMC areas posterior <- as.matrix(fit2) plot_title <- ggtitle("Posterior distributions", "with medians and 80% intervals") mcmc_areas(posterior, pars = "theta", prob = 0.8) + plot_title print(fit2) ############################################## # Normal case with mean and variance unknown # ############################################## #launch biparametric Stan model data3<- list(N=n, y=y, a=-10, b=10) fit3 <- stan(file="biparametric.stan", data = data3, chains = 4, iter=2000, refresh=-1) #extract stan output for biparametric model sim3 <- extract(fit3) posterior_biv <- as.matrix(fit3) theta_est <- mean(sim3$theta) sigma_est <- mean(sim3$sigma) c(theta_est, sigma_est) traceplot(fit3, pars=c("theta", "sigma")) plot_title <- ggtitle("Posterior distributions", "with medians and 80% intervals") mcmc_areas(posterior_biv, pars = c("theta","sigma"), prob = 0.8) + plot_title ########################### # Models for soccer goals # ########################### library(LearnBayes) data(soccergoals) y <- soccergoals$goals #write the likelihood function via the gamma distribution lik_pois<- function(data, theta){ n <- length(data) lambda <- exp(theta) dgamma(lambda, shape =sum(data)+1, scale=1/n) } # Write the prior (gamma) prior_gamma <- function(par, theta){ lambda <- exp(theta) dgamma(lambda, par[1], rate=par[2])*lambda } # Write the prior (Normal) prior_norm <- function(npar, theta){ dnorm(theta, npar[1], npar[2]) } # Vectorize the likelihood and the priors lik_pois_v <- Vectorize(lik_pois, "theta") prior_gamma_v <- Vectorize(prior_gamma, "theta") prior_norm_v <- Vectorize(prior_norm, "theta") #Plot likelihood curve(lik_pois_v(theta=x, data=y), xlim=c(-1,4), xlab=expression(theta), ylab = "density", lwd =2 ) #prior 1 curve(prior_gamma_v(theta=x, par=c(4.57, 1.43)), lty =2, col="red", add = TRUE, lwd =2) # prior 2 curve(prior_norm_v(theta=x, npar=c(1, .5)), lty =3, col="blue", add =TRUE, lwd=2) #prior 3 curve(prior_norm_v(theta=x, npar=c(2, .5)), lty =4, col="green", add =TRUE, lwd =2) #prior 4 curve(prior_norm_v(theta=x, npar=c(1, 2)), lty =5, col="violet", add =TRUE, lwd =2) legend(2.6, 1.8, c("Lik.", "Ga(4.57,1.43)", "N(1, 0.25)", "N(2,0.25)","N(1, 4)" ), lty=c(1,2,3,4,5), col=c("black", "red", "blue", "green", "violet"),lwd=2, cex=0.9) # Poisson-Gamma logpoissongamma <- function(theta, datapar){ data <- datapar$data par <- datapar$par log_lik <- log(lik_pois(data, theta)) log_prior <- log(prior_gamma(par, theta)) return(log_lik+log_prior) } logpoissongamma.v <- Vectorize( logpoissongamma, "theta") #Poisson-Normal logpoissonnormal <- function( theta, datapar){ data <- datapar$data npar <- datapar$par log_lik <- log(lik_pois(data, theta)) log_prior <- log(prior_norm(npar, theta)) return(log_lik+log_prior) } logpoissonnormal.v <- Vectorize( logpoissonnormal, "theta") # Plot: #log-likelihood curve(log(lik_pois(y, theta=x)), xlim=c(-1,4),ylim=c(-20,2), lty =1, ylab="log-posteriors", xlab=expression(theta)) #log posterior 1 curve(logpoissongamma.v(theta=x, list(data=y, par=c(4.57, 1.43))), col="red", xlim=c(-1,4),ylim=c(-20,2), lty =1, add =TRUE) #log posterior 2 curve(logpoissonnormal.v( theta=x, datapar <- list(data=y, par=c(1, .5))), lty =1, col="blue", add =TRUE) #log posterior 3 curve(logpoissonnormal.v( theta=x, datapar <- list(data=y, par=c(2, .5))), lty =1, col="green", add =TRUE, lwd =2) #log posterior 4 curve(logpoissonnormal.v( theta=x, list(data=y, par=c(1, 2))), lty =1, col="violet", add =TRUE, lwd =2) legend(2.6, 1.3, c( "loglik", "lpost 1", "lpost 2", "lpost 3", "lpost 4" ), lty=1, col=c("black", "red", "blue", "green", "violet"),lwd=2, cex=0.9) #posterior modes, posterior standard deviations and the log-marginal likelihoods by using laplace function datapar <- list(data=y, par=c(4.57, 1.43)) fit1 <- laplace(logpoissongamma, .5, datapar) datapar <- list(data=y, par=c(1, .5)) fit2 <- laplace(logpoissonnormal, .5, datapar) datapar <- list(data=y, par=c(2, .5)) fit3 <- laplace(logpoissonnormal, .5, datapar) datapar <- list(data=y, par=c(1, 2)) fit4 <- laplace(logpoissonnormal, .5, datapar) postmode <- c(fit1$mode, fit2$mode, fit3$mode, fit4$mode ) postsds <- sqrt(c(fit1$var, fit2$var, fit3$var, fit4$var)) logmarg <- c(fit1$int, fit2$int, fit3$int, fit4$int) cbind(postmode, postsds, logmarg) # bayes factors BF_matrix <- matrix(1, 4,4) for (i in 1:3){ for (j in 2:4){ BF_matrix[i,j]<- exp(logmarg[i]-logmarg[j]) BF_matrix[j,i]=(1/BF_matrix[i,j]) } } round_bf <- round(BF_matrix,3) round_bf