# R code demonstrating a Bayesian analysis using R and Stan
#
# To run this file:
#   - Create a folder called 'Normal Distribution'
#     (currently the code assumes this is on the C drive as C:\...\lab_3\)
#   - Inside the 'Normal Distribution' folder, create a subfolder called 'stan'
#   - Store this file in the 'stan' folder

# Remove objects from memory ---------------
# NOTE(review): rm(list = ls()) and setwd() are discouraged outside
# self-contained teaching scripts; kept here to preserve the handout's
# behaviour.
rm(list = ls())

# Define folders with locations ---------------
# Here is where you can change the location of the "main folder".
# file.path() builds the path portably instead of hard-coded "\\" separators.
main.folder <- file.path(
  "C:", "Users", "matilde", "Documents", "teaching", "InfStatBayes",
  "ISB_202122", "course_material", "lab", "lab_3"
)
software.folder <- file.path(main.folder, "stan")

# showWarnings = FALSE: no warning when the folder already exists (re-runs);
# recursive = TRUE: also create any missing parent folders
dir.create(software.folder, showWarnings = FALSE, recursive = TRUE)

# Move to the desired folder
setwd(software.folder)

# Call the packages ------
library(rstanarm)
options(mc.cores = parallel::detectCores())
library(MCMCvis)
library(bayesplot)  # optional
library(mcmcplots)  # optional

# Define data used in the model -----------
x <- c(91, 85, 72, 87, 71, 77, 88, 94, 84, 92)  # scores for individuals

# Convert the data to a data frame (stan_glm expects a data argument)
data.for.stan <- as.data.frame(x)

# Choose features of MCMC --------
# - the number of chains
# - the number of iterations to burn-in (warmup)
# - the total number of iterations after burn-in (warmup)
n.chains <- 1                         # by default 4
n.warmup <- 1000                      # by default 1000
n.iters.per.chain.after.warmup <- 10  # by default 1000
n.iters.total.per.chain <- n.iters.per.chain.after.warmup + n.warmup

# Run MCMC -----------
# Fit the model in stan ------
# Intercept-only normal model:
#   prior on mu:    Normal(75, 7.07)   (autoscale = FALSE: use as given)
#   prior on sigma: Exponential(1)
fitted.model <- stan_glm(
  x ~ 1,
  data = data.for.stan,
  prior_intercept = normal(75, 7.07, autoscale = FALSE),
  prior_aux = exponential(1, autoscale = FALSE),
  chains = n.chains,
  warmup = n.warmup,
  iter = n.iters.total.per.chain,  # by default: 2000
  diagnostic_file = "fitted.model.diagnostic.file.csv"
)

###################################################
###################################################
# The effective sample size (ESS) of a quantity of interest captures how
# many independent draws contain the same amount of information as the
# dependent sample obtained by the MCMC algorithm. Clearly, the higher the
# ESS the better.
#
# Bulk-ESS estimates the sampling efficiency for the location of the
# distribution (e.g. mean and median).
#
# Tail-ESS computes the minimum of the effective sample sizes (ESS) of the
# 5% and 95% quantiles.
###################################################

prior_summary(fitted.model)

# Plot the results -----
names(fitted.model)

# Convert the draws into a matrix
posterior <- as.matrix(fitted.model)
str(posterior)

# Rename the first parameter from intercept to 'mu'
dimnames(posterior)$parameters[1] <- "mu"

# Plot the trace and density
MCMCtrace(posterior, pdf = FALSE)

# Rerun with more iterations -----
# Choose features of MCMC --------
# - the number of chains
# - the number of iterations to burn-in (warmup)
# - the total number of iterations
n.chains <- 1
n.warmup <- 1000
n.iters.per.chain.after.warmup <- 5000
n.iters.total.per.chain <- n.iters.per.chain.after.warmup + n.warmup

# Run MCMC -----------
# Fit the model in stan ------
# Use a distinct diagnostic file so the first run's diagnostics are kept
# (the original script overwrote "fitted.model.diagnostic.file.csv").
fitted.model <- stan_glm(
  x ~ 1,
  data = data.for.stan,
  prior_intercept = normal(75, 7.07, autoscale = FALSE),
  prior_aux = exponential(1, autoscale = FALSE),
  chains = n.chains,
  warmup = n.warmup,
  iter = n.iters.total.per.chain,
  diagnostic_file = "fitted.model.diagnostic.file.2.csv"
)

# Plot the results -----
# Convert the draws into a matrix
posterior <- as.matrix(fitted.model)

# Rename the first parameter from intercept to 'mu'
dimnames(posterior)$parameters[1] <- "mu"

# Plot the trace and density
MCMCtrace(posterior, pdf = FALSE)

# Overlay the priors on the posterior densities --------
PR <- rnorm(5000, 75, 7.07)  # draws from the prior on mu
MCMCtrace(posterior, pdf = FALSE, params = "mu", priors = PR)

# PPO_out = TRUE returns the prior/posterior overlap; plot = FALSE skips
# redrawing the figure
MCMCtrace(posterior, pdf = FALSE, params = "mu", priors = PR,
          plot = FALSE, PPO_out = TRUE)

# Same overlay for sigma, using draws from its Exponential(1) prior
MCMCtrace(posterior, pdf = FALSE, params = "sigma", priors = rexp(5000, 1))

# Obtain the summary statistics ----
# Summary statistics: HPD intervals, rounded to 2 decimals, with the
# posterior median added via func / func_name
MCMCsummary(posterior,
            HPD = TRUE, Rhat = FALSE, n.eff = FALSE, round = 2,
            func = median, func_name = "median")

#######
####### Finally
#######
# These packages were already attached at the top of the script, so the
# calls below are redundant no-ops; kept one per line so the second call
# is not swallowed by the preceding "# optional" comment.
library(bayesplot)  # optional
library(mcmcplots)  # optional