# R code demonstrating a Bayesian analysis using R and Stan
#
# To run this file:
#   - Create a folder called 'Normal Distribution'
#     (currently the code assumes this is on the C drive as C:\...\lab_3\)
#   - Inside the 'Normal Distribution' folder, create a subfolder called 'stan'
#   - Store this file in the 'stan' folder

# Remove objects from memory ---------------
# NOTE(review): rm(list = ls()) and setwd() are discouraged outside
# self-contained teaching scripts; kept here to preserve the handout's
# behaviour.
rm(list = ls())

# Define folders with locations ---------------
# Here is where you can change the location of the "main folder".
# file.path() builds the path portably instead of hard-coded "\\" separators.
main.folder <- file.path(
  "C:", "Users", "matilde", "Documents", "teaching", "InfStatBayes",
  "ISB_202122", "course_material", "lab", "lab_3"
)
software.folder <- file.path(main.folder, "stan")

# showWarnings = FALSE: no warning when the folder already exists (re-runs);
# recursive = TRUE: also create any missing parent folders
dir.create(software.folder, showWarnings = FALSE, recursive = TRUE)

# Move to the desired folder
setwd(software.folder)

# Call the packages ------
library(rstanarm)
options(mc.cores = parallel::detectCores())
library(MCMCvis)
library(bayesplot)  # optional
library(mcmcplots)  # optional

# Define data used in the model -----------
x <- c(91, 85, 72, 87, 71, 77, 88, 94, 84, 92)  # scores for individuals

# Convert the data to a data frame (stan_glm expects a data argument)
data.for.stan <- as.data.frame(x)

# Choose features of MCMC --------
# - the number of chains
# - the number of iterations to burn-in (warmup)
# - the total number of iterations after burn-in (warmup)
n.chains <- 1                         # by default 4
n.warmup <- 1000                      # by default 1000
n.iters.per.chain.after.warmup <- 10  # by default 1000
n.iters.total.per.chain <- n.iters.per.chain.after.warmup + n.warmup

# Run MCMC -----------
# Fit the model in stan ------
# Intercept-only normal model:
#   prior on mu:    Normal(75, 7.07)   (autoscale = FALSE: use as given)
#   prior on sigma: Exponential(1)
fitted.model <- stan_glm(
  x ~ 1,
  data = data.for.stan,
  prior_intercept = normal(75, 7.07, autoscale = FALSE),
  prior_aux = exponential(1, autoscale = FALSE),
  chains = n.chains,
  warmup = n.warmup,
  iter = n.iters.total.per.chain,  # by default: 2000
  diagnostic_file = "fitted.model.diagnostic.file.csv"
)

###################################################
###################################################
# The effective sample size (ESS) of a quantity of interest captures how
# many independent draws contain the same amount of information as the
# dependent sample obtained by the MCMC algorithm. Clearly, the higher the
# ESS the better.
#
# Bulk-ESS estimates the sampling efficiency for the location of the
# distribution (e.g. mean and median).
#
# Tail-ESS computes the minimum of the effective sample sizes (ESS) of the
# 5% and 95% quantiles.
###################################################

prior_summary(fitted.model)

# Plot the results -----
names(fitted.model)

# Convert the draws into a matrix
posterior <- as.matrix(fitted.model)
str(posterior)

# Rename the first parameter from intercept to 'mu'
dimnames(posterior)$parameters[1] <- "mu"

# Plot the trace and density
MCMCtrace(posterior, pdf = FALSE)

# Rerun with more iterations -----
# Choose features of MCMC --------
# - the number of chains
# - the number of iterations to burn-in (warmup)
# - the total number of iterations
n.chains <- 1
n.warmup <- 1000
n.iters.per.chain.after.warmup <- 5000
n.iters.total.per.chain <- n.iters.per.chain.after.warmup + n.warmup

# Run MCMC -----------
# Fit the model in stan ------
# Use a distinct diagnostic file so the first run's diagnostics are kept
# (the original script overwrote "fitted.model.diagnostic.file.csv").
fitted.model <- stan_glm(
  x ~ 1,
  data = data.for.stan,
  prior_intercept = normal(75, 7.07, autoscale = FALSE),
  prior_aux = exponential(1, autoscale = FALSE),
  chains = n.chains,
  warmup = n.warmup,
  iter = n.iters.total.per.chain,
  diagnostic_file = "fitted.model.diagnostic.file.2.csv"
)

# Plot the results -----
# Convert the draws into a matrix
posterior <- as.matrix(fitted.model)

# Rename the first parameter from intercept to 'mu'
dimnames(posterior)$parameters[1] <- "mu"

# Plot the trace and density
MCMCtrace(posterior, pdf = FALSE)

# Overlay the priors on the posterior densities --------
PR <- rnorm(5000, 75, 7.07)  # draws from the prior on mu
MCMCtrace(posterior, pdf = FALSE, params = "mu", priors = PR)

# PPO_out = TRUE returns the prior/posterior overlap; plot = FALSE skips
# redrawing the figure
MCMCtrace(posterior, pdf = FALSE, params = "mu", priors = PR,
          plot = FALSE, PPO_out = TRUE)

# Same overlay for sigma, using draws from its Exponential(1) prior
MCMCtrace(posterior, pdf = FALSE, params = "sigma", priors = rexp(5000, 1))

# Obtain the summary statistics ----
# Summary statistics: HPD intervals, rounded to 2 decimals, with the
# posterior median added via func / func_name
MCMCsummary(posterior,
            HPD = TRUE, Rhat = FALSE, n.eff = FALSE, round = 2,
            func = median, func_name = "median")

#######
####### Finally
#######
# These packages were already attached at the top of the script, so the
# calls below are redundant no-ops; kept one per line so the second call
# is not swallowed by the preceding "# optional" comment.
library(bayesplot)  # optional
library(mcmcplots)  # optional