######################################
# Lab 3 - Part 2: Bayesian Inference #
######################################
# Save the stan files on a folder
# Set the path of the folder with stan files
setwd("")

###################################################
# Normal case with sigma2 known - Conjugate Prior #
###################################################
#Input values
#True mean
theta_sample <- 2
#Likelihood variance
sigma2 <- 2
#Sample size
n <- 10
#Prior mean
mu <- 7
#Prior variance
tau2 <- 2

#Data generation
set.seed(123)
y <- rnorm(n,theta_sample, sqrt(sigma2))

#Posterior mean
mu_star <- ((1/tau2)*mu+(n/sigma2)*mean(y))/( (1/tau2)+(n/sigma2))
mu_star
#Posterior standard deviation
sd_star <- sqrt(1/( (1/tau2)+(n/sigma2)))
sd_star

# Plot of likelihood, prior and posterior distribution 
curve(dnorm(x, theta_sample, sqrt(sigma2/n)),xlim=c(-4,15), lty=2, lwd=1, col="black", ylim=c(0,1.4), 
ylab="density", xlab=expression(theta))

curve(dnorm(x, mu, sqrt(tau2) ), xlim=c(-4,15), col="red", lty=1,lwd=2,  add =T)
curve(dnorm(x, mu_star, sd_star), 
  xlab=expression(theta), ylab="", col="blue", lwd=2, add=T)  
legend(8.5, 0.7, c("Prior", "Likelihood", "Posterior"), 
  c("red", "black", "blue", "grey" ), lty=c(1,2,1),lwd=c(1,1,2), cex=1)

library(rstan) 
# Launch the Stan Model: see the code normal.stan
# It is composed by:
# -) Data block
# -) Parameter block
# -) Model block

#launch Stan model:
# Data specification
data<- list(N=n, y=y, sigma =sqrt(sigma2), mu = mu, tau = sqrt(tau2))

# Fit a Stan model
fit <- stan(file="normal.stan", data = data, chains = 4, iter=2000)

#extract Stan output
sim <- extract(fit)

#draw the true analytical posterior and the Stan simulated posterior

par(mfrow=c(1,2), pty ="m", oma=c(0,0,0,0))
curve(dnorm(x, theta_sample, sqrt(sigma2/n)),xlim=c(-4,15), lty=2, lwd=1, col="black", ylim=c(0,1.2), 
  ylab="density", xlab=expression(theta), cex.lab=2)

curve(dnorm(x, mu, sqrt(tau2) ), xlim=c(-4,15), col="red", lty=1,lwd=2,  add =T)
curve(dnorm(x, mu_star, sd_star), 
  xlab=expression(theta), ylab="", col="blue", lwd=2,
  cex.lab=2, add=T)  

legend(5, 1, c("Prior", "Likelihood", "True Posterior"), 
  c("red", "black", "blue", "blue" ), lty=c(1,2,1),lwd=c(1,1,2), cex=0.8)


curve(dnorm(x, theta_sample, sqrt(sigma2/n)),xlim=c(-4,15), lty=2, lwd=1, col="black", ylim=c(0,1.2), 
  ylab="density", xlab=expression(theta), cex.lab=2)
curve(dnorm(x, mu, sqrt(tau2) ), xlim=c(-4,15), col="red", lty=1,lwd=2,  add =T)
lines(density(sim$theta, adj=2), col ="blue", lwd=2, lty =1)
legend(5, 1, c("Prior", "Likelihood", "Stan Posterior"),
  c("red", "black", "blue", "blue" ), lty=c(1,2,1),lwd=c(1,1,2), cex=0.8)


#######################################################
# Normal case with sigma2 known - Non-Conjugate Prior #
#######################################################

#launch Stan model with the uniform prior in place of normal prior

data2<- list(N=n, y=y, sigma =sqrt(sigma2), a=-10, b=10)
fit2 <- stan(file="normal_uniform.stan", data = data2, chains = 4, iter=2000,
  refresh=-1)

#extract the Stan output
sim2 <- extract(fit2)

#plot the Stan posterior

curve(dnorm(x, theta_sample, sqrt(sigma2/n)),xlim=c(-12,12), lty=2, lwd=1, col="black", ylim=c(0,1.2), 
  ylab="density", xlab=expression(theta))
curve(dunif(x, -10,10  ), xlim=c(-12,12), col="red", lty=1,lwd=2,  add =T)
lines(density(sim2$theta, adj=2), col ="blue", lwd=2, lty =1)
legend(5, 1, c("Prior", "Likelihood", "Stan Posterior"), 
  c("red", "black", "blue", "blue" ), lty=c(1,2,1),lwd=c(1,1,2), cex=0.8)

#traceplot
traceplot(fit2, pars ="theta")

# posterior mean
theta_est <- mean(sim2$theta)

library("bayesplot")
library("rstanarm")
library("ggplot2")

#MCMC areas
posterior <- as.matrix(fit2)

plot_title <- ggtitle("Posterior distributions",
                      "with medians and 80% intervals")

mcmc_areas(posterior, 
           pars = "theta", 
           prob = 0.8) + plot_title

print(fit2)

##############################################
# Normal case with mean and variance unknown #
##############################################
#launch biparametric Stan model

data3<- list(N=n, y=y, a=-10, b=10)
fit3 <- stan(file="biparametric.stan", data = data3, chains = 4, iter=2000,
  refresh=-1)

#extract stan output for biparametric model

sim3 <- extract(fit3)
posterior_biv <- as.matrix(fit3)

theta_est <- mean(sim3$theta)
sigma_est <- mean(sim3$sigma)
c(theta_est, sigma_est)
traceplot(fit3, pars=c("theta", "sigma"))

plot_title <- ggtitle("Posterior distributions",
                      "with medians and 80% intervals")

mcmc_areas(posterior_biv, 
           pars = c("theta","sigma"), 
           prob = 0.8) + plot_title

###########################
# Models for soccer goals #
###########################
library(LearnBayes)
data(soccergoals)

y <- soccergoals$goals

#write the likelihood function via the gamma distribution


lik_pois<- function(data, theta){
  n <- length(data)
  lambda <- exp(theta)
  dgamma(lambda, shape =sum(data)+1, scale=1/n)
}
# Write the prior (gamma)
 prior_gamma <- function(par, theta){
  lambda <- exp(theta)
  dgamma(lambda, par[1], rate=par[2])*lambda  
}

 # Write the prior (Normal)
 prior_norm <- function(npar, theta){
 dnorm(theta, npar[1], npar[2])
  
}

 # Vectorize the likelihood and the priors
lik_pois_v <- Vectorize(lik_pois, "theta")
prior_gamma_v <- Vectorize(prior_gamma, "theta")
prior_norm_v <- Vectorize(prior_norm, "theta")


#Plot likelihood
 curve(lik_pois_v(theta=x, data=y), xlim=c(-1,4), xlab=expression(theta), ylab = "density", lwd =2 )
 
#prior 1 
 curve(prior_gamma_v(theta=x, par=c(4.57, 1.43)), lty =2, col="red", add = TRUE, lwd =2)

 # prior 2  
 curve(prior_norm_v(theta=x, npar=c(1, .5)), lty =3, col="blue", add =TRUE, lwd=2)

#prior 3  
 curve(prior_norm_v(theta=x, npar=c(2, .5)), lty =4, col="green", add =TRUE, lwd =2)
 
#prior 4 
  curve(prior_norm_v(theta=x, npar=c(1, 2)), lty =5, col="violet", add =TRUE, lwd =2)
  legend(2.6, 1.8, c("Lik.", "Ga(4.57,1.43)", "N(1, 0.25)", "N(2,0.25)","N(1, 4)" ),
  lty=c(1,2,3,4,5), col=c("black", "red", "blue", "green", "violet"),lwd=2, cex=0.9)
# Poisson-Gamma
logpoissongamma <- function(theta, datapar){
   data <- datapar$data
   par <- datapar$par
   log_lik <- log(lik_pois(data, theta))
   log_prior <- log(prior_gamma(par, theta))
   return(log_lik+log_prior)
}


logpoissongamma.v <- Vectorize( logpoissongamma, "theta")


#Poisson-Normal
logpoissonnormal <- function( theta, datapar){
 data <- datapar$data
 npar <- datapar$par
 log_lik <- log(lik_pois(data, theta))
 log_prior <- log(prior_norm(npar, theta))
  return(log_lik+log_prior)
}  
logpoissonnormal.v <- Vectorize( logpoissonnormal, "theta")

# Plot:
#log-likelihood

curve(log(lik_pois(y, theta=x)), xlim=c(-1,4),ylim=c(-20,2), lty =1,
   ylab="log-posteriors", xlab=expression(theta))
#log posterior 1
curve(logpoissongamma.v(theta=x, list(data=y, par=c(4.57, 1.43))), col="red", xlim=c(-1,4),ylim=c(-20,2), lty =1, add =TRUE)
#log posterior 2
 curve(logpoissonnormal.v( theta=x, datapar <- list(data=y, par=c(1, .5))), lty =1, col="blue",  add =TRUE)
#log posterior 3
 curve(logpoissonnormal.v( theta=x, datapar <- list(data=y, par=c(2, .5))), lty =1, col="green", add =TRUE, lwd =2)
#log posterior 4
  curve(logpoissonnormal.v( theta=x, list(data=y, par=c(1, 2))), lty =1, col="violet", add =TRUE, lwd =2)
 legend(2.6, 1.3, c( "loglik", "lpost 1", "lpost 2", "lpost 3", "lpost 4" ),
  lty=1, col=c("black", "red", "blue", "green", "violet"),lwd=2, cex=0.9)

#posterior modes, posterior standard deviations and the log-marginal likelihoods by using laplace function
datapar <- list(data=y, par=c(4.57, 1.43))
fit1 <- laplace(logpoissongamma, .5, datapar)
datapar <- list(data=y, par=c(1, .5))
fit2 <- laplace(logpoissonnormal, .5, datapar)
datapar <- list(data=y, par=c(2, .5))
fit3 <- laplace(logpoissonnormal, .5, datapar)
datapar <- list(data=y, par=c(1, 2))
fit4 <- laplace(logpoissonnormal, .5, datapar)

postmode <- c(fit1$mode, fit2$mode, fit3$mode, fit4$mode )
postsds <- sqrt(c(fit1$var, fit2$var, fit3$var, fit4$var))
logmarg <- c(fit1$int, fit2$int, fit3$int, fit4$int)
cbind(postmode, postsds, logmarg)

# bayes factors
BF_matrix <- matrix(1, 4,4)
for (i in 1:3){
  for (j in 2:4){
   BF_matrix[i,j]<- exp(logmarg[i]-logmarg[j])
   BF_matrix[j,i]=(1/BF_matrix[i,j]) 
  }
}

round_bf <- round(BF_matrix,3)
round_bf