# Bayesian Statistics: Laboratory 2 # 18/04/2024 ################# # A-R algorithm # ################# ############# # Example 1 # ############# # * Target: (standard) Gaussian # * Candidate: (standard) Cauchy help(optimise) c <- optimize(f = function(x){ dnorm(x) / dcauchy(x) }, maximum = TRUE, interval = c(-6,6))$objective c # Plot f(\theta) and cg(\theta) par(pty = "s") plot(NULL, NULL, xlim = c(-4,4), ylim = c(0,0.6), xlab = expression(theta), ylab = "") curve(dnorm(x), col = "forestgreen", add = TRUE) curve(c * dcauchy(x), col = "red4", add = TRUE) #Simulate from f via A-R algorithm using g as candidate # Fix the number of simulations Nsim <- 2500 ## Set a seed set.seed(123) ## Generate Nsim values from a U(0,c) u <- runif(Nsim, max = c) ## Generate Nsim values from a standard Cauchy th_star <- rcauchy(Nsim) ## Condition cond <- u <= dnorm(th_star)/dcauchy(th_star) ## Then accept the values satisfying the condition th <- th_star[cond] # Histogram of \theta, with overlapping density of N(0,1) par(mfrow=c(1,2), pty="s") hist(th, prob=TRUE, main = "", xlab=expression(theta)) curve(dnorm(x), col = "forestgreen", add=TRUE) #Plot of the pairs (\theta^*, Ug(\theta^*)) with accepted in green and rejected in red plot(NULL, NULL, xlim = c(-4, 4), ylim = c(0, 0.6), xlab = expression(theta), ylab = "") curve(dnorm(x), col = "forestgreen", add = TRUE) curve(c * dcauchy(x), col = "red4", add = TRUE) points(th_star, u * dcauchy(th_star), pch = 19, cex = .4, col = "firebrick3") points(th, u[cond] * dcauchy(th), pch = 19,cex = .4,col = "forestgreen") ## acceptance probability 1/c ## proportion of accepted draws sum(cond)/Nsim ############# # Example 2 # ############# # * Target: Gamma(4.3,6.2) # * Possible Candidate: Gamma(4,7) and Gamma(4,6) # Plot of ratio between f and g x <- seq(0.01, 100, by = 0.01) plot(x, dgamma(x, 4.3, 6.2) / dgamma(x,4,7), ylab = "", main = "Gamma(4,7)") plot(x, dgamma(x, 4.3, 6.2) / dgamma(x,4,6), ylab = "", main = "Gamma(4,6)") #Then: # Parameters of the target density a1 <- 4.3; b1 <- 6.2 # Parameters of the candidate density a2 <- 4; b2 <- 6 c <- optimize(f = function(x){ dgamma(x, a1, b1) / dgamma(x, a2, b2) }, maximum = T, interval = c(0, 10))$objective c par(mfrow=c(1,1)) plot(NULL, NULL, xlim = c(0,3), ylim = c(0,2), xlab = "", ylab = "") curve(dgamma(x, a1, b1), col = "darkgreen", add = TRUE) curve(c * dgamma(x, a2, b2), col = "red4", add = TRUE) Nsim <- 2500 set.seed(123) u <- runif(Nsim, max = c) th_star <- rgamma(Nsim, a2 ,b2) cond <- u <= dgamma(th_star, a1, b1) / dgamma(th_star, a2, b2) th <- th_star[cond] par(mfrow = c(1, 2), pty = "s") hist(th, prob=TRUE, main = "", xlab=expression(theta)) curve(dgamma(x, a1, b1), col = "forestgreen", add=TRUE) plot(NULL, NULL, xlim = c(0, 3), ylim = c(0, 2), xlab = expression(theta), ylab = "") curve(dgamma(x, a1, b1), col = "darkgreen", add = TRUE) curve(c * dgamma(x, a2, b2), col = "red4", add = TRUE) points(th_star, u * dgamma(th_star, a2, b2), pch = 19, cex = .4, col = "firebrick3") points(th, u[cond] * dgamma(th, a2, b2), pch = 19, cex = 0.4, col = "forestgreen") 1/c sum(cond)/Nsim ############ # Exercise # ############ # What if we consider as candidate density # * Exponential with rate equal to 1 # * Gamma(4, \tilde \beta) where \tilde \beta = 4\beta_1/4.3 # Compare the results with the previous ones? ################## # Markov Chain # ################## ############################################# # Code for reproducing the "node/edge" plot # ############################################# library('heemod') library('diagram') mat_dim <- define_transition( state_names = c('MI', 'TS', 'FI', 'BO'), 0, 1, 0, 0, 1/9, 4/9, 4/9, 0, 0, 4/9, 4/9, 1/9, 0, 0, 1, 0 ); curves <- matrix(nrow = 4, ncol = 4, 0.05) par(mfrow = c(1, 1), pty = "s") plot(mat_dim, curve=curves, self.shiftx = c(0, 0.1, -0.1, 0), self.shifty = c(0, -0.1, -0.1, 0), self.arrpos = c(0, -0.25, 2.1, 0)) ############################################# # Transition probability matrix Tmat <- matrix(c(0, 1, 0, 0, 1/9, 4/9, 4/9, 0, 0, 4/9, 4/9, 1/9, 0, 0, 1, 0), nrow = 4, ncol = 4, byrow = TRUE) colnames(Tmat) <- rownames(Tmat) <- c('MI', 'TS', 'FI', 'BO') Tmat # Initial vector of probabilities \pi^{(0)} pi0 <- c(0,1,0,0) # Probabilities to move to the 4 cities at time 1, that is \pi^{(1)}=\pi^{(0)}P pi1 <- pi0 %*% Tmat pi1 # At time 2: pi2 <- pi1 %*% Tmat pi2 # Extend the process to compute the vector of probabilities \pi^{(t)} pt <- function(t, pi0, Tmat) { pit <- pi0 for(i in 1:t){ pit <- pit %*% Tmat } return(pit) } # Vector probabilities until time 10 (in a matrix form) pi.1_10 <-sapply(1 : 10, function(.x) pt(.x, pi0, Tmat)) t(pi.1_10) rownames(pi.1_10) <- c('MI', 'TS', 'FI', 'B0') par(pty="s") barplot(t(pi.1_10), beside = T, ylab = "Probability") # Vector probabilities time 50 pi50 <- pt(50, pi0, Tmat) pi50 # Vector probabilities time 100 pi100 <- pt(100, pi0, Tmat) pi100 # Stationary distribution via eigendecomposition eigendec <- eigen(t(Tmat)) eigendec$vectors eigendec$value p.distr <- eigendec$vectors[, 1] / sum(eigendec$vectors[, 1]) p.distr ######################### ## Metropolis-Hastings ## ######################### ############# # Example 1 # ############# # t-student(4) - N(0,1) # Compute the mean of a Student's t distribution with 4 degrees of freedom # using a Metropolis-Hastings algorithm with candidate density N(0,1). # Then, monitor the convergence across iterations and compute the acceptance rate. plot(NULL, NULL, xlab = expression(theta), ylab = "density", ylim = c(0, .45), xlim = c(-5, 5)) curve(dt(x, 4), col = "gray30", add = TRUE) curve(dnorm(x), col = "firebrick3", add = TRUE) legend(3, .4, lty = c(1, 1), col = c("gray30", "firebrick3"), legend = c("target", "proposal"), cex = .8) # Number of samples; Nsim <- 10^4 # d.f. target distribution; df_f <- 4 # Fix a seed set.seed(123) # vector of \theta values; th <- rep(0, Nsim) # a starting value th[1] <- rnorm(1) acc.rate <- 0 # a counter for the acceptance rate for(s in 2 : Nsim){ th_star <- rnorm(1)# proposal # numerator and denominator of R num <- dt(th_star, df_f) * dnorm(th[s-1]) den <- dt(th[s-1], df_f) * dnorm(th_star) rho <- min(1, num / den) # Acceptance probability # Decide whether accept the proposal (and update the counter) if ( runif(1) < rho) { th[s] <- th_star acc.rate <- acc.rate + 1 } else { th[s] <- th[s - 1] } } # Mean of a $t$-student distribution with $\nu = 4$ by using the MH algorithm is simply given by mean(th) # Acceptance rate acc.rate/Nsim # Graphical diagnostics par(mfrow=c(2,2)) ##traceplot plot(th, type = "l", ylab = "Trace", xlab = "Iteration") ##histogram hist(th, breaks = 100, border = "gray40",freq = F, main = "") curve(dt(x, df_f), col = "gray30", add = TRUE) abline(v = mean(th), col = "firebrick3", lty = 2) ##cumulative mean plot(cumsum(th)/ (1 : Nsim), type = "l", ylab = "Cumulative mean plot", xlab = "Iteration") abline(h = mean(th), col = "firebrick3", lty = 2) ##autocorrelation acf(th, main = "", ylab = "Autocorrelation") ## traceplot, histogram and autocorrelation ## after burn-in and thinning post_sample <- th[seq(100, Nsim, by = 10)] ##traceplot par(mfrow = c(1, 3), pty = "s") plot(post_sample, type = "l", ylab = "Trace", xlab = "Iteration") ##histogram hist(post_sample, breaks = 100, border = "gray40", freq = F, main = "") curve(dt(x, df_f), col = "gray30", add = TRUE) abline(v = mean(post_sample), col = "firebrick3", lty = 2) ##autocorrelation acf(post_sample, main = "",ylab = "Autocorrelation") ############# # Example 2 # ############# # t-student(4) - t-student(2) plot(NULL, NULL, xlab = expression(theta), ylab = "density", ylim = c(0, .45), xlim = c(-5, 5)) curve(dt(x, 4), col = "gray30", add = TRUE) curve(dt(x, 2), col = "firebrick3" , add = TRUE) legend(3, .4, lty = c(1, 1), col = c("gray30", "firebrick3"), legend = c("target", "proposal"), cex = .8) set.seed(123) df_g <- 2 th <- rep(0, Nsim); th[1] <- rnorm(1) acc.rate <- 0 for(s in 2 : Nsim){ th_star <- rt(1, df_g) num <- dt(th_star, df_f) * dt(th[s - 1], df_g) den <- dt(th[s - 1], df_f) * dt(th_star, df_g) rho <- min(1, num / den) if(runif(1) < rho){ th[s] <- th_star acc.rate <- acc.rate + 1 } else { th[s] <- th[s - 1] } } mean(th) acc.rate/Nsim par(mfrow=c(2,2)) plot(th, type = "l", ylab = "Trace", xlab = "Iteration") ##histogram hist(th, breaks = 100, border = "gray40",freq = F, main = "") curve(dt(x, df_f), col = "gray30", add = TRUE) abline(v = mean(th), col = "firebrick3", lty = 2) ##cumulative mean plot(cumsum(th)/ (1 : Nsim), type = "l", ylab = "Cumulative mean plot", xlab = "Iteration") abline(h = mean(th), col = "firebrick3", lty = 2) ##autocorrelation acf(th, main = "", ylab = "Autocorrelation")