Consider the probit model defined as
\[\begin{align*} y_i &\sim \text{Bernoulli}(p_i)\\ p_i &= \Phi(x^T_i\beta) \end{align*}\]
where \(y_i \in \{0,1\}\) is the binary response variable, \(x_i \in \mathbb{R}^{q+1}\) is the vector of covariates (including an intercept) and \(\Phi\) is the cdf of the standard normal distribution. Our aim is to estimate the vector of coefficients \(\beta \in \mathbb{R}^{q+1}\).
library(gclus)
data(bank)                  # Swiss banknote data
bank <- as.matrix(bank)
y <- bank[, 1]              # binary response: Status of the note
X <- cbind(1, bank[, 2:5])  # design matrix: intercept plus Length, Left, Right, Bottom
q <- dim(X)[2]              # number of coefficients (q + 1 in the notation above)
n <- dim(X)[1]              # number of observations
# unnormalised posterior of beta under a flat prior: the Bernoulli likelihood
post <- function(beta, y, X){
  pi <- pnorm(X %*% matrix(beta, ncol = 1))
  prod(pi^y * (1 - pi)^(1 - y))
}
Implement the following random walk Metropolis-Hastings algorithm, in which the proposal is a multivariate normal centred at the current state with covariance \(\tau^2\hat\Sigma\), \(\hat\Sigma\) being the estimated covariance matrix of the probit maximum likelihood estimator. Since this proposal is symmetric, the acceptance probability reduces to the ratio of (unnormalised) posterior densities.
require(mvtnorm)
mh <- function(Nsim, tau, y, X){
  beta <- matrix(0, nrow = Nsim, ncol = ncol(X))
  # probit MLE as starting value; its covariance matrix scales the random walk proposal
  fit <- summary(glm(y ~ X, family = binomial(link = "probit")))
  sigma.asymp <- fit$cov.unscaled
  beta[1, ] <- fit$coefficients[, 1]
  for(s in 2:Nsim){
    proposal <- rmvnorm(1, beta[s-1, ], tau^2 * sigma.asymp)
    # symmetric proposal: Metropolis acceptance probability
    rho <- min(1, post(proposal, y, X) / post(beta[s-1, ], y, X))
    if(runif(1) < rho){
      beta[s, ] <- proposal
    } else {
      beta[s, ] <- beta[s-1, ]
    }
  }
  return(beta)
}
# three chains with increasing proposal scale tau
b1 <- mh(1e4, 0.1, y, X)
b2 <- mh(1e4, 1, y, X)
b3 <- mh(1e4, 4, y, X)
b1[1:6,2]
## [1] -0.8074611 -0.8163652 -0.7508277 -0.6869436 -0.6887356 -0.7629664
b2[1:6,2]
## [1] -0.8074611 -0.8074611 -1.3995869 -1.3995869 -1.3995869 -1.3995869
b3[1:6,2]
## [1] -0.8074611 -0.8074611 -0.8074611 -0.8074611 -0.8074611 -0.8074611
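The printed draws already hint at the effect of \(\tau\): with \(\tau = 0.1\) the first proposals are all accepted but the moves are small, while with \(\tau = 4\) the chain has not moved at all in the first iterations. A quick way to quantify this (output not reproduced here) is the empirical acceptance rate, i.e. the proportion of iterations in which the chain actually moved:
# empirical acceptance rates for tau = 0.1, 1, 4 (output not shown)
mean(diff(b1[, 2]) != 0)
mean(diff(b2[, 2]) != 0)
mean(diff(b3[, 2]) != 0)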
par(mfcol=c(3,3))
## rows: tau = 0.1, 1, 4; columns: trace, ACF and histogram of the draws of beta_2 (Length),
## the histograms discarding the early iterations as burn-in
plot(b1[,2], type="l", ylab="Trace", xlab="Iteration")
plot(b2[,2], type="l", ylab="Trace", xlab="Iteration")
plot(b3[,2], type="l", ylab="Trace", xlab="Iteration")
acf(b1[,2], main="")
acf(b2[,2], main="")
acf(b3[,2], main="")
hist(b1[1e3:1e4,2], breaks=50, border="gray40",freq=F, main="")
abline(v=mean(b1[1e3:1e4,2]), col="firebrick3", lty=2)
hist(b2[1e3:1e4,2], breaks=50, border="gray40",freq=F, main="")
abline(v=mean(b2[1e3:1e4,2]), col="firebrick3", lty=2)
hist(b3[1e3:1e4,2], breaks=50, border="gray40",freq=F, main="")
abline(v=mean(b3[1e3:1e4,2]), col="firebrick3", lty=2)
post_sample <- b2[seq(1e3, 1e4, by=10), ]  # discard 1000 burn-in draws, then keep every 10th
par(mfrow=c(2,2))
plot(post_sample[,2], type="l", ylab="Trace", xlab="Iteration")
acf(post_sample[,2], main="")
hist(post_sample[,2], border="gray40",freq=F, main="")
abline(v=mean(b2[1e3:1e4,2]), col="firebrick3", lty=2)
# compare posterior means with the probit MLEs
round(colMeans(post_sample),2)
## [1] -117.92 -0.82 1.07 1.12 1.13
round(summary(glm(y~X,family=binomial(link="probit")))$coefficients[,1],2)
## (Intercept) XLength XLeft XRight XBottom
## -113.11 -0.81 1.06 1.06 1.11
# compare posterior variances with the diagonal of the MLE covariance matrix
round(apply(post_sample, 2, var),2)
## [1] 8113.98 0.14 0.39 0.27 0.03
round(diag(summary(glm(y~X,family=binomial(link="probit")))$cov.unscaled),2)
## (Intercept) XLength XLeft XRight XBottom
## 6956.75 0.15 0.36 0.29 0.03
The same model can be written by introducing an auxiliary latent variable: \[\begin{align*} y_i &= \mathbb{1}_{\{z_i > 0\}}\\ z_i &\sim N(x_i^T \beta, 1) \end{align*}\] Prove that the two model specifications are equivalent.
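A short argument: writing \(z_i = x_i^T\beta + \varepsilon_i\) with \(\varepsilon_i \sim N(0,1)\) and integrating out the latent variable, \[ P(y_i = 1 \mid x_i, \beta) = P(z_i > 0) = P(\varepsilon_i > -x_i^T\beta) = \Phi(x_i^T\beta), \] where the last equality uses the symmetry of the standard normal distribution. Hence \(y_i \mid x_i, \beta \sim \text{Bernoulli}(\Phi(x_i^T\beta))\), exactly the first formulation.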
The full conditional distributions for \(z\) and \(\beta\) are \[ z_i \mid \beta, x_i, y_i \sim \begin{cases} N_+(x_i^T\beta,1) & \text{if } y_i=1\\ N_-(x_i^T\beta,1) & \text{if } y_i=0 \end{cases}\] where \(N_+\) and \(N_-\) denote the normal distribution truncated to the positive and to the negative half-line respectively, and \[\beta \mid X, y, z \sim N_{q+1}\big((X^T X)^{-1}X^Tz,\; (X^TX)^{-1}\big).\]
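The conditional for \(\beta\) is the Bayesian linear regression posterior obtained by regressing the latent \(z\) on \(X\) with unit error variance and, implicitly, a flat prior \(\pi(\beta) \propto 1\) (the same improper prior underlying the likelihood-only post() used in the MH sampler). Completing the square gives \[ p(\beta \mid z, X) \propto \exp\Big(-\tfrac{1}{2}(z - X\beta)^T(z - X\beta)\Big) \propto \exp\Big(-\tfrac{1}{2}(\beta - \hat\beta)^T X^T X (\beta - \hat\beta)\Big), \qquad \hat\beta = (X^TX)^{-1}X^Tz, \] which is exactly the stated \(N_{q+1}\big(\hat\beta, (X^TX)^{-1}\big)\). Given these conditionals, implement the following Gibbs sampler: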
library(truncnorm)
gs <- function(Nsim, y, X){
  beta <- matrix(0, nrow = Nsim, ncol = ncol(X))
  z <- numeric(length(y))
  # start from the probit MLE
  beta[1, ] <- summary(glm(y ~ X, family = binomial(link = "probit")))$coefficients[, 1]
  # precompute the quantities needed for the full conditional of beta
  S <- solve(t(X) %*% X)
  SX <- S %*% t(X)
  for(i in 2:Nsim){
    mu <- X %*% beta[i-1, ]
    # sample the latent variables from their truncated normal full conditionals
    z[y == 1] <- rtruncnorm(sum(y == 1), a = 0, b = Inf, mean = mu[y == 1], sd = 1)
    z[y == 0] <- rtruncnorm(sum(y == 0), a = -Inf, b = 0, mean = mu[y == 0], sd = 1)
    # sample beta from its multivariate normal full conditional
    beta[i, ] <- rmvnorm(1, SX %*% z, S)
  }
  return(beta)
}
bgs <- gs(1e4,y,X)
par(mfrow=c(2,2))
plot(bgs[,2], type="l", ylab="Trace", xlab="Iteration")
acf(bgs[,2], main="")
hist(bgs[,2], breaks=50, border="gray40",freq=F, main="")
abline(v=mean(bgs[1e3:1e4,2]), col="firebrick3", lty=2)
post_sample <- bgs[seq(1e3, 1e4, by=10), ]  # burn-in and thinning as before
par(mfrow=c(2,2))
plot(post_sample[,2], type="l", ylab="Trace", xlab="Iteration")
acf(post_sample[,2], main="")
hist(post_sample[,2], breaks=50, border="gray40",freq=F, main="")
abline(v=mean(post_sample[,2]), col="firebrick3", lty=2)
round(colMeans(post_sample),2)
## [1] -120.85 -0.82 1.08 1.13 1.15
round(summary(glm(y~X,family=binomial(link="probit")))$coefficients[,1],2)
## (Intercept) XLength XLeft XRight XBottom
## -113.11 -0.81 1.06 1.06 1.11
round(apply(post_sample, 2, var),2)
## [1] 7208.32 0.14 0.43 0.35 0.03
round(diag(summary(glm(y~X,family=binomial(link="probit")))$cov.unscaled),2)
## (Intercept) XLength XLeft XRight XBottom
## 6956.75 0.15 0.36 0.29 0.03
system.time(mh(Nsim = 10000,tau = 1,y = y,X = X))
## user system elapsed
## 3.274 0.009 3.287
system.time(gs(Nsim = 10000,y = y,X = X))
## user system elapsed
## 3.257 0.021 3.282
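The two samplers take essentially the same time for 10,000 iterations, so a fairer comparison is how many effectively independent draws each chain delivers per unit time. A possible check (not part of the original output), using the coda package if it is installed:
# hedged sketch: effective sample size per coefficient, MH (tau = 1) vs. Gibbs
library(coda)
effectiveSize(as.mcmc(b2))
effectiveSize(as.mcmc(bgs))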