# Binary models 
library(micsr)
data(mode_choice)
mode_choice$ivtime <- mode_choice$ivtime/60
mode_choice$ovtime <- mode_choice$ovtime/60
mode_choice$cost <- mode_choice$cost/100
mode_choice$cost <- mode_choice$cost * 8.42
mode_choice$gcost <- (mode_choice$ivtime + mode_choice$ovtime) * 8 + mode_choice$cost

# linear probability model
linprob <- lm(mode ~ gcost, mode_choice)

#probit and logit models 
probit <- glm(mode ~ gcost, mode_choice, family = binomial(link = "probit"))
logit <- glm(mode ~ gcost, mode_choice, family = binomial(link = "logit"))

# Coefficients of gcost
round(summary(linprob)$coefficients[2,],2)
round(summary(probit)$coefficients[2,],2)
round(summary(logit)$coefficients[2,],2)

# Differencese between coefficints also multiplying by 0.4 (probit) and 0.25 (logit)
c(coef(linprob)[2], coef(probit)[2] * 0.4,  coef(logit)[2] * 0.25)

# evaluation at the mean prodictor for the comparison task
xmean <- mean(mode_choice$gcost)
lprobit <- coef(probit)[1] + coef(probit)[2] * xmean
llogit <- coef(logit)[1] + coef(logit)[2] * xmean

round(dnorm(lprobit) * coef(probit)[2], 3)
round(dlogis(llogit) * coef(logit)[2], 3)
coef(linprob)[2]

# Plot the fitted curves
xlim <- c(-30,30)
set.seed(123)
y_jitter <- jitter(as.numeric(mode_choice$mode), amount = 0.02)

plot(mode_choice$gcost, y_jitter, xlim = xlim, pch = 16,
     cex = 0.2, xlab = "gcost", ylab = "mode")

curve(coef(linprob)[1] + coef(linprob)[2] * x, add = TRUE, lty = 1 )
curve(pnorm(coef(probit)[1] + coef(probit)[2] * x), add = TRUE, lty = 2)
curve(plogis(coef(logit)[1] + coef(logit)[2] * x), add = TRUE, lty = 3)

legend("topleft", legend = c("linear", "probit", "logit"),
       lty = c(1, 2, 3), bty = "n")


# Airbnb example 
# For those having problems with the micsr.data call, 
# you can load the dataset via .csv or .RData; 
# see below for the two alternative ways
#
# airbnb <- read.csv("airbnb.csv")
# load("airbnb.RData")

airbnb <- as.data.frame(micsr.data::airbnb)
str(airbnb)
mean(as.numeric(airbnb$acceptance) - 1)
table(airbnb$acceptance)/nrow(airbnb)

apply(is.na(airbnb), 2, sum)
airbnb <- na.omit(airbnb)
airbnb$acceptance <- ifelse(airbnb$acceptance == "no", 0, 1)
str(airbnb)


fit_lprob <- lm(acceptance ~ guest_race + I(log(price)) + city, data = airbnb)
fit_logit <- glm(acceptance ~ guest_race + I(log(price)) + city, 
                 data = airbnb, family = binomial("logit"))
fit_probit <- glm(acceptance ~ guest_race + I(log(price)) + city, 
                  
                  data = airbnb, family = binomial("probit"))

round(cbind(coef(fit_lprob), coef(fit_logit) * 0.25, 
            coef(fit_probit)* 0.4, coef(fit_logit)/coef(fit_probit)),3)

# Coefficients comparison
round(rbind(coef(fit_logit),  coef(fit_probit)),3)

# Comparison between the differences in terms of probability
pnorm(coef(fit_probit)[1] + coef(fit_probit)[2] + coef(fit_probit)[3] * log(182)) - 
  pnorm(coef(fit_probit)[1] + coef(fit_probit)[3] * log(182))

plogis(coef(fit_logit)[1] + coef(fit_logit)[2] + coef(fit_logit)[3] * log(182)) - 
  plogis(coef(fit_logit)[1] + coef(fit_logit)[3] * log(182))

# Explore the glm function 
summary(logit)

#Deviance
logit$deviance
logit$df.residual
# Equivalently 
deviance(logit)
df.residual(logit)

# Null deviance

logit$null.deviance
logit$df.null

summary(glm(mode ~ 1, mode_choice, family = "binomial"))

# Residuals
head(resid(logit, type = "response"))
head(resid(logit, type = "pearson"))
head(resid(logit, type = "deviance"))

# Predict
head(predict(logit, type = "link"))
head(predict(logit, type = "response"))

head(predict(logit, type = "link", newdata = data.frame(gcost = 5)))
head(predict(logit, type = "response", newdata = data.frame(gcost = 5)))


# Estimation 
y <- mode_choice$mode
X <- model.matrix(~gcost, data = mode_choice)

loglik_logit <- function(beta, X, y) {
  eta <- X %*% beta
  p <- exp(eta) / (1 + exp(eta))
  sum(y * log(p) + (1 - y) * log(1 - p))
}

beta_start <- rep(0, ncol(X))

opt <- optim(
  par = beta_start,
  fn  = function(b) -loglik_logit(b, X, y),
  hessian = TRUE,
  method = "BFGS"
)

beta_hat <- opt$par
cbind(beta_hat, logit$coefficients)

# Standard errors
# Via numerical Hessian
H_obs <- opt$hessian
vcov_obs <- solve(H_obs)
se_obs <- sqrt(diag(vcov_obs))

cbind(se_obs, summary(logit)$coefficients[,2])

# Via Fisher Information matrix
p_hat <- 1 / (1 + exp(-X %*% beta_hat))
W <- diag(as.vector(p_hat * (1 - p_hat)))

Fisher <- t(X) %*% W %*% X
vcov_fisher <- solve(Fisher)
se_fisher <- sqrt(diag(vcov_fisher))


cbind(se_fisher, summary(logit)$coefficients[,2])


cbind(logLik(logit), loglik_logit(beta_hat, X, y))


# Model comparisons
logit2 <- glm(mode ~ cost + ivtime + ovtime, data = mode_choice, family = binomial("logit"))
probit2 <- glm(mode ~ cost + ivtime + ovtime, data = mode_choice, family = binomial("probit"))

cbind(summary(logit)$coefficients[,1:2], summary(probit)$coefficients[,1:2])

cbind(summary(logit2)$coefficients[,1:2], summary(probit2)$coefficients[,1:2])
cbind(-2*loglik_logit(beta_hat, X, y), logit$deviance)

cbind(- 2*loglik_logit(beta_hat, X, y) + 4, AIC(logit))


cbind(logLik(logit), logLik(logit2), logLik(probit), logLik(probit2))

cbind(AIC(logit), AIC(logit2), AIC(probit), AIC(probit2))

cbind(BIC(logit), BIC(logit2), BIC(probit), BIC(probit2))

cbind(logit$deviance, logit2$deviance, probit$deviance, probit2$deviance)

cbind(logit$null.deviance, logit2$null.deviance, probit$null.deviance, probit2$null.deviance)


# Testing nested models
library(lmtest)
lrtest(probit, probit2)
car::linearHypothesis(probit2, 
                      c("ivtime = 8 * cost", "ovtime = 8 * cost"))

# Conditional moment test
cmtest(probit2, test = "normality") 
cmtest(probit2, test = "heterosc")
cmtest(probit2, test = "reset")