# Dataset in wide format: example 
library(micsr)
dutch_railways <- as.data.frame(micsr.data::dutch_railways)
head(dutch_railways)

library(dfidx)
Tr <- dfidx(dutch_railways, shape = "wide", varying = 4:11, sep = "_",
            idx = list(c("choiceid", "id")), idnames = c(NA, "alt"),
            opposite = c("price", "time", "change", "comfort"))

head(Tr)

# Dataset in long format 
toronto_montreal <- as.data.frame(micsr.data::toronto_montreal)
head(toronto_montreal)

MC <- dfidx(toronto_montreal, subset = noalt == 4, 
            idx = c("case", "alt"), drop.index = FALSE)
head(MC)

# The error below is not a error 
library(Formula)
f <- Formula(choice ~ cost | income + urban | ivt)

library(mlogit)
MC <- dfidx(toronto_montreal, subset = noalt == 4, pkg = "mlogit")
class(MC)
f <- Formula(choice ~ cost | income  | ivt)
mf <- model.frame(MC, f)
class(mf)

head(model.matrix(mf), 4) # Model matrix

# Multinomial logit model 
MC <- dfidx(toronto_montreal, subset = noalt == 4)
ml.MC1 <- mlogit(choice ~ cost + freq + ovt | income | ivt, MC)
# Equivalently
ml.MC1b <- mlogit(choice ~ cost + freq + ovt | income | ivt, 
                  toronto_montreal, subset = noalt == 4, 
                  idx = c("case", "alt"))

# Transforming the variables and excluding bus 
MC$time <- MC$ivt + MC$ovt 
ml.MC1 <- mlogit(choice ~ cost + freq | income | time, MC, 
                 alt.subset = c("car", "train", "air"), reflevel = "car")


summary(ml.MC1)

# Fitted probabilities
head(fitted(ml.MC1, type = "outcome"))
head(fitted(ml.MC1, type = "probabilities"), 4)

# Log-likelihood
sum(log(fitted(ml.MC1, type = "outcome")))
logLik(ml.MC1)
apply(fitted(ml.MC1, type = "probabilities"), 2, mean)


# Prediction (for a reduction of 20% of train time)
NMC <- MC
NMC$time[NMC$idx$alt == "train"] <- 0.8 * NMC$time[NMC$idx$alt == "train"]
Oprob <- fitted(ml.MC1, type = "probabilities")
Nprob <- predict(ml.MC1, newdata = NMC)
rbind(old = apply(Oprob, 2, mean), new = apply(Nprob, 2, mean))

# Illustration of the IIA property
head(Nprob[, "air"] / Nprob[, "car"])
head(Oprob[, "air"] / Oprob[, "car"])

# Surplus computation
ivbefore <- logsum(ml.MC1)
ivafter <- logsum(ml.MC1, data = NMC)

surplus <- - (ivafter - ivbefore) / coef(ml.MC1)["cost"]
summary(surplus)

# Marginal effects
effects(ml.MC1, covariate = "income", type = "ar")

effects(ml.MC1, covariate = "cost", type = "rr")
coef(ml.MC1)[grep("time", names(coef(ml.MC1)))] /
  coef(ml.MC1)["cost"] * 60


#  Heteroschedasticity logit model
ml.MC <- mlogit(choice ~ freq + cost + ivt + ovt | 
                  urban + income, MC, reflevel = 'car', 
                alt.subset = c("car", "train", "air"))
hl.MC <- mlogit(choice ~ freq + cost + ivt + ovt | 
                  urban + income, MC, reflevel = 'car', 
                alt.subset = c("car", "train", "air"), 
                heterosc = TRUE)

hl.MC

lrtest(hl.MC, ml.MC)
waldtest(hl.MC, heterosc = FALSE) 

# Nested logit model 
telephone <- as.data.frame(micsr.data::telephone)
head(telephone)
ml_tel <- mlogit(choice ~ log(cost), telephone, 
                 idx = c("household", "service"))
nl_tel <- mlogit(choice ~ cost, telephone, 
                 idx = c("household", "service"), 
                 nests = list(measured = c("budget", "standard"), 
                              flat = c("local", "metro", "extended")))
coef(nl_tel)
nl_tel_u <- update(nl_tel, un.nest.el = TRUE)

scoretest(nl_tel_u, un.nest.el = FALSE) 
lrtest(nl_tel, nl_tel_u) 
waldtest(nl_tel, un.nest.el = TRUE) 

scoretest(ml_tel, 
          nests = list(measured = c("budget", "standard"), 
                       flat = c("local", "metro", "extended"))) 
lrtest(nl_tel_u, ml_tel) 
waldtest(nl_tel_u, nests = NULL)