## IV-PROBIT (Two-Step / Control Function) library(micsr) str(federiv) table(federiv$federiv)/nrow(federiv) form <- federiv ~ eqrat + optval + bonus + #suspected to be endogenous ltass + linsown + linstown + roe + mktbk + perfor + dealdum + div + year | ltass + linsown + linstown + roe + mktbk + perfor + dealdum + div + year + no_emp + no_subs + no_off + ceo_age + gap + cfa # External instruments bank_2st <- binomreg(form, data = federiv, link = "probit", method = "twosteps") summary(bank_2st) bank_2ml <- binomreg(form, data = federiv, link = "probit", method = "ml") summary(bank_2ml) endogtest(form, federiv) glm1 <- glm(federiv ~ eqrat + optval + bonus + #suspected to be endogenous ltass + linsown + linstown + roe + mktbk + perfor + dealdum + div + year, data = federiv, family = binomial("probit")) summary(glm1) help(glm) # Matching tuscany <- twa[twa$region == "Tuscany", ] table(tuscany$group) tuscany$perm <- ifelse(tuscany$outcome == "perm", 1, 0) cbind(control = mean(tuscany$perm[tuscany$group == "control"]), treated = mean(tuscany$perm[tuscany$group == "treated"])) tuscany$dist2 <- tuscany$dist^2 tuscany$livselfemp <- (tuscany$city == "livorno") * (tuscany$occup == "selfemp") ftusc <- perm + group ~ city + sex + marital + age + loc + children + educ + pvoto + training + empstat + occup + sector + wage + hour + feduc + femp + fbluecol + dist + dist2 + livselfemp ps <- pscore(ftusc, tuscany) ps$strata[, c(1,5,6,2,3)] ps tusc_tr <- ps$model[ps$model$group == "treated", c("id", "pscore")] colnames(tusc_tr) <- c("id_tr", "ps_tr") tusc_ctl <- ps$model[ps$model$group == "control", c("id", "pscore")] colnames(tusc_ctl) <- c("id_ctl", "ps_ctl") print(head(tusc_tr, 2)) # Merging the two tables via inequality joining tusc_tr_2 <- tusc_tr[1:2, ] result <- data.frame( id_tr = tusc_tr_2$id_tr, n = sapply(1:2, function(i) { sum(tusc_ctl$ps_ctl >= tusc_tr_2$ps_tr[i]) }) ) print(result) # Merging the two tables via rolling inequality id_sup <- ps_sup <- numeric(nrow(tusc_tr)) for (i in seq_len(nrow(tusc_tr))) { ps_treated <- tusc_tr$ps_tr[i] candidates <- tusc_ctl[tusc_ctl$ps_ctl >= ps_treated, ] if (nrow(candidates) == 0) { id_sup[i] <- NA ps_sup[i] <- NA } else { j <- which.min(candidates$ps_ctl) id_sup[i] <- candidates$id_ctl[j] ps_sup[i] <- candidates$ps_ctl[j] } } match_sup <- data.frame( id_tr = tusc_tr$id_tr, ps_tr = tusc_tr$ps_tr, id_sup = id_sup, ps_sup = ps_sup ) print(match_sup[1:3, ]) # The same for the closest lower propensity score control id_inf <- ps_inf <- numeric(nrow(tusc_tr)) for (i in seq_len(nrow(tusc_tr))) { ps_treated <- tusc_tr$ps_tr[i] candidates <- tusc_ctl[tusc_ctl$ps_ctl <= ps_treated, ] if (nrow(candidates) == 0) { id_inf[i] <- NA ps_inf[i] <- NA } else { j <- which.max(candidates$ps_ctl) id_inf[i] <- candidates$id_ctl[j] ps_inf[i] <- candidates$ps_ctl[j] } } match_inf <- data.frame( id_tr = tusc_tr$id_tr, ps_tr = tusc_tr$ps_tr, id_inf = id_inf, ps_inf = ps_inf ) print(match_inf[1:3, ]) # Merge the two tables m <- merge(match_sup, match_inf, by = "id_tr", all.x = TRUE) m$id_ctl <- ifelse( is.na(m$ps_inf) | (!is.na(m$ps_sup) & (m$ps_tr.y - m$ps_inf) >= (m$ps_sup - m$ps_tr.x)), m$id_sup, m$id_inf ) m$ps_ctl <- ifelse( m$id_ctl == m$id_inf, m$ps_inf, m$ps_sup ) match_nearest <- m[, c("id_tr", "id_ctl", "ps_tr.x", "ps_tr.y", "ps_ctl")] head(match_nearest, 3) length(unique(match_nearest$id_ctl)) tab <- table(match_nearest$id_ctl) tab_sorted <- sort(tab, decreasing = TRUE) head(tab_sorted, 2) match_nearest$diff <- match_nearest$ps_tr.x - match_nearest$ps_ctl match_nearest[order(-match_nearest$diff), ][1:3, ] match_caliper <- match_nearest[abs(match_nearest$ps_tr - match_nearest$ps_ctl) < 0.01, ] tmp <- match_nearest[, !(names(match_nearest) %in% c("ps_tr", "ps_ctl"))] id_tr <- tmp$id_tr id_ctl <- tmp$id_ctl group_tr <- rep("tr", length(id_tr)) group_ctl <- rep("ctl", length(id_ctl)) match_long <- data.frame( id = c(id_tr, id_ctl), gp = c(group_tr, group_ctl) ) match_smpl <- merge(match_long, tuscany[, c("id", "perm")], by = "id", all.x = TRUE) head(match_smpl, 2) means <- tapply(match_smpl$perm, match_smpl$gp, mean) # ATET atet <- means["tr"] - means["ctl"] c(treatment = means["tr"], control = means["ctl"], atet = atet) # Using the MatchIt library library(MatchIt) ftusc2 <- update(ftusc, group ~ .) mtch <- matchit(ftusc2, tuscany, replace = TRUE) summary(mtch)$nn matched_data <- match.data(mtch) head(matched_data, 2) mtch_cap <- matchit(ftusc2, tuscany, replace = TRUE, caliper = 0.01) matched_data <- match.data(mtch_cap) att <- with(matched_data, mean(perm[group == "treated"]) - mean(perm[group == "control"])) att plot(summary(mtch_cap))