# Lecture 9 ----

# Packages are attached up front so that a missing installation fails
# immediately instead of half-way through the script.
library(micsr)
library(ivreg)

# IV regression ----

paces <- as.data.frame(micsr.data::paces)

# For those having problems with micsr.data,
# you can read the files via .csv or .RData
# (one of the two commands below)
#
# load(file = "paces.RData")
# paces <- read.csv(file = "paces.csv")

# Plain OLS of educyrs on privsch (no instrument).
ols <- lm(educyrs ~ privsch, data = paces)
summary(ols)$coefficients

# First stage: the treatment (privsch) regressed on the instrument (voucher).
intTreat <- lm(privsch ~ voucher, data = paces)
summary(intTreat)$coefficients

# Reduced form: the outcome (educyrs) regressed directly on the instrument.
reduced <- lm(educyrs ~ voucher, data = paces)
summary(reduced)$coefficients

# Ratio of the reduced-form slope to the first-stage slope.
coef(reduced)[2] / coef(intTreat)[2]

# Second stage by hand: replace privsch with its first-stage fitted values.
# Only the point estimate is extracted here.
iv1 <- lm(educyrs ~ predict(intTreat), data = paces)
iv1$coefficients[2]

# Same model fitted with ivreg(); in the formula, regressors come before
# the `|` and instruments after it.
iv2 <- ivreg(educyrs ~ privsch | voucher, data = paces)
iv2$coefficients[2]
round(summary(iv2)$diagnostics, 2)

# OLS and IV with additional covariates. The covariates appear on both
# sides of `|` in the ivreg() formula; only privsch is replaced by voucher
# on the instrument side.
ols <- lm(
  educyrs ~ privsch + pilot + housvisit + smpl + phone + age + sex +
    strata + month,
  data = paces
)
iv <- ivreg(
  educyrs ~ privsch + pilot + housvisit + smpl + phone + age + sex +
    strata + month |
    voucher + pilot + housvisit + smpl + phone + age + sex +
    strata + month,
  data = paces
)

# Row 2 of each coefficient table is the privsch coefficient
# (row 1 is the intercept); stack OLS over IV for comparison.
rbind(
  summary(ols)$coefficients[2, ],
  summary(iv)$coefficients[2, ]
)
summary(iv)$diagnostics

# Difference-in-differences ----
# Example 1

car_thefts <- as.data.frame(micsr.data::car_thefts)

# For those having problems with micsr.data,
# you can read the files via .csv or .RData
# (one of the two commands below)
#
# load(file = "car_thefts.RData")
# car_thefts <- read.csv(file = "car_thefts.csv")

# Total thefts and total days for every block/period combination.
sum_thefts <- aggregate(thefts ~ block + period, data = car_thefts, FUN = sum)
sum_days <- aggregate(days ~ block + period, data = car_thefts, FUN = sum)
two_obs <- merge(sum_thefts, sum_days, by = c("block", "period"))

# Rescale to thefts per 30.5 days (presumably an average month length).
two_obs$thefts <- two_obs$thefts / two_obs$days * 30.5
mean(two_obs$thefts)

# Attach each block's distance value, then recode it as a 0/1 dummy that
# equals 1 when distance is "same".
block_distance <- unique(car_thefts[, c("block", "distance")])
two_obs <- merge(two_obs, block_distance, by = "block", all.x = TRUE)
two_obs$distance <- ifelse(two_obs$distance == "same", 1, 0)

# Regression version: period, distance and their interaction.
mod <- lm(thefts ~ period * distance, data = two_obs)
summary(mod)$coefficients

# Manual version: put the "before" and "after" rates side by side for each
# block and take the within-block change.
before <- two_obs[
  two_obs$period == "before",
  c("block", "distance", "thefts")
]
after <- two_obs[
  two_obs$period == "after",
  c("block", "distance", "thefts")
]
names(before)[names(before) == "thefts"] <- "before"
names(after)[names(after) == "thefts"] <- "after"
diffs <- merge(before, after, by = c("block", "distance"))
diffs$dt <- diffs$after - diffs$before

# Difference between the mean change of the distance == 1 blocks and the
# mean change of the distance == 0 blocks, followed by the matching
# equal-variance two-sample t-test.
mean(diffs$dt[diffs$distance == 1]) - mean(diffs$dt[diffs$distance == 0])
t.test(dt ~ factor(distance), data = diffs, var.equal = TRUE)

# Difference-in-differences ----
# Example 2

# For those having problems with micsr.data,
# you can read the files via .csv or .RData
# (one of the two commands below)
#
# load(file = "napster.RData")
# napster <- read.csv(file = "napster.csv")

napster <- as.data.frame(micsr.data::napster)
napster <- napster[, c("date", "expmusic", "internet", "weight")]

# Observations dated before 1 June 1999 are "before", all others "after";
# "before" is made the reference level of the factor.
cutoff <- as.Date("1999-06-01")
napster$period <- ifelse(as.Date(napster$date) < cutoff, "before", "after")
napster$period <- factor(napster$period, levels = c("before", "after"))

# Weighted regression of expmusic on period, internet and their interaction.
# `weights` is spelled out in full: the argument of lm() is `weights`, and
# `weight = ` only worked through partial argument matching.
fit <- lm(expmusic ~ period * internet, data = napster, weights = weight)
summary(fit)$coefficients