# Lecture 9

# IV regression
library(micsr)
paces <- as.data.frame(micsr.data::paces)

# For those having problems with micsr.data,
# you can read the files via .csv or .RData
# (one of the two commands below)
#
# load(file = "paces.RData")
# paces <- read.csv(file = "paces.csv")

# OLS of educyrs on privsch, ignoring any endogeneity of privsch.
ols <- lm(educyrs ~ privsch, data = paces)
summary(ols)$coefficients

# First stage: regress the endogenous regressor (privsch) on the
# instrument (voucher).
intTreat <- lm(privsch ~ voucher, data = paces)
summary(intTreat)$coefficients

# Reduced form: regress the outcome (educyrs) directly on the instrument.
reduced <- lm(educyrs ~ voucher, data = paces)
summary(reduced)$coefficients

# Ratio of the reduced-form slope to the first-stage slope.
coef(reduced)[2] / coef(intTreat)[2]

# Two-stage least squares by hand: regress the outcome on the first-stage
# fitted values. Predicting on `paces` explicitly keeps the fitted values
# aligned with the rows of the data: rows with a missing privsch or voucher
# get NA instead of being silently dropped, which would make the vector
# shorter than nrow(paces) and break the second-stage lm().
# Only the point estimate is used here; the standard errors of this
# second-stage lm() are not valid 2SLS standard errors.
privsch_hat <- predict(intTreat, newdata = paces)
iv1 <- lm(educyrs ~ privsch_hat, data = paces)
iv1$coefficients[2]

# Same estimate from ivreg(); the instrument follows the `|` in the formula.
library(ivreg)
iv2 <- ivreg(educyrs ~ privsch | voucher, data = paces)
iv2$coefficients[2]

# Diagnostic tests reported by summary() for the ivreg fit, rounded to
# two decimals.
round(summary(iv2)$diagnostics, 2)



# OLS and IV with control variables. The same controls appear in the OLS
# regression, in the IV regression and in its instrument list, so they are
# written once and the formulas are assembled from that single vector.
controls <- c(
  "pilot", "housvisit", "smpl", "phone",
  "age", "sex", "strata", "month"
)
controls_rhs <- paste(controls, collapse = " + ")

ols_formula <- as.formula(paste("educyrs ~ privsch +", controls_rhs))
iv_formula <- as.formula(
  paste("educyrs ~ privsch +", controls_rhs, "| voucher +", controls_rhs)
)

ols <- lm(ols_formula, data = paces)
iv <- ivreg(iv_formula, data = paces)

# Second coefficient row (privsch) of each fit: OLS first, IV second.
iv_summary <- summary(iv)
rbind(summary(ols)$coefficients[2, ], iv_summary$coefficients[2, ])

iv_summary$diagnostics

# Difference-in-differences
# Example 1
car_thefts <- as.data.frame(micsr.data::car_thefts)

# If micsr.data cannot be used, the same data can be read from a local
# .RData or .csv file instead (pick one of the two commands below):
#
# load(file = "car_thefts.RData")
# car_thefts <- read.csv(file = "car_thefts.csv")

# Collapse to one row per block and period: total thefts and total days.
group_keys <- c("block", "period")
sum_thefts <- aggregate(thefts ~ block + period, data = car_thefts, FUN = sum)
sum_days <- aggregate(days ~ block + period, data = car_thefts, FUN = sum)
two_obs <- merge(sum_thefts, sum_days, by = group_keys)

# Express thefts as a rate per 30.5 days so that periods covering a
# different number of days are comparable.
days_per_month <- 30.5
two_obs$thefts <- with(two_obs, thefts / days * days_per_month)
mean(two_obs$thefts)

# Attach the distance recorded for each block.
block_distance <- unique(car_thefts[c("block", "distance")])
two_obs <- merge(two_obs, block_distance, by = "block", all.x = TRUE)

# Recode distance as a 0/1 dummy: 1 when distance is "same", 0 otherwise.
two_obs$distance <- as.numeric(two_obs$distance == "same")

# Regression with the period x distance interaction.
mod <- lm(thefts ~ period * distance, data = two_obs)
summary(mod)$coefficients

# Rows of two_obs for one period, keeping block, distance and thefts, with
# the thefts column renamed after the period.
period_thefts <- function(which_period) {
  in_period <- two_obs$period == which_period
  out <- two_obs[in_period, c("block", "distance", "thefts")]
  names(out)[names(out) == "thefts"] <- which_period
  out
}

before <- period_thefts("before")
after <- period_thefts("after")

# One row per block with the "before" and "after" theft rates side by side.
diffs <- merge(before, after, by = c("block", "distance"))

# Change in the theft rate within each block.
diffs$dt <- with(diffs, after - before)

# Difference in mean changes between the distance == 1 and distance == 0
# groups.
with(diffs, mean(dt[distance == 1]) - mean(dt[distance == 0]))

# Two-sample t-test on the changes, assuming equal variances.
t.test(dt ~ factor(distance), data = diffs, var.equal = TRUE)


# Difference-in-differences
# Example 2

# For those having problems with micsr.data, 
# you can read the files via .csv or .RData  
# (one of the two commands below)
# 
# load(file = "napster.RData")
# napster <- read.csv(file = "napster.csv")
napster <- as.data.frame(micsr.data::napster)

# Keep only the columns used below.
napster <- napster[, c("date", "expmusic", "internet", "weight")]
cutoff <- as.Date("1999-06-01")

# Observations dated strictly before the cutoff are "before", all others
# "after". "before" is listed first so that it is the reference level in
# the regression.
napster$period <- factor(
  ifelse(as.Date(napster$date) < cutoff, "before", "after"),
  levels = c("before", "after")
)

# Weighted regression with the period x internet interaction. `data` and
# `weights` are named in full rather than relying on positional and partial
# argument matching.
fit <- lm(expmusic ~ period * internet, data = napster, weights = weight)
summary(fit)$coefficients