# IV regression ----
#
# Two worked examples of instrumental-variables (2SLS) estimation:
#   1. effect of segregation on urban poverty (instrument: raildiv);
#   2. long-term effect of the slave trade on GDP (instruments: atlantic,
#      indian, redsea, sahara), controlling for colony.
# Each example is estimated by OLS, by explicit first/second stage
# regressions, and with ivreg(); weak-instrument F tests are reproduced by
# hand and compared with the ivreg diagnostics.

library(PerformanceAnalytics)
library(ivreg)
library(necountries)
library(ggplot2)

# Segregation effect on urban poverty ----
tracks_side <- read.csv("tracks_side.csv")
summary(tracks_side[, -1])
chart.Correlation(tracks_side[, c(3, 4, 2)])

# OLS regression
lm_yx <- lm(povb ~ segregation, tracks_side)
summary(lm_yx)$coefficients

# First stage regression
lm_xz <- lm(segregation ~ raildiv, tracks_side)
summary(lm_xz)
with(tracks_side, cor(segregation, raildiv))
summary(lm_xz)$r.squared
# Take the F statistic and its degrees of freedom from the fitted model
# instead of retyping them, so the p-value always matches the data.
fstat_xz <- summary(lm_xz)$fstatistic
fstat_xz
pf(
  fstat_xz[["value"]], fstat_xz[["numdf"]], fstat_xz[["dendf"]],
  lower.tail = FALSE
)

# Second stage regression
lm_yhx <- lm(povb ~ fitted(lm_xz), tracks_side)
summary(lm_yhx)$coefficients

# A way to obtain the estimate: ratio of the reduced-form slope (povb on
# raildiv) to the first-stage slope (segregation on raildiv)
lm_yz <- lm(povb ~ raildiv, tracks_side)
coef(lm_yz)[2]
coef(lm_yz)[2] / coef(lm_xz)[2]

# The ivreg package
ivFit1 <- ivreg(povb ~ segregation | raildiv, data = tracks_side)
summary(ivFit1, diagnostics = TRUE)
summary(ivFit1, diagnostics = TRUE)$diagnostics

# Some quantities of the model output, obtained by hand
X <- model.matrix(~segregation, data = tracks_side)
Z <- model.matrix(~raildiv, data = tracks_side)
# Projection matrix onto the column space of the instruments
PZ <- Z %*% solve(t(Z) %*% Z) %*% t(Z)
# (X' PZ X)^{-1}, computed once and reused for coefficients and variances
M <- solve(t(X) %*% PZ %*% X)
B <- M %*% t(X) %*% PZ %*% tracks_side$povb
B
# Standard errors: sqrt of the diagonal of sigma^2 * M, with sigma^2 built
# from the ivreg residuals and the residual degrees of freedom n - ncol(X)
V1 <- sqrt(diag(sum(residuals(ivFit1)^2) / (nrow(X) - ncol(X)) * M))
V1

# Example: long-term effects of slave trade ----
sltd <- as.data.frame(slave_trade)

# Passing the data directly to ggplot() avoids relying on `%>%`, which is
# not provided by any of the packages attached above.
ggplot(sltd, aes(slavesarea, gdp)) +
  geom_point() +
  scale_x_continuous(trans = "log10", expand = expansion(mult = c(.1))) +
  scale_y_log10() +
  geom_smooth(method = "lm", se = FALSE, color = "black") +
  ggrepel::geom_label_repel(
    aes(label = country),
    size = 2, max.overlaps = Inf
  )

# Pool the less frequent colony categories into "other" and make "other"
# the reference level. Recoding through a character vector with %in% works
# whether colony is a factor or character and does not fail on NA.
table(sltd$colony)
pooled_levels <- c("spain", "germany", "italy", "none")
colony_chr <- as.character(sltd$colony)
colony_chr[colony_chr %in% pooled_levels] <- "other"
sltd$colony <- factor(
  colony_chr,
  levels = c("other", "uk", "france", "portugal", "belgium")
)
table(sltd$colony)

# OLS regression (accounting for colony)
slaves_ols <- lm(log(gdp) ~ log(slavesarea) + colony, data = sltd)
summary(slaves_ols)$coefficients

# First stage regression
slaves_first <- lm(
  log(slavesarea) ~ colony + atlantic + indian + redsea + sahara,
  sltd
)
summary(slaves_first)$coefficients
summary(slaves_first)$r.squared
fstat_slaves <- summary(slaves_first)$fstatistic
fstat_slaves
pf(
  fstat_slaves[["value"]], fstat_slaves[["numdf"]], fstat_slaves[["dendf"]],
  lower.tail = FALSE
)

# Second stage regression
slaves_second <- lm(log(gdp) ~ predict(slaves_first) + colony, data = sltd)
summary(slaves_second)$coefficients

# Results obtained via ivreg
fit_iv <- ivreg(
  log(gdp) ~ log(slavesarea) + colony |
    colony + redsea + atlantic + sahara + indian,
  data = sltd
)
summary(fit_iv, diagnostics = TRUE)

# Weak instrument test: segregation ----
fstat_xz
pf(
  fstat_xz[["value"]], fstat_xz[["numdf"]], fstat_xz[["dendf"]],
  lower.tail = FALSE
)
summary(ivFit1, diagnostics = TRUE)$diagnostics

# Weak instrument test: slave trade ----
# Partial F test of the four excluded instruments: unrestricted first stage
# (same model as slaves_first) against the first stage with colony only.
slaves_first_com <- slaves_first
slaves_first_red <- lm(log(slavesarea) ~ colony, sltd)
RSS_r <- sum(resid(slaves_first_red)^2)
RSS_u <- sum(resid(slaves_first_com)^2)
k <- slaves_first_com$rank            # parameters in the unrestricted model
q <- k - slaves_first_red$rank        # number of restrictions tested
n <- nobs(slaves_first_com)           # observations actually used in the fit
# Named f_stat rather than F so the built-in F (alias of FALSE) is not masked
f_stat <- ((RSS_r - RSS_u) / q) / (RSS_u / (n - k))
f_stat
pf(f_stat, q, n - k, lower.tail = FALSE)
summary(fit_iv, diagnostics = TRUE)$diagnostics

# Let's explore the Hausman test ----
summary(ivFit1, diagnostics = TRUE)$diagnostics # segregation
summary(fit_iv, diagnostics = TRUE)$diagnostics # slave trade