# Heteroschedasticity n <- 100 x <- seq(1, 10, length.out = n) beta0 <- 2 beta1 <- 3 sigma <- 1 eps <- rnorm(n, mean = 0, sd = sigma * x) y <- beta0 + beta1 * x + eps par(mfrow = c(1,3)) plot(x, y) fit <- lm(y ~ x) abline(fit) plot(fit, 1) plot(fit, 2) par(mfrow=c(1,3)) plot(x, sqrt(y)) fit2 <- lm(I(sqrt(y)) ~ x) abline(fit2) plot(fit2, 1) plot(fit2, 2) # Econometric Example library(wooldridge) library(sandwich) library(nlme) data("wage1") str(wage1) summary(wage1[,1:4]) par(mfrow=c(1,4)) for(i in 1:4) hist(wage1[,i], main = names(wage1)[i], breaks = 30) library(PerformanceAnalytics) chart.Correlation(wage1[,c(2,3,4,1)]) # Fit ols ols <- lm(wage ~ educ + exper + tenure, data = wage1) summary(ols) plot(ols, 1) # bptest library(lmtest) bptest(ols) # Fit gls pred <- predict(ols) glsFit <- gls(wage ~ educ + exper + tenure, data = wage1, weights = varPower(form = ~ pred)) summary(glsFit) cbind(coef(ols), coef(glsFit)) cbind(summary(ols)$coefficients[,2], sqrt(diag(glsFit$varBeta))) par(mfrow=c(1,3)) plot(fitted(ols), residuals(ols), xlab = "Fitted values (OLS)", ylab = "Residuals", main = "Heteroskedasticity in OLS") abline(h = 0, col = "red", lty = 2) plot(fitted(ols), rstandard(ols), xlab = "Fitted values (OLS)", ylab = "Standardized Residuals", main = "Heteroskedasticity in OLS") abline(h = 0, col = "red", lty = 2) plot(fitted(glsFit), resid(glsFit, type="normalized"), xlab = "Fitted values (GLS)", ylab = "Standardized residuals", main = "Variance stabilized in GLS") abline(h = 0, col = "red", lty = 2) # Transforming the outcome ols_log <- lm(log(wage) ~ educ + exper + tenure, data = wage1) bptest(ols_log) plot(ols_log,1) pred <- predict(ols_log) gls_log <- gls(log(wage) ~ educ + exper + tenure, data = wage1, weights = varPower(form = ~ pred)) par(mfrow=c(1,2)) plot(fitted(ols_log), residuals(ols_log), xlab = "Fitted values (OLS)", ylab = "Residuals", main = "OLS") abline(h = 0, col = "red", lty = 2) plot(fitted(gls_log), resid(gls_log, type="normalized"), xlab = "Fitted values (GLS)", ylab = "Standardized residuals", main = "GLS") abline(h = 0, col = "red", lty = 2) cbind(coef(ols_log), coef(gls_log)) cbind(summary(ols_log)$coefficients[,2], sqrt(diag(gls_log$varBeta))) cbind(summary(ols)$sigma, summary(glsFit)$sigma) cbind(summary(ols_log)$sigma, summary(gls_log)$sigma) cbind(AIC(ols), AIC(glsFit)) cbind(AIC(ols_log), AIC(gls_log)) AIC(ols_log) + sum(2*log(wage1$wage)) AIC(gls_log) + sum(2*log(wage1$wage)) # Multicollinearity library(ISLR2) data(Credit) par(mfrow=c(1,2)) with(Credit, plot(Limit, Age)) with(Credit, plot(Limit, Rating)) lmAge <- lm(Balance ~ Age, data = Credit) summary(lmAge)$coefficients lmLimit <- lm(Balance ~ Limit, data = Credit) summary(lmLimit)$coefficients lmRating <- lm(Balance ~ Rating, data = Credit) summary(lmRating)$coefficients lmAge <- lm(Balance ~ Age, data = Credit) summary(lmAge)$coefficients lmLimit <- lm(Balance ~ Limit, data = Credit) summary(lmLimit)$coefficients lmAgeLimit <- lm(Balance ~ Age + Limit, data = Credit) summary(lmAgeLimit)$coefficients lmRating <- lm(Balance ~ Rating, data = Credit) summary(lmRating)$coefficients lmLimit <- lm(Balance ~ Limit, data = Credit) summary(lmLimit)$coefficients lmLimitRating <- lm(Balance ~ Limit + Rating, data = Credit) summary(lmLimitRating)$coefficients cor(Credit[, c("Age", "Limit", "Rating")]) lmFull <- lm(Balance ~ Age + Limit + Rating, data = Credit) library(car) vif(lmFull) vif(lmAgeLimit) # Omitted Variable Bias data <- read.table("kpt.dat") colnames(data) <- c("wage", "educ", "AFQT", "educSibl", "exper", "tenure", "educM", "educF", "urban", "home") data$exper <- data$exper/52 fit1 <- lm(wage ~ educ + poly(exper, 2), data = data) summary(fit1)$coefficients with(data, cor(educ, AFQT)) fit2 <- lm(wage ~ educ + poly(exper, 2) + AFQT, data = data) summary(fit2)$coefficients