# Linear regression on the ISLR `Credit` data: Balance ~ Limit + Student.
#
# The script fits the model with lm(), reproduces the main quantities reported
# by summary.lm() "by hand" (OLS coefficients, residual standard error, R^2,
# adjusted R^2), and draws the two fitted regression lines.
#
# Objects created at top level: lmFit, X, y, beta_OLS, Rsq.

# Load the library and the data (plus the help page describing the data) ----
library(ISLR)
data("Credit")
help(Credit)

# Exploratory plots ----

# Scatterplot of Balance against Limit
with(Credit, plot(Limit, Balance))

# Balance by Student status (plot() of a numeric response against a
# categorical predictor draws boxplots)
with(Credit, plot(Balance ~ Student))

# Scatterplot coloured by Student status: green = "Yes", red = otherwise
with(
  Credit,
  plot(Limit, Balance, col = ifelse(Student == "Yes", "green", "red"))
)

# Fit the linear model ----
lmFit <- lm(Balance ~ Limit + Student, data = Credit)
lmFit

# Least squares by hand ----

# Model (design) matrix and outcome vector
X <- model.matrix(Balance ~ Limit + Student, data = Credit)
y <- Credit$Balance

# Textbook formula: beta = (X'X)^{-1} X'y.
# Solving the normal equations directly avoids forming the explicit inverse,
# which is numerically more stable and gives the same 3 x 1 result.
beta_OLS <- solve(crossprod(X), crossprod(X, y))
beta_OLS

# Check against lm()
coef(lmFit)

# Explore the summary ----
summary(lmFit)

# Residuals: accessor vs. observed minus fitted
head(residuals(lmFit))
head(Credit$Balance - predict(lmFit))

# Table of coefficients (estimate, std. error, t value, p-value)
summary(lmFit)$coefficients

# Summary of the residuals
summary(residuals(lmFit))

# Residual standard error: sqrt(RSS / (n - number of coefficients)) ----
sqrt(sum(residuals(lmFit)^2) / (nrow(Credit) - ncol(X)))
summary(lmFit)$sigma

# R^2 coefficient ----
summary(lmFit)$r.squared

# By hand: 1 - RSS / TSS, where TSS = var(y) * (n - 1)
Rsq <- 1 - sum(residuals(lmFit)^2) /
  (var(Credit$Balance) * (nrow(Credit) - 1))
Rsq

# Adjusted R^2 ----
summary(lmFit)$adj.r.squared

# By hand: R^2 - (1 - R^2) * p / (n - p - 1), with p = ncol(X) - 1 predictors
Rsq - (1 - Rsq) * (ncol(X) - 1) / (nrow(Credit) - ncol(X))

# Fitted regression lines ----
with(
  Credit,
  plot(Limit, Balance, col = ifelse(Student == "Yes", "green", "red"))
)

# Baseline group (red points): intercept = 1st coefficient, slope = 2nd
abline(coef = coef(lmFit)[1:2], col = "red")

# Student == "Yes" (green points): intercept shifted by the 3rd coefficient,
# same slope
abline(
  coef = c(coef(lmFit)[[1]] + coef(lmFit)[[3]], coef(lmFit)[[2]]),
  col = "green"
)