# Load the data Advertising <- read.csv("Advertising.csv") # See the structure dim(Advertising) names(Advertising) str(Advertising) # Numerical and graphical summary summary(Advertising[,-1]) par(mfrow=c(1,4)) for (i in 2:5) hist(Advertising[,i], breaks = 30, main = names(Advertising)[i], xlab = names(Advertising)[i]) # Simple Linear Regression Sales~TV fitTV <- lm(Sales ~ TV, data = Advertising) summary(fitTV)$coefficients par(mfrow=c(1,1)) with(Advertising, plot(TV, Sales, cex = 0.5)) abline(fitTV$coefficients, col = "red") points(Advertising$TV, predict(fitTV), col = "blue", cex = 0.5) fitLMtrans <- lm(I(log(Sales)) ~ I(sqrt(TV)) + I(sqrt(Radio)), data = Advertising) par(mfrow=c(2,2)) plot(fitLMtrans) par(mfrow=c(1,2)) with(Advertising, plot(TV, Sales, cex = 0.5)); abline(fitTV$coefficients, col = "red") points(Advertising$TV, predict(fitTV), col = "blue", cex = 0.5) with(Advertising, segments(x0 = TV, y0 = Sales, x1 = TV, y1 = predict(fitTV), lty = "dashed")) par(mfrow=c(2,2)) plot(fitTV) plot(fitTV, which = 1) confint(fitTV) summary(fitTV)$coefficients[2,] 2*pt(-17.66763, df = 198) str(summary(fitTV)) summary(fitTV)$sigma summary(fitTV)$r.squared # Simple Linear Regression Sales~Radio fitRadio <- lm(Sales ~ Radio, data = Advertising) summary(fitRadio)$coefficients # Simple Linear Regression Sales~Newspaper fitNewspaper <- lm(Sales ~ Newspaper, data = Advertising) summary(fitNewspaper)$coefficients # Multiple Linear Regression round(cor(Advertising[,-1]),3) fitLM <- lm(Sales ~ TV + Radio + Newspaper, data = Advertising) round(summary(fitLM)$coefficients,4) summary(fitLM)$sigma summary(fitLM)$r.squared summary(fitLM) anova(lm(Sales ~ 1, data = Advertising), fitLM) summary(fitTV)$r.squared summary(fitRadio)$r.squared summary(lm(Sales ~ TV + Radio, data = Advertising))$r.squared summary(fitLM)$r.squared # Model with all the variables summary(fitTV)$sigma summary(fitRadio)$sigma summary(lm(Sales ~ TV + Radio, data = Advertising))$sigma summary(fitLM)$sigma # Model with all the variables #3dplot fitTVRadio <- lm(Sales ~ TV + Radio, data = Advertising) tv.seq <- seq(min(Advertising$TV), max(Advertising$TV), length.out = 30) radio.seq <- seq(min(Advertising$Radio), max(Advertising$Radio), length.out = 30) grid <- expand.grid(TV = tv.seq, Radio = radio.seq) grid$Sales <- predict(fitTVRadio, newdata = grid) Z <- matrix(grid$Sales, nrow = length(tv.seq), ncol = length(radio.seq)) par(mfrow=c(1,2)) p <- persp(tv.seq, radio.seq, Z, theta = 45, phi = 15, expand = 0.6, col = "lightblue", shade = 0.5, xlab = "TV", ylab = "Radio", zlab = "Sales", ticktype = "detailed", main = "Sales ~ TV + Radio") pts <- trans3d(Advertising$TV, Advertising$Radio, Advertising$Sales, p) predicted <- predict(fitTVRadio) visible <- Advertising$Sales > predicted points(pts, col = ifelse(visible, "red", "blue"), pch = 19) p <- persp(tv.seq, radio.seq, Z, theta = 135, phi = 15, expand = 0.6, col = "lightblue", shade = 0.5, xlab = "TV", ylab = "Radio", zlab = "Sales", ticktype = "detailed", main = "Sales ~ TV + Radio") pts <- trans3d(Advertising$TV, Advertising$Radio, Advertising$Sales, p) predicted <- predict(fitTVRadio) visible <- Advertising$Sales > predicted points(pts, col = ifelse(visible, "red", "blue"), pch = 19) # Look at the individual p-values round(summary(fitLM)$coefficients,3) # Confidence Interval for beta confint(fitLM) # The table of coefficients of the simple linear regression models summary(fitTV)$coefficients summary(fitRadio)$coefficients summary(fitNewspaper)$coefficients # Fitted vs observed yhat <- predict(fitLM) par(mfrow=c(1,1)) plot(Advertising$Sales, yhat) abline(0,1) # Confidence interval predict(fitLM, newdata = data.frame(TV = 100, Radio = 20, Newspaper = 0), interval = "confidence") # Prediction interval predict(fitLM, newdata = data.frame(TV = 100, Radio = 20, Newspaper = 0), interval = "prediction") # Residuals plots par(mfrow=c(2,2)) plot(fitLM) # Polynomial regression lmFitpoly <- lm(Sales ~ TV + I(TV^2) + Radio + I(Radio^2), data = Advertising) summary(lmFitpoly) plot(lmFitpoly) # Interaction lmFitInt <- lm(Sales ~ TV*Radio, data = Advertising) summary(lmFitInt) plot(lmFitInt) # Transforming the outcome lmFitTrans <- lm(I(log(Sales)) ~ TV + I(TV^2) + Radio + TV:Radio, data = Advertising) round(summary(lmFitTrans)$coefficients,4) plot(lmFitTrans) # Predicted vs observed par(mfrow=c(1,1)) plot(Advertising$Sales, exp(predict(lmFitTrans))) abline(0,1)