## ----global_options, include=FALSE----------------
knitr::opts_chunk$set(
  fig.align = "center", warning = FALSE, message = FALSE, fig.asp = 0.625,
  dev = "pdf", global.par = TRUE, dev.args = list(pointsize = 10),
  fig.path = "figs/"
)
library(MASS)
library(knitr)

## ----libraries, echo = TRUE-----------------------
library(glmnet)
library(arm)
library(bayesplot)
library(ggplot2)
library(rstanarm)
library(corrplot)
library(GGally)

Prostate <- read.csv("prostate.csv")
# drop column 1 and 11 — presumably a row-id and a train/test flag; confirm
# against prostate.csv
Prostate <- Prostate[, -c(1, 11)]

# sample size
N <- nrow(Prostate)

# predictor matrix
# FIX: glmnet() requires a numeric matrix, not a data frame — the original
# passed a data.frame to glmnet() (runtime error) and only converted with
# as.matrix() inside the cv.glmnet() calls. Convert once, up front.
X <- as.matrix(Prostate[, 1:8])

# number of predictors
# FIX: dim(Prostate)[2] counted the response lpsa as well (9, not 8)
p <- ncol(X)

# outcome of interest / response variable
y <- Prostate$lpsa

# quick look at the data
head(Prostate)

# correlation plot
corrplot(cor(Prostate))

# pairs plot: scatter with a loess (red) and a linear (blue) smoother
my_fn <- function(data, mapping, ...) {
  ggplot(data = data, mapping = mapping) +
    geom_point(size = 0.2) +
    geom_smooth(method = "loess", fill = "red", color = "red", ...) +
    geom_smooth(method = "lm", fill = "blue", color = "blue", ...)
}
ggpairs(Prostate,
        upper = list(continuous = my_fn),
        lower = list(continuous = my_fn))

## ----modellolineare, echo = TRUE------------------
################################
## CLASSICAL LM
################################
mod_lm <- lm(lpsa ~ ., data = Prostate)
summary(mod_lm)
mod_lm$coefficients

## ----lasso, echo = TRUE---------------------------
#############################
## LASSO/RIDGE
#############################
# regularization techniques

# lasso path (alpha = 1)
mod_glmnet <- glmnet(x = X, y = y, alpha = 1, family = "gaussian")
plot(mod_glmnet, cex.lab = 1.4)

# cross-validation to choose lambda for the lasso
cvfit <- cv.glmnet(x = X, y = y)
cvfit
plot(cvfit)
cvfit$lambda.min
coef(cvfit, s = "lambda.min")

# ridge path (alpha = 0) and its cross-validation
mod_ridge <- glmnet(x = X, y = y, alpha = 0)
cvfit_ridge <- cv.glmnet(x = X, y = y, alpha = 0)

## ----bayesian1, echo = TRUE, results='hide'-------
######################################
## Bayesian model (with MCMC sampling)
######################################
# with gaussian (rstanarm default, weakly informative) priors
mod_lm_bayes <- stan_glm(lpsa ~ ., data = Prostate, family = gaussian)

# with cauchy priors on coefficients and intercept
mod_lm_bayes2 <- stan_glm(lpsa ~ ., data = Prostate, family = gaussian,
                          prior = cauchy(), prior_intercept = cauchy())

## ----plots, echo = TRUE---------------------------
print(mod_lm_bayes)
prior_summary(mod_lm_bayes)
print(mod_lm_bayes2)
prior_summary(mod_lm_bayes2)

posterior <- as.matrix(mod_lm_bayes)
posterior2 <- as.matrix(mod_lm_bayes2)

# posterior interval plots for the regression coefficients
par_names <- c("(Intercept)", "lcavol", "lweight", "age", "lbph",
               "svi", "lcp", "gleason", "pgg45")
beta_names <- paste0("beta[", 9:1, "]")

mcmc_intervals(posterior, pars = par_names) +
  xaxis_text(on = TRUE, size = rel(1.9)) +
  yaxis_text(on = TRUE, size = rel(1.9)) +
  # scale_y_discrete(labels = rev(parse(text = beta_names))) +
  ggtitle("Weakly informative") +
  theme(plot.title = element_text(hjust = 0.5, size = rel(2))) +
  xlim(-2, 3)

# FIX: this is the Cauchy-prior model; the original title was a copy-paste
# of "Weakly informative" from the plot above
mcmc_intervals(posterior2, pars = par_names) +
  xaxis_text(on = TRUE, size = rel(1.9)) +
  yaxis_text(on = TRUE, size = rel(1.9)) +
  # scale_y_discrete(labels = rev(parse(text = beta_names))) +
  ggtitle("Cauchy") +
  theme(plot.title = element_text(hjust = 0.5, size = rel(2))) +
  xlim(-2, 3)

## ----comparison, echo = FALSE---------------------
##########################
## Comparison of estimates
##########################
# final table of coefficients: classical lm, lasso/ridge at lambda.min,
# and the two Bayesian posterior-median estimates
tab_est <- cbind(
  round(mod_lm$coefficients, 3),
  round(coef(cvfit, s = "lambda.min")[, 1], 3),
  round(coef(cvfit_ridge, s = "lambda.min")[, 1], 3),
  round(mod_lm_bayes$coefficients, 3),
  round(mod_lm_bayes2$coefficients, 3)
)
colnames(tab_est) <- c("lm", "lasso", "ridge", "bayes1", "bayes2")
knitr::kable(tab_est)