## ----global_options, include=FALSE---------------- knitr::opts_chunk$set(fig.align = 'center', warning=FALSE, message=FALSE, fig.asp=0.625, dev='pdf', global.par = TRUE, dev.args=list(pointsize=10), fig.path = 'figs/') library(MASS) library(knitr) ## ----libraries, echo =TRUE------------------------ library(readxl) library(rstanarm) library(ggplot2) library(bayesplot) library(corrplot) library(RColorBrewer) library(dplyr) library(tidyr) library(tidyverse) library(survival) ## ----dati, echo = TRUE, results='hide'------------ # primo trial melanoma <- readxl::read_xlsx("trial_e1684.xlsx") head(melanoma) # secondo trial melanoma2 <- readxl::read_xlsx("trial_e1690.xlsx") head(melanoma2) # due trial assieme melanoma3 <- readxl::read_xlsx("trial_e1684_e1690_Merged.xlsx") # primary end-point: rfscens, # equal 0 if relapse-free survival, # equal 1 if survival with at least one relapse # response variable y <- melanoma$rfscens # predictor matrix X <- melanoma[, c(3:9)] ## ----analisi variabili: age, echo = TRUE---------- # name redefinition (only for clarifying names) merged_data <- melanoma3 historical_data <- melanoma current_data <- melanoma2 ggplot(merged_data, aes(age)) + geom_density(aes(fill = factor(study)), alpha=0.5) + labs(title= "Età") boxplot(historical_data$age, current_data$age) ## ----analisi variabili: trt, echo = TRUE---------- table(merged_data$trt) #352 trattamento IFN, 360 altro prop.table(table(merged_data$trt)) ggplot(merged_data, aes(trt, fill = as.character(study))) + geom_bar(width = 0.5) + theme(axis.text.x = element_text(angle=65, vjust=0.6)) + labs(title= "Trattamento somministrato nei trials") ## ----analisi variabili: sex, echo = TRUE---------- table(merged_data$sex) #439 uomini e 273 donne prop.table(table(merged_data$sex)) ggplot(merged_data, aes(sex, fill = as.character(study))) + geom_bar(width = 0.5) + theme(axis.text.x = element_text(angle=65, vjust=0.6)) + labs(title= "Sesso dei pazienti nei trials") ## ----analisi variabili: perform, echo = TRUE------ table(merged_data$perform) #627 pienamente attivi, 81 ambulatori prop.table(table(merged_data$perform)) ggplot(merged_data, aes(perform, fill = as.character(study))) + geom_bar(width = 0.5) + theme(axis.text.x = element_text(angle=65, vjust=0.6)) + labs(title= "Stato di salute dei pazienti nei trials") ## ----analisi variabili: nodes, echo = TRUE-------- table(merged_data$nodes) #0 nodi = 143, 1 nodo = 247, # 2-3 nodi = 157, 4+ nodi = 142 pazienti prop.table(table(merged_data$nodes)) ggplot(merged_data, aes(nodes, fill = as.character(study))) + geom_bar(width = 0.5) + theme(axis.text.x = element_text(angle=65, vjust=0.6)) + labs(title= "Numero di nodi positivi nei trials") ## ----analisi variabili: breslow, echo = TRUE------ # minimo spessore 0.08, massimo spessore 35. # in media lo spessore è 3.8. # non sono disponibili 40 osservazioni merged_data$breslow <- as.numeric(as.vector(merged_data$breslow)) ggplot(merged_data, aes(breslow)) + geom_density(aes(fill = factor(study)), alpha=0.5) + labs(title= "Spessore del Breslow") historical_data$breslow <- as.numeric(as.vector(historical_data$breslow)) current_data$breslow <- as.numeric(as.vector(current_data$breslow)) boxplot(historical_data$breslow, current_data$breslow) ## ----analisi variabile stage, echo=TRUE----------- table(merged_data$stage) #143 pazienti nel primo stadio, # 81 nel secondo, 94 nel terzo e 393 nel quarto. # c'è uno stadio -2 anomalo prop.table(table(merged_data$stage)) ggplot(merged_data, aes(stage, fill = as.character(study))) + geom_bar(width = 0.5) + theme(axis.text.x = element_text(angle=65, vjust=0.6)) + labs(title= "Stadio della malattia nei trials") ## ----analisi variabile failtime, echo = TRUE------ # minimo di 0 anni per la ricaduta, # massimo di 9.6, in media 2.45 ggplot(merged_data, aes(failtime)) + geom_density(aes(fill = factor(study)), alpha=0.5) + labs(title= "Tempo di recidiva") boxplot(historical_data$failtime, current_data$failtime) ## ----analisi variabile frscens, echo = TRUE------- table(merged_data$rfscens) # 275 non hanno una ricaduta, 437 hanno una ricaduta prop.table(table(merged_data$rfscens)) ggplot(merged_data, aes(rfscens, fill = as.character(study))) + geom_bar(width = 0.5) + theme(axis.text.x = element_text(angle=65, vjust=0.6)) + labs(title= "Paziente sopravvissuto senza recidiva e con recidiva nei trials") ## ----analisi variabile survtime,echo= TRUE-------- #minimo 0 anni per il decesso, massimo di 9.6 e in media 3.4 ggplot(merged_data, aes(survtime)) + geom_density(aes(fill = factor(study)), alpha=0.5) + labs(title= "Tempo al decesso") boxplot(historical_data$survtime, current_data$survtime) ## ----analisi variabile scens, echo = TRUE--------- table(merged_data$scens) #348 pazienti sono vivi, 364 sono morti prop.table(table(merged_data$scens)) ggplot(merged_data, aes(scens, fill = as.character(study))) + geom_bar(width = 0.5) + theme(axis.text.x = element_text(angle=65, vjust=0.6)) + labs(title= "Paziente sopravvissuto o deceduto nei trials") ## ----tempo al decesso..dati correnti, echo = TRUE, results='hide'---- Surv(time = current_data$survtime, event = current_data$scens == 1) Surv(time = current_data$survtime, event = current_data$scens) summary(Surv(time = current_data$survtime, event = current_data$scens == 1)) ## ----KM objects...dati correnti, echo = TRUE,results = 'hide'---- surv_curr <- Surv(current_data$survtime, current_data$scens == 1) kmfit_curr <- survfit(surv_curr ~ current_data$trt) summary(kmfit_curr) ## ----tempo al decesso..dati storici, echo = TRUE, results= 'hide'---- Surv(time = historical_data$survtime, event = historical_data$scens == 1) summary(Surv(time = historical_data$survtime, event = historical_data$scens == 1)) ## ----KM objects...dati storici, echo=TRUE, results = 'hide'---- surv_hist <- Surv(historical_data$survtime, historical_data$scens == 1) kmfit_hist <- survfit(surv_hist ~ historical_data$trt) summary(kmfit_hist) ## ----KM plot OS, echo = TRUE--------------------- par(mfrow = c(1,2)) plot(kmfit_hist, col = c("black", "red"), xlab = "Survival Time In Years", ylab = "Survival Probabilities", main = "Historical data") legend("topright", c("INF", "OBS"), col = c("red", "black"), lty = 1) plot(kmfit_curr, col = c("black", "red"), xlab = "Survival Time In Years", ylab = "Survival Probabilities", main = "Current data") legend("topright", c("INF", "OBS"), col = c("red", "black"), lty = 1) ## ----tempo alla ricaduta..dati correnti, echo = TRUE, results='hide'---- Surv(time = current_data$failtime, event = current_data$rfscens == 1) summary(Surv(time = current_data$failtime, event = current_data$rfscens == 1)) ## ----KM objects for relapse...dati correnti, echo = TRUE, results='hide'---- relapse_curr <- Surv(current_data$failtime, current_data$rfscens == 1) kmfit_rel_curr <- survfit(relapse_curr ~ current_data$trt) summary(kmfit_rel_curr) ## ----tempo alla ricaduta..dati storici, echo = TRUE, results='hide'---- Surv(time = historical_data$failtime, event = historical_data$rfscens == 1) summary(Surv(time = historical_data$failtime, event = historical_data$rfscens == 1)) ## ----KM objects for relapse...dati storici, echo = TRUE, results='hide'---- relapse_hist <- Surv(historical_data$failtime, historical_data$rfscens == 1) kmfit_rel_hist <- survfit(relapse_hist ~ historical_data$trt) summary(kmfit_rel_hist) ## ----KM plot RFS, echo = TRUE-------------------- par(mfrow = c(1,2)) plot(kmfit_rel_hist, col = c("black", "red"), xlab = "Relapse Time In Years", ylab = "Relapse Probabilities", main = "Historical data") legend("topright", c("INF", "OBS"), col = c("red", "black"), lty = 1) plot(kmfit_rel_curr, col = c("black", "red"), xlab = "Relapse Time In Years", ylab = "Relapse Probabilities", main = "Current data") legend("topright", c("INF", "OBS"), col = c("red", "black"), lty = 1) ## ----mod1, echo = TRUE, results='hide'------------ ############################### ## Bayesian logistic regression ############################### # first model with gaussian priors mod_logit_bayes <- stan_glm(rfscens ~., data = historical_data[, c(3:11)]) posterior <- as.matrix(mod_logit_bayes) ## ----plots, echo = TRUE--------------------------- # depict marginal posterior distributions mcmc_intervals(posterior, pars = c("(Intercept)", "age", "trt", "sex", "perform", "nodes2", "nodes3", "nodes4", "breslow" ))+ xaxis_text(on =TRUE, size=rel(1.9))+ yaxis_text(on =TRUE, size=rel(1.9))+ # scale_y_discrete(labels = rev((parse(text= beta_names))))+ ggtitle("Weakly informative")+ theme(plot.title = element_text(hjust = 0.5, size =rel(2)))+ xlim(-2,3) ## ----mod2, echo = TRUE, results='hide'------------ # change priors: informative gaussian mod_logit_bayes2 <- stan_glm(rfscens ~., data = historical_data[, c(3:11)], prior = normal(0,1), prior_intercept = normal(0,2)) ## ----plots2, echo = TRUE-------------------------- posterior2 <- as.matrix(mod_logit_bayes2) # depict marginal posterior distributions mcmc_intervals(posterior2, pars = c("(Intercept)", "age", "trt", "sex", "perform", "nodes2", "nodes3", "nodes4", "breslow" ))+ xaxis_text(on =TRUE, size=rel(1.9))+ yaxis_text(on =TRUE, size=rel(1.9))+ # scale_y_discrete(labels = rev((parse(text= beta_names))))+ ggtitle("Informative")+ theme(plot.title = element_text(hjust = 0.5, size =rel(2)))+ xlim(-2,3) ## ----mod3, results='hide', echo = TRUE------------ # change trial data: e1690 # same model with weakly-informative priors mod_logit_bayes3 <- stan_glm(rfscens ~., data = current_data[, c(3:11)]) ## ----plots3, echo = TRUE-------------------------- posterior3 <- as.matrix(mod_logit_bayes3) # depict marginal posterior distributions mcmc_intervals(posterior3, pars = c("(Intercept)", "age", "trt", "sex", "perform", "nodes2", "nodes3", "nodes4", "breslow" ))+ xaxis_text(on =TRUE, size=rel(1.9))+ yaxis_text(on =TRUE, size=rel(1.9))+ # scale_y_discrete(labels = rev((parse(text= beta_names))))+ ggtitle("Weakly informative")+ theme(plot.title = element_text(hjust = 0.5, size =rel(2)))+ xlim(-2,3) ## ----mod4, results='hide', echo = TRUE------------ # model with all the data: e1684 + e1690 head(melanoma3) # same model with weakly-informative priors mod_logit_bayes4 <- stan_glm(rfscens ~., data = merged_data[, c(3:11)]) ## ----plots4, echo =TRUE--------------------------- posterior4 <- as.matrix(mod_logit_bayes4) # depict marginal posterior distributions mcmc_intervals(posterior4, pars = c("(Intercept)", "age", "trt", "sex", "perform", "nodes2", "nodes3", "nodes4", "breslow" ))+ xaxis_text(on =TRUE, size=rel(1.9))+ yaxis_text(on =TRUE, size=rel(1.9))+ # scale_y_discrete(labels = rev((parse(text= beta_names))))+ ggtitle("Weakly informative")+ theme(plot.title = element_text(hjust = 0.5, size =rel(2)))+ xlim(-2,3)