The following dataset come from our paper: https://pubmed.ncbi.nlm.nih.gov/30131230/ discussed during the lesson.
Load the required library:
library(survival)
Load the data:
load("C:\\Users\\Giulia Barbati\\OneDrive - Università degli Studi di Trieste\\Stat_Learning_Epi\\Block 4\\R codes\\dati_iniziali.Rdata")
We are interested here in evaluating the impact of HF progression on mortality. Remember that HF progression is a time-dependent variable measured along the follow up (variable: quando_progr).
Try to adopt the standard KM approach:
sfitkm <- survfit(Surv(fup_decesso_cens, morto)~progressione,data=dati_iniziali)
plot(sfitkm,lty=c(2,1),mark.time=FALSE, col="black", fun="event", ylab="Cumulative Incidence of Death",
xlab="Follow up (months)", xlim=c(0,48), xaxp=c(0,48,4),ylim=c(0, 0.6))
legend(0,0.55, legend=c("HF progression", "Others"), lty=c(1,2), bty="n")
title("Standard 1-KM")
It seems that patients that experience HF progression are at a lower risk of mortality… Immortal time bias !!!
Let’s now put the dataset in the counting format (we do not covered this in the slides, see for details : https://rpubs.com/SteadyStoffel/surv_counting_process1)
attach(dati_iniziali)
data_progr_0 <-dati_iniziali[which(progressione==0),]
rec <-rep(0,nrow(data_progr_0))
inizio <-rep(0,nrow(data_progr_0))
fine <-data_progr_0$fup_decesso_cens
deaths <-data_progr_0$morto
data_progr_1 <-dati_iniziali[which(progressione==1),]
rec <-c(rec,rep(0,nrow(data_progr_1)))
inizio <-c(inizio,rep(0,nrow(data_progr_1)))
fine <-c(fine,data_progr_1$quando_progr)
deaths <-c(deaths,rep(0,nrow(data_progr_1)))
rec <-c(rec,rep(1,nrow(data_progr_1)))
inizio <-c(inizio,data_progr_1$quando_progr)
fine <-c(fine,data_progr_1$fup_decesso_cens)
deaths <-c(deaths,data_progr_1$morto)
id <-c(data_progr_0$KEY_ANAGRAFE,data_progr_1$KEY_ANAGRAFE,data_progr_1$KEY_ANAGRAFE)
lvef35fin <-c(data_progr_0$LVEF_35,data_progr_1$LVEF_35,data_progr_1$LVEF_35)
dataset_analisi <-data.frame(id,lvef35fin, rec,inizio,fine,deaths)
detach(dati_iniziali)
table(rec,deaths)
## deaths
## rec 0 1
## 0 1755 773
## 1 305 208
table(dataset_analisi$rec)
##
## 0 1
## 2528 513
In this format, we can apply the extended KM estimator:
sfit <- survfit(Surv(inizio, fine, deaths)~rec,data=dataset_analisi)
sfit
## Call: survfit(formula = Surv(inizio, fine, deaths) ~ rec, data = dataset_analisi)
##
## records n.max n.start events median 0.95LCL 0.95UCL
## rec=0 2528 2528 2528 773 81 78 NA
## rec=1 513 224 37 208 45 37 54
plot(sfit,lty=c(2,1),mark.time=FALSE, col="black", fun="event", ylab="Cumulative Incidence of Death",
xlab="Follow up (months)", xlim=c(0,48), xaxp=c(0,48,4),ylim=c(0, 0.6))
legend(0,0.55, legend=c("HF progression", "Others"), lty=c(1,2), bty="n")
title("Extended 1-KM")
Here we can not apply the log-rank test to compare the curves, but we still can estimate the hazard ratio of the time-dependent HF progression variable:
coxprogr <- coxph(Surv(inizio, fine, deaths)~rec,data=dataset_analisi)
summary(coxprogr)
## Call:
## coxph(formula = Surv(inizio, fine, deaths) ~ rec, data = dataset_analisi)
##
## n= 3041, number of events= 981
##
## coef exp(coef) se(coef) z Pr(>|z|)
## rec 0.66255 1.93973 0.08176 8.104 5.32e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## rec 1.94 0.5155 1.653 2.277
##
## Concordance= 0.537 (se = 0.006 )
## Likelihood ratio test= 58.74 on 1 df, p=2e-14
## Wald test = 65.67 on 1 df, p=5e-16
## Score (logrank) test = 67.87 on 1 df, p=<2e-16