An example of time-dependent covariate : the extended KM estimator

The following dataset come from our paper: https://pubmed.ncbi.nlm.nih.gov/30131230/ discussed during the lesson.

Load the required library:

library(survival)

Load the data:

load("C:\\Users\\Giulia Barbati\\OneDrive - Università degli Studi di Trieste\\Stat_Learning_Epi\\Block 4\\R codes\\dati_iniziali.Rdata")

We are interested here in evaluating the impact of HF progression on mortality. Remember that HF progression is a time-dependent variable measured along the follow up (variable: quando_progr).

Try to adopt the standard KM approach:

sfitkm <- survfit(Surv(fup_decesso_cens, morto)~progressione,data=dati_iniziali)

plot(sfitkm,lty=c(2,1),mark.time=FALSE, col="black", fun="event", ylab="Cumulative Incidence of Death", 
     xlab="Follow up (months)", xlim=c(0,48), xaxp=c(0,48,4),ylim=c(0, 0.6))
legend(0,0.55, legend=c("HF progression", "Others"), lty=c(1,2), bty="n")
title("Standard 1-KM")

It seems that patients that experience HF progression are at a lower risk of mortality… Immortal time bias !!!

Let’s now put the dataset in the counting format (we do not covered this in the slides, see for details : https://rpubs.com/SteadyStoffel/surv_counting_process1)

attach(dati_iniziali)
data_progr_0 <-dati_iniziali[which(progressione==0),]
rec          <-rep(0,nrow(data_progr_0))
inizio       <-rep(0,nrow(data_progr_0))
fine         <-data_progr_0$fup_decesso_cens
deaths       <-data_progr_0$morto
data_progr_1   <-dati_iniziali[which(progressione==1),]
rec            <-c(rec,rep(0,nrow(data_progr_1)))
inizio         <-c(inizio,rep(0,nrow(data_progr_1)))
fine           <-c(fine,data_progr_1$quando_progr)
deaths         <-c(deaths,rep(0,nrow(data_progr_1)))
rec         <-c(rec,rep(1,nrow(data_progr_1)))
inizio      <-c(inizio,data_progr_1$quando_progr)
fine        <-c(fine,data_progr_1$fup_decesso_cens)
deaths      <-c(deaths,data_progr_1$morto)

id         <-c(data_progr_0$KEY_ANAGRAFE,data_progr_1$KEY_ANAGRAFE,data_progr_1$KEY_ANAGRAFE)
lvef35fin  <-c(data_progr_0$LVEF_35,data_progr_1$LVEF_35,data_progr_1$LVEF_35)
dataset_analisi   <-data.frame(id,lvef35fin, rec,inizio,fine,deaths)
detach(dati_iniziali)
table(rec,deaths)

##    deaths
## rec    0    1
##   0 1755  773
##   1  305  208

table(dataset_analisi$rec)

## 
##    0    1 
## 2528  513

In this format, we can apply the extended KM estimator:

sfit <- survfit(Surv(inizio, fine, deaths)~rec,data=dataset_analisi)
sfit

## Call: survfit(formula = Surv(inizio, fine, deaths) ~ rec, data = dataset_analisi)
## 
##       records n.max n.start events median 0.95LCL 0.95UCL
## rec=0    2528  2528    2528    773     81      78      NA
## rec=1     513   224      37    208     45      37      54

plot(sfit,lty=c(2,1),mark.time=FALSE, col="black", fun="event", ylab="Cumulative Incidence of Death", 
     xlab="Follow up (months)", xlim=c(0,48), xaxp=c(0,48,4),ylim=c(0, 0.6))
legend(0,0.55, legend=c("HF progression", "Others"), lty=c(1,2), bty="n")
title("Extended 1-KM")

Here we can not apply the log-rank test to compare the curves, but we still can estimate the hazard ratio of the time-dependent HF progression variable:

coxprogr <- coxph(Surv(inizio, fine, deaths)~rec,data=dataset_analisi)
summary(coxprogr)

## Call:
## coxph(formula = Surv(inizio, fine, deaths) ~ rec, data = dataset_analisi)
## 
##   n= 3041, number of events= 981 
## 
##        coef exp(coef) se(coef)     z Pr(>|z|)    
## rec 0.66255   1.93973  0.08176 8.104 5.32e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##     exp(coef) exp(-coef) lower .95 upper .95
## rec      1.94     0.5155     1.653     2.277
## 
## Concordance= 0.537  (se = 0.006 )
## Likelihood ratio test= 58.74  on 1 df,   p=2e-14
## Wald test            = 65.67  on 1 df,   p=5e-16
## Score (logrank) test = 67.87  on 1 df,   p=<2e-16