# install.packages(c("survival", "dplyr", "readxl")) library(survival) library(dplyr) library(readxl) # Load data ---------------------------------------------------------- df <- read_xlsx("Example_3.xlsx") # Inspect data str(df) summary(df) # Variables: # time = follow-up time # status = event indicator, usually 1 = event/death, 0 = censored # age = patient age # sex = patient sex # ph.ecog = ECOG performance status # Higher values indicate worse functional status / more severe disease # treatment = presence (1) or absence (0) of a treatment # Check treatment distribution -------------------------------------- table(df$treatment) prop.table(table(df$treatment)) # Check whether treated patients are different ----------------------- df %>% group_by(treatment) %>% summarise( n = n(), mean_age = mean(age, na.rm = TRUE), mean_ecog = mean(ph.ecog, na.rm = TRUE), event_rate = mean(status, na.rm = TRUE), .groups = "drop" ) # If treated patients have higher ECOG or higher event rate, # this supports the presence of confounding # and specifically mimics a realistic observational setting: # patients with worse clinical conditions are more likely to receive treatment. # This creates confounding by indication. # NAIVE MODEL -------------------------------------------------------- # This model estimates the crude association between treatment and survival. # It does NOT adjust for confounders. cox_naive <- coxph( Surv(time, status) ~ treatment, data = df ) summary(cox_naive) # The coefficient for treatment is expressed as a hazard ratio: exp(coef). # If HR > 1, treated patients have higher hazard of death. # However, this crude association may be biased because treated patients # may be older or have worse ECOG performance status. # ADJUSTED MODEL ----------------------------------------------------- # This model adjusts for potential confounders: # age, sex, and ECOG performance status. cox_adj <- coxph( Surv(time, status) ~ treatment + age + sex + ph.ecog, data = df ) summary(cox_adj) # Interpretation: # The coefficient for treatment now compares treated and untreated patients # with the same age, sex, and ECOG performance status. # # If the treatment HR becomes smaller after adjustment, # this suggests that part of the crude treatment effect was due to confounding. # # In our example, the naive model suggested that treatment was associated # with a higher hazard of death. # After adjustment, the HR is attenuated and may no longer be statistically significant. # # This indicates confounding by disease severity: # sicker patients were more likely to receive treatment and also more likely to die. # Extract hazard ratios and confidence intervals --------------------- exp(cbind( HR = coef(cox_naive), confint(cox_naive) )) exp(cbind( HR = coef(cox_adj), confint(cox_adj) )) # Final message: # In causal modeling, the goal is to estimate the effect of treatment, # not to optimize prediction. # Adjustment for confounders is essential to avoid misleading conclusions. # However, causal interpretation still relies on the assumption of no unmeasured confounding.