# install.packages(c("dplyr", "splines", "readxl", "ggplot2")) library(dplyr) library(splines) library(readxl) library(ggplot2) # GOAL: # To analyze the relationship between hypertension and key variables: # age, BMI, and sex # Load data df <- read_xlsx("Example1_2.xlsx") # Remove missing values df <- na.omit(df) # Basic inspection str(df) summary(df) names(df) # Graphical exploration --------------------------------------------- ggplot(df, aes(x = Age, y = BPSysAve)) + geom_jitter(alpha = 0.1) + geom_smooth(method = "loess", se = TRUE) + labs( x = "Age", y = "Average systolic blood pressure", title = "Systolic blood pressure by age" ) ggplot(df, aes(x = Age, y = BPDiaAve)) + geom_jitter(alpha = 0.1) + geom_smooth(method = "loess", se = TRUE) + labs( x = "Age", y = "Average diastolic blood pressure", title = "Diastolic blood pressure by age" ) ggplot(df, aes(x = BMI, y = BPSysAve)) + geom_jitter(alpha = 0.1) + geom_smooth(method = "loess", se = TRUE) + labs( x = "BMI", y = "Average systolic blood pressure", title = "Systolic blood pressure by BMI" ) ggplot(df, aes(x = BMI, y = BPDiaAve)) + geom_jitter(alpha = 0.1) + geom_smooth(method = "loess", se = TRUE) + labs( x = "BMI", y = "Average diastolic blood pressure", title = "Diastolic blood pressure by BMI" ) # Logistic regression models ---------------------------------------- # Model 1: baseline model with age and BMI modeled linearly model_linear <- glm( hypertension ~ Age + BMI + Female_sex, data = df, family = binomial ) summary(model_linear) exp(cbind( OR = coef(model_linear), confint(model_linear) )) # Model 2: age modeled non-linearly, BMI linearly model_age_spline <- glm( hypertension ~ ns(Age, df = 3) + BMI + Female_sex, data = df, family = binomial ) summary(model_age_spline) # Model 3: age linearly, BMI modeled non-linearly model_bmi_spline <- glm( hypertension ~ Age + ns(BMI, df = 3) + Female_sex, data = df, family = binomial ) summary(model_bmi_spline) # Model 4: both age and BMI modeled non-linearly model_age_bmi_spline <- glm( hypertension ~ ns(Age, df = 3) + ns(BMI, df = 3) + Female_sex, data = df, family = binomial ) summary(model_age_bmi_spline) # Model comparison --------------------------------------------------- AIC( model_linear, model_age_spline, model_bmi_spline, model_age_bmi_spline ) # Predicted probabilities: age effect ------------------------------- newdata_age <- data.frame( Age = seq(min(df$Age, na.rm = TRUE), max(df$Age, na.rm = TRUE), length.out = 100), BMI = mean(df$BMI, na.rm = TRUE), Female_sex = 1 ) newdata_age$pred_linear <- predict( model_linear, newdata = newdata_age, type = "response" ) newdata_age$pred_age_spline <- predict( model_age_spline, newdata = newdata_age, type = "response" ) ggplot(newdata_age, aes(x = Age)) + geom_line(aes(y = pred_linear), linetype = "dashed", linewidth = 1) + geom_line(aes(y = pred_age_spline), linewidth = 1) + labs( x = "Age", y = "Predicted probability of hypertension", title = "Linear vs non-linear effect of age", subtitle = "Dashed line = linear age model; solid line = spline age model" ) # Predicted probabilities: BMI effect ------------------------------- newdata_bmi <- data.frame( BMI = seq(min(df$BMI, na.rm = TRUE), max(df$BMI, na.rm = TRUE), length.out = 100), Age = mean(df$Age, na.rm = TRUE), Female_sex = 1 ) newdata_bmi$pred_linear <- predict( model_linear, newdata = newdata_bmi, type = "response" ) newdata_bmi$pred_bmi_spline <- predict( model_bmi_spline, newdata = newdata_bmi, type = "response" ) ggplot(newdata_bmi, aes(x = BMI)) + geom_line(aes(y = pred_linear), linetype = "dashed", linewidth = 1) + geom_line(aes(y = pred_bmi_spline), linewidth = 1) + labs( x = "BMI", y = "Predicted probability of hypertension", title = "Linear vs non-linear effect of BMI", subtitle = "Dashed line = linear BMI model; solid line = spline BMI model" )