#======================================================== # DIAGNOSTIC TEST PERFORMANCE USING PRE-SPECIFIED CUT-OFFS # Example: Creatine kinase (CK) for myocardial infarction #======================================================== # Load required package library(readxl) # Import the dataset df <- read_xlsx("CK.xlsx") # Inspect the first rows of the dataset head(df) #-------------------------------------------------------- # Function to calculate diagnostic performance measures # given a binary test result and the true disease status #-------------------------------------------------------- compute_metrics <- function(test_result, disease_status) { # Create the confusion matrix conf_mat <- table(Test = test_result, Disease = disease_status) print(conf_mat) # Extract the four core quantities from the confusion matrix # TP = true positives # FN = false negatives # FP = false positives # TN = true negatives TP <- conf_mat["1", "infarction"] FN <- conf_mat["0", "infarction"] FP <- conf_mat["1", "angina"] TN <- conf_mat["0", "angina"] # Total number of subjects total <- sum(conf_mat) # Calculate diagnostic performance measures sensitivity <- TP / (TP + FN) specificity <- TN / (TN + FP) accuracy <- (TP + TN) / total PPV <- TP / (TP + FP) NPV <- TN / (TN + FN) # Return the results as a data frame results <- data.frame( Metric = c("Sensitivity", "Specificity", "Accuracy", "PPV", "NPV"), Value = c(sensitivity, specificity, accuracy, PPV, NPV) ) return(results) } #-------------------------------------------------------- # Example 1: Define a binary test using CK >= 100 #-------------------------------------------------------- # Create a binary test variable: # 1 = positive test, 0 = negative test df$CK_100 <- as.factor(ifelse(df$CK >= 100, 1, 0)) # Calculate performance measures results_ck100 <- compute_metrics(test_result = df$CK_100, disease_status = df$Patologia) # Display results print(results_ck100) #-------------------------------------------------------- # Example 2: Define a binary test using CK >= 80 #-------------------------------------------------------- # Create a second binary test variable using a lower cut-off df$CK_80 <- as.factor(ifelse(df$CK >= 80, 1, 0)) # Calculate performance measures results_ck80 <- compute_metrics(test_result = df$CK_80, disease_status = df$Patologia) # Display results print(results_ck80) ########################################################### #======================================================== # ROC ANALYSIS FOR A CONTINUOUS TEST # Example: sFlt/PlGF ratio for hypertension #======================================================== # Load required packages library(readxl) library(pROC) library(dplyr) # Import the dataset df <- read_excel("Exercise_2.xls") # Inspect the structure of the dataset str(df) #-------------------------------------------------------- # Step 1. Create the ROC curve #-------------------------------------------------------- # response = true disease status (must be binary: 0/1) # predictor = continuous test value roc_obj <- roc( response = df$hypertension, predictor = df$sfltplgf, quiet = TRUE ) #-------------------------------------------------------- # Step 2. Calculate and print the Area Under the Curve #-------------------------------------------------------- auc_value <- auc(roc_obj) print(auc_value) #-------------------------------------------------------- # Step 3. Plot the ROC curve #-------------------------------------------------------- plot( roc_obj, legacy.axes = TRUE, main = "ROC curve: sFlt/PlGF for hypertension", lwd = 3, col = "blue" ) # Add AUC to the plot legend( "bottomright", legend = paste("AUC =", round(auc_value, 3)), bty = "n" ) #-------------------------------------------------------- # Step 4. Define a function to calculate performance # measures for any chosen cut-off #-------------------------------------------------------- calc_metrics <- function(cutoff, data) { # Classify subjects according to the chosen threshold # 1 = test positive, 0 = test negative pred <- ifelse(data$sfltplgf >= cutoff, 1, 0) true <- data$hypertension # Compute confusion matrix components TP <- sum(pred == 1 & true == 1, na.rm = TRUE) TN <- sum(pred == 0 & true == 0, na.rm = TRUE) FP <- sum(pred == 1 & true == 0, na.rm = TRUE) FN <- sum(pred == 0 & true == 1, na.rm = TRUE) # Compute diagnostic performance measures sensitivity <- ifelse((TP + FN) > 0, TP / (TP + FN), NA) specificity <- ifelse((TN + FP) > 0, TN / (TN + FP), NA) PPV <- ifelse((TP + FP) > 0, TP / (TP + FP), NA) NPV <- ifelse((TN + FN) > 0, TN / (TN + FN), NA) accuracy <- ifelse((TP + TN + FP + FN) > 0, (TP + TN) / (TP + TN + FP + FN), NA) # Youden index: a common criterion for the optimal cut-off youden_index <- sensitivity + specificity - 1 # Return all metrics data.frame( cutoff = cutoff, TP = TP, FP = FP, TN = TN, FN = FN, sensitivity = sensitivity, specificity = specificity, PPV = PPV, NPV = NPV, accuracy = accuracy, youden_index = youden_index ) } #-------------------------------------------------------- # Step 5. Calculate performance measures for all cut-offs # identified by the ROC procedure #-------------------------------------------------------- thresholds <- sort(unique(roc_obj$thresholds)) results <- bind_rows(lapply(thresholds, calc_metrics, data = df)) %>% distinct() %>% arrange(desc(youden_index), desc(accuracy)) # Print the full table print(results) #-------------------------------------------------------- # Step 6. Identify the optimal cut-off according to # the Youden index #-------------------------------------------------------- best_cutoff_row <- results[which.max(results$youden_index), ] print(best_cutoff_row) #-------------------------------------------------------- # Step 7. Alternative way to extract the best cut-off # directly from the pROC package #-------------------------------------------------------- best_coords <- coords( roc_obj, x = "best", best.method = "youden", ret = c("threshold", "sensitivity", "specificity"), transpose = FALSE ) print(best_coords)