# Librerie necessarie
library(ggplot2)
set.seed(123)

# Genera i dati
n <- 100
x <- sort(runif(n, -3, 3))
f_true <- function(x) { 0.5 * x^3 - x^2 + 2 * x }
sigma <- 2
y <- f_true(x) + rnorm(n, mean = 0, sd = sigma)

# Divisione dei dati in training e test
set.seed(42)
train_index <- sample(seq_len(n), size = 0.7 * n)
train_data <- data.frame(x = x[train_index], y = y[train_index])
test_data <- data.frame(x = x[-train_index], y = y[-train_index])

# Parametri
max_degree <- 15   # Grado massimo del polinomio da testare
n_simulations <- 100  # Numero di ripetizioni per ciascun grado del polinomio

# Inizializza vettori per memorizzare i risultati
mse_per_degree <- matrix(NA, n_simulations, max_degree)
bias_squared_per_degree <- numeric(max_degree)
variance_per_degree <- numeric(max_degree)

# Loop sui vari gradi del polinomio
for (degree in 1:max_degree) {
  
  # Creiamo una matrice per salvare le predizioni
  predictions_matrix <- matrix(0, nrow = nrow(test_data), ncol = n_simulations)
  
  # Ripeti il processo di addestramento e predizione per n_simulations volte
  for (i in 1:n_simulations) {
    # Aggiungi rumore ai dati di training
    train_data$y <- f_true(train_data$x) + rnorm(length(train_data$x), mean = 0, sd = sigma)
    
    # Addestra il modello polinomiale
    model <- lm(y ~ poly(x, degree), data = train_data)
    
    # Fai predizioni sui dati di test
    predictions_matrix[, i] <- predict(model, newdata = test_data)
    
    # Calcola il MSE complessivo sui dati di test
    mse_per_degree[i, degree] <- mean((test_data$y - predictions_matrix[,i])^2)
    }
  
    # MSE per degree finale
    mse_per_degree_fin <- apply(mse_per_degree,2, mean)  
  
  # Calcola la varianza delle predizioni per ciascuna osservazione nei dati di test
  variance_per_point <- apply(predictions_matrix, 1, var)
  
  # Calcola la predizione media per ogni punto nei dati di test
  mean_predictions <- apply(predictions_matrix, 1, mean)
  
  # Calcola il bias al quadrato per ciascun punto
  y_true_test <- f_true(test_data$x)
  bias_squared_per_point <- (-y_true_test + mean_predictions)^2
  
  
  
  
  # Calcola il bias al quadrato e la varianza medi per ciascun grado del polinomio
  bias_squared_per_degree[degree] <- mean(bias_squared_per_point)
  variance_per_degree[degree] <- mean(variance_per_point)
}

# Errore irriducibile (varianza del rumore) costante per tutti i gradi del polinomio
irreducible_error <- sigma^2

total_mse <- irreducible_error + variance_per_degree + bias_squared_per_degree

# ---- Grafico Finale delle Componenti di Errore per i Diversi Gradi del Polinomio ----
# Crea un dataframe per il grafico
plot_data <- data.frame(
  Degree = 1:max_degree,
  MSE = mse_per_degree_fin,
  Bias_Squared = bias_squared_per_degree,
  Variance = variance_per_degree,
  Irreducible_Error = rep(irreducible_error, max_degree),
  Total_MSE = total_mse
  )

# Grafico con ggplot2
ggplot(plot_data, aes(x = Degree)) +
  geom_line(aes(y = MSE, color = "MSE"), size = 1.2) +
  geom_line(aes(y = Bias_Squared, color = "Bias^2"), size = 1.2, linetype = "dashed") +
  geom_line(aes(y = Variance, color = "Varianza"), size = 1.2, linetype = "dotted") +
  geom_line(aes(y = Irreducible_Error, color = "Errore Irriducibile"), size = 1.2, linetype = "dotdash") +
  geom_line(aes(y = Total_MSE, color = "Total_MSE"), size = 1.2, linetype = "dotdash") +
  labs(title = "Contributo di MSE, Bias^2, Varianza e Errore Irriducibile al variare del grado del polinomio",
       x = "Grado del Polinomio",
       y = "Valore",
       color = "Componenti") +
  theme_minimal() +
  theme(legend.position = "right")