### Lab 4 (28 ottobre 2024) ## 1. simulo dati da una cubic spline library(splines) set.seed(101) n <- 150 beta0 <- 1 beta1 <- 0.7 beta2 <- -0.1 beta3 <- 5 beta4 <- -1.5 beta5 <- 0.1 beta6 <- -0.3 #beta7 <- 0.1 beta <- c(beta0, beta1, beta2, beta3, beta4, beta5, beta6) x <- runif(n, -1,1) xlims <- range(x) x.grid <- seq(xlims[1], xlims[2], length.out = n) knots <- c(-0.7,0,0.7) # 3 nodi # metodo con basi di potenza troncate (truncated power basis) basis <- matrix(NA, n, 7) # K + 4 gdl = 7 gdl basis[,1] <- rep(1,n) basis[,2] <- x basis[,3] <- x^2 basis[,4] <- x^3 basis[,5] <- 0*(x<=knots[1])+((x-knots[1])^3)*(x>knots[1]) # truncated basis creation basis[,6] <- 0*(x<=knots[2])+((x-knots[2])^3)*(x>knots[2]) basis[,7] <- 0*(x<=knots[3])+((x-knots[3])^3)*(x>knots[3]) # metodo con b-splines basis.2 <- bs(x, knots = knots, intercept = FALSE) # metodo alternativo per le basi (b-spline) epsilon <- rnorm(n, 0,0.5) # dati con metodo 1 y <- basis%*%beta + epsilon # dati con metodo 2 y.2 <- beta0 + basis.2%*%beta[2:7] + epsilon par(mfrow=c(1,2)) plot(x,y) plot(x, y.2) # uso dati 1 par(mfrow=c(1,1)) plot(x,y) ## 2. Stimo spline cubica, naturale cubica e smoothing splines # cubic spline con funzione 'bs()' e nodi specificati fit <- lm(y ~ bs(x, knots = c( -0.5, 0, 0.5))) pred <- predict(fit , newdata = list(x = x.grid), se = T) lines(x.grid, pred$fit , lwd = 2, col = 4) lines(x.grid , pred$fit + 2 * pred$se, lty = "dashed", col = 4) lines(x.grid , pred$fit - 2 * pred$se, lty = "dashed", col = 4) # cubic spline con funzione 'bs()' e nodi non specificati dall'utente fit.b <- lm(y ~ bs(x, df = 7, intercept = TRUE)) pred.b <- predict(fit.b , newdata = list(x = x.grid), se = T) lines(x.grid, pred.b$fit , lwd = 2, col = "cyan") lines(x.grid , pred.b$fit + 2 * pred$se, lty = "dashed", col = 4) lines(x.grid , pred.b$fit - 2 * pred$se, lty = "dashed", col = 4) # natural cubic spline con funzione 'ns()' (K gdl) fit.2 <- lm(y ~ ns(x, df = 3)) pred.2 <- predict(fit.2 , newdata = list(x = x.grid), se = T) lines(x.grid, pred.2$fit , lwd = 2, col = 2) lines(x.grid , pred.2$fit + 2 * pred.2$se, lty = "dashed", col = 2) lines(x.grid , pred.2$fit - 2 * pred.2$se, lty = "dashed", col = 2) # smoothing spline con criterio per lambda fit.3 <- smooth.spline(x , y , cv = TRUE) lines(fit.3 , col = "green", lwd = 2) # 2 modi alternativi # smoothing spline con lambda = 0 (molto flessibile) fit.3bis <- smooth.spline(x , y , lambda = 0 ) lines(fit.3bis , col = "darkgreen", lwd = 2) # smoothing spline con 200 df fit.4 <- smooth.spline(x , y , df = 200) lines(fit.4 , col = "darkolivegreen", lwd = 2) # smoothing spline con 2 df (molto liscia) fit.5 <- smooth.spline(x , y , df = 2) lines(fit.5 , col = "orange", lwd = 2) legend(-0.2,-2, lty = 1, lwd =2, col=c(4, "cyan", 2,3, "darkgreen", "darkolivegreen", "orange"), c("Cubic", "Cubic 2", "Natural", expression(paste("Smooth, ", lambda[opt])), expression(paste("Smooth, ", lambda == 0)), expression(paste("Smooth, ", df==200)), expression(paste("Smooth, ", df==2))), cex =0.4) ## Consegna: # 1. Ripetere punti precedenti usando # i dati generati con b-splines, y.2 # 2. Ripetere punti precedenti usando # i dati contenuti in splines_data.csv # 3. Esercizio con GAM: produrre codice # commentare. Trovate prima un esempio # con dati 'mcycle' che non serve riportare # nella consegna. La consegna è sui dati # tree. ## 3. GAM library(MASS) library(gam) # the data measure the acceleration of the # rider's head, against time, in a simulated motorcycle crash. # Plot the acceleration against time, and use gam to t a univariate smooth to the data, # selecting the smoothing parameter by GCV. Plot the resulting smooth, with partial resid- # uals, but without standard errors. data("mcycle") plot(mcycle$times, mcycle$accel) y <- mcycle$accel x <- mcycle$times xlims <- range(x) x.grid <- seq(xlims[1], xlims[2], length.out = length(y)) fit.gam <- gam::gam(y ~ gam::s(x, 10) ) summary(fit.gam) pred <- predict(fit.gam, newdata = list(x = x.grid )) lines(x.grid, pred, col=2, lwd=2) plot(fit.gam) # It's possible to overstate the importance of penalization in explaining the improvement of # the penalized regression spline, relative to the polynomial. Use gam to re t an unpenalized # thin plate regression spline to the data, with basis dimension the same as that used for # the polynomial, and again produce a plot for comparison with the previous two results. # 6. GAM (trees data) library(mgcv) glm.2<-glm(Volume ~ Girth + Height, family = Gamma(link=log), data=trees) gam.2 <- mgcv::gam(Volume ~ s(Height) + s(Girth), family=Gamma(link=log), data=trees) summary(gam.2) plot(gam.2,residuals=TRUE,pch=19) plot.gam(gam.2,residuals=TRUE,pch=19) # AIC tra modello GAM e GLM AIC(glm.2, gam.2) sp <- gam.2$sp sp # optimal level of smoothing tuning.scale<-c(1e-5,1e-4,1e-3,1e-2,1e-1,1e0,1e1,1e2, 1e3,1e4,1e5) scale.exponent <- log10(tuning.scale) n.tuning <- length(tuning.scale) edf <- rep(NA,n.tuning) min2ll <- rep(NA,n.tuning) aic <- rep(NA,n.tuning) for (i in 1:n.tuning) { gamobj <- gam(Volume ~ s(Height) + s(Girth), family=Gamma(link=log), data=trees, sp=tuning.scale[i]*sp) min2ll[i]<--2*logLik(gamobj) edf[i]<-sum(gamobj$edf)+1 aic[i]<-AIC(gamobj) } par(mfrow=c(2,2)) plot(scale.exponent,min2ll,type="b",main="-2 log likelihood") plot(scale.exponent,edf,ylim=c(0,70),type="b",main="effective number of parameters") plot(scale.exponent,aic,type="b",main="AIC") # find the minimum min.aic<-1e100 opt.tuning.scale<-NULL for (i in 1:n.tuning) { if (aic[i]