n=200
x=sort(runif(n,0,1))
x=seq(0,1,length=n)
truef=function(x) sin(2*pi*x)
my=truef(x)
signaltonoise=2
simula=function(signalnoise=signaltonoise){
y=rnorm(n,my,sd=sd(my)/signalnoise)
return(data.frame(x=x,y=y,truef=my))
}
d=simula()
plot(d$x,d$y)
library(mgcv)
## Loading required package: nlme
## This is mgcv 1.9-0. For overview type 'help("mgcv-package")'.
fit=gam(y~s(x),data=d)
fit=gam(y~s(x,k=3),data=d)
fit=gam(y~s(x,k=10),data=d)
plot(fit)
plot(fit,residuals=TRUE,pch=20)
Some more options for {}
plot(fit,shade=TRUE,seWithMean = TRUE,scale=0)
gam.check(fit)
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 6 iterations.
## The RMS GCV score gradient at convergence was 2.09908e-05 .
## The Hessian was positive definite.
## Model rank = 10 / 10
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(x) 9.0 5.4 1.1 0.9
Increase k to see
fit=gam(y~s(x,k=10),data=d)
fit=gam(y~s(x,k=20),data=d)
Check for unmodeled patterns in the residuals
fit=gam(y~s(x,k=3),data=d)
rsd <- residuals(fit,type="deviance")
fit.res=gam(rsd~s(x,k=20)-1,data=d,select=TRUE)
fit=gam(y~s(x,k=20),data=d)
rsd <- residuals(fit,type="deviance")
fit.res=gam(rsd~s(x,k=20)-1,data=d,select=TRUE)
fit1=gam(y~s(x,k=20),data=d,method="GCV.Cp")
fit2=gam(y~s(x,k=20),data=d,method="REML")
fit3=gam(y~s(x,k=20),data=d,method="ML")
plot(d$x,d$y)
curve(predict(fit1,newdata=data.frame(x=x)),add=TRUE)
curve(predict(fit2,newdata=data.frame(x=x)),add=TRUE,col="red")
curve(predict(fit3,newdata=data.frame(x=x)),add=TRUE,col="green")
summary(fit)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## y ~ s(x, k = 20)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0318 0.0262 1.214 0.226
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(x) 5.731 7.137 106.4 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.795 Deviance explained = 80.1%
## GCV = 0.14209 Scale est. = 0.13731 n = 200
anova(fit)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## y ~ s(x, k = 20)
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(x) 5.731 7.137 106.4 <2e-16
beta.hat=coef(fit)
beta.vcov=vcov(fit)
beta.vcov.freq=vcov(fit,freq=TRUE)
dat <- gamSim(1,n=400,dist="normal",scale=2)
## Gu & Wahba 4 term additive model
fit=gam(y~s(x0)+s(x1)+s(x2)+s(x3),data=dat)
par(mfrow=c(1,4))
plot(fit)
par(mfrow=c(1,4))
plot(fit,scale=0)
vis.gam(fit,view=c("x1","x2"),theta=30,se=2)