## Fit di curve deterministiche

#setwd("~/Dropbox/StatEcon/Materiali_DiFonzoLisi/")

## esempio "Pantaloni sportivi"
pant <- read.table("fig.2_18.dat", h=F)
plot(ts(pant, start=1971))

pant <- pant[,1]
ttrend <- 1:length(pant)

dati <- data.frame(pant=pant, ttrend=ttrend)

## curva esponenziale, criterio dei minimi quadrati non lineari (NLS)
expmod <- nls(pant ~ I(a*exp(b*ttrend)), dati,
              start=list(a=1, b=0), trace=TRUE)

## idem, curva logistica
logismod <- nls(pant ~ I(a/(1+b*exp(-k*ttrend))), dati,
                start=list(a=2000, b=12, k=0.3), trace=TRUE)
## ok!
lines(ts(fitted(logismod), start=1971), col="red", lty=2)

## "worse" starting values:
logismod <- nls(pant ~ I(a/(1+b*exp(-k*ttrend))), dati,
                start=list(a=10000, b=10, k=0.5), trace=TRUE)
## still ok, 10 it.

## "really bad" starting values:
logismod <- nls(pant ~ I(a/(1+b*exp(-k*ttrend))), dati,
                start=list(a=5000, b=1, k=0.5), trace=TRUE)
## singular gradient

## "a mano":
## criterio dei MQO: write "sum of squares" function
mylogis <- function(parms, y, ttrend) {
  a <- parms[1]
  b <- parms[2]
  k <- parms[3]
  yhat <- a/(1+b*exp(-k*ttrend))
  uhat <- y - yhat
  return(sum(uhat^2))
}

## Valori di partenza ragionevoli: un metodo del gradiente
## trova la soluzione

## reasonable starting values, Nelder-Mead
optim(par=c(30000, 4, 0.5),
      fn=mylogis,
      y=pant, ttrend=ttrend)

## Valori di partenza mal scelti: l'ottimizzazione vincolata
## "aiuta"

## bad starting values, box constraints
optim(par=c(5000, 0, 0.5),
      fn=mylogis,
      method="L-BFGS-B",
      lower=c(5000, 0, 0),
      upper=c(100000, 100, 10),
      y=pant, ttrend=ttrend)

#######################################

## Gompertz curve

gompmod <- nls(pant ~ I(a*exp(-b*exp(-k*ttrend))), dati,
                start=list(a=35000, b=1, k=0.5), trace=TRUE)

mygomp <- function(parms, y, ttrend) {
  a <- parms[1]
  b <- parms[2]  
  k <- parms[2]
  yhat <- a*exp(-b*exp(-k*ttrend))
  uhat <- y - yhat
  return(sum(uhat^2))
}

myopt <- optim(par=c(35000, 1, 0.5),
      fn=mygomp,
      method="L-BFGS-B",
      lower=c(25000, 0, 0),
      upper=c(50000, 100, 10),
      y=pant, ttrend=ttrend)

## fitted values:
mypar <- myopt$par
my.yhat <- mypar[1]*exp(-mypar[2]*exp(-mypar[3]*ttrend))

plot(pant, type="l")
lines(my.yhat, col="green3", lty=2)
#######################################

## (try to-) fit mod. exponential
mymexp <- function(parms, y, ttrend) {
  b0 <- parms[1]
  b1 <- parms[2]  
  b2 <- parms[2]
  yhat <- b0 + b1*exp(b2*ttrend)
  uhat <- y - yhat
  return(sum(uhat^2))
}

myopt <- optim(par=c(5000, 0, 0.5),
      fn=mymexp,
      method="L-BFGS-B",
      lower=c(5000, -Inf, -100),
      upper=c(35000, 0, 100),
      y=pant, ttrend=ttrend)

## converges! To what??
## fitted values:
mypar <- myopt$par
my.yhat <- mypar[1] + mypar[2]*exp(mypar[3]*ttrend)

plot(pant, type="l")
lines(my.yhat, col="green3", lty=2)

#####################################

## Proviamo ad applicare il criterio dei MQ al modello lineare
## (esiste una soluzione ottimale in forma chiusa, OLS, ma
## vediamo se la ritroviamo)

## numerical estimate of linear OLS:

## least squares function
mylin <- function(parms, y, ttrend) {
  b0 <- parms[1]
  b1 <- parms[2]  
  yhat <- b0 + b1*ttrend
  uhat <- y - yhat
  return(sum(uhat^2))
}

myopt <- optim(par=c(0, 0),
               fn=mylin,
               y=pant, ttrend=ttrend)

## fitted values:
my.yhat.l <- myopt$par[1] + myopt$par[2]*ttrend

plot(pant, type="l")
lines(my.yhat.l, col="green3", lty=2)

## OLS fit (closed-form solution)
abline(lm(pant~ttrend), col="orange", lty=3)

## compare parameters:
myopt$par
coef(lm(pant~ttrend))

##################################

## Esempio "cellulari in Portogallo" (p. 76)

cell <- read.table("fig.2_19.dat", h=F)
cell <- cell[,1]
plot(ts(cell, start=c(1995,4)))

## (log-)linear trend (Tab. 2.7)
coef(lm(log(cell)~ttrend))

## Gompertz curve
ttrend <- 1:length(cell)
dati <- data.frame(cell=cell, ttrend=ttrend)
gompmod <- nls(cell ~ I(a*exp(-b*exp(-k*ttrend))), dati,
               start=list(a=5000, b=1, k=0.1), trace=TRUE)

mygomp <- function(parms, y, ttrend) {
  a <- parms[1]
  b <- parms[2]  
  k <- parms[2]
  yhat <- a*exp(-b*exp(-k*ttrend))
  uhat <- y - yhat
  return(sum(uhat^2))
}

myopt <- optim(par=c(12000, 5, 0.1),
               fn=mygomp,
               method="CG", #L-BFGS-B",
               #lower=c(0, 0, 0),
               #upper=c(0, 100, 10),
               y=cell, ttrend=ttrend,
               control=list(trace=1))

## fitted values:
mypar <- myopt$par
my.yhat <- mypar[1]*exp(-mypar[2]*exp(-mypar[3]*ttrend))

plot(cell, type="l")
lines(my.yhat, col="green3", lty=2)

## try another optimizer
myopt <- optim(par=c(12000, 5, 0.1),
               fn=mygomp,
               method="SANN", #L-BFGS-B",
               #lower=c(0, 0, 0),
               #upper=c(0, 100, 10),
               y=cell, ttrend=ttrend,
               control=list(trace=1, maxit=2000000))
mypar <- myopt$par
my.yhat <- mypar[1]*exp(-mypar[2]*exp(-mypar[3]*ttrend))

plot(cell, type="l")
lines(my.yhat, col="green3", lty=2)

## logistic model
logismod2 <- nls(cell ~ I(a/(1+b*exp(-k*ttrend))), dati,
               start=list(a=10000, b=30, k=0.1), trace=TRUE)

plot(cell, type="l")
lines(fitted(logismod2), col="blue", lty=2)