######################################
# Lab 3 - Part 1, 10/11/2021: R code #
######################################

#########################################################
# Plots of probability density functions of the Weibull #
#########################################################
#shape = 0.5; scale = 1
curve(dweibull(x, shape=0.5, scale = 1), from = 0, to = 3, 
      ylab=expression("f(y;" ~ gamma ~"," ~ beta ~")"), xlab="y")
#shape = 1; scale = 1
curve(dweibull(x, shape=1, scale = 1), add=TRUE, col = "red")
#shape = 1; scale = 2
curve(dweibull(x, shape=1, scale = 2), add=TRUE, col = "blue")
#shape = 1.5; scale = 1
curve(dweibull(x, shape=1.5, scale = 1), add=TRUE, col = "green")
#shape = 5; scale = 1
curve(dweibull(x, shape=5, scale = 1), add=TRUE, col = "purple")
# add a legend to plot
legend("topright", legend=c(expression(gamma == 0.5 * ";" ~ beta == 1),
                            expression(gamma == 1 * ";" ~ beta == 1),
                            expression(gamma == 1 * ";" ~ beta == 2),
                            expression(gamma == 1.5 * ";" ~ beta == 1),
                            expression(gamma == 5 * ";" ~ beta == 1)),
       col = c("black", "red", "blue", "green", "purple"), lty = rep(1,5),
       cex = 0.8, box.lty = 0)

###########################
# Log-likelihood function #
###########################
# Negative log-likelihood function:

# by using  dweibull()
n_logLik_Weib <- function(param, data){
 -sum(dweibull(data, shape = param[1], scale = param[2], log = TRUE))
}

# by hand (sum(log(y))) is an additive constant
n_logLik_Weib2 <- function(param, data){
 n <- length(data)
 res <- n*log(param[1]) - n*param[1]*log(param[2]) + 
        param[1]*sum(log(data)) - sum((data/param[2])^param[1])#-sum(log(data))
 return(-res)
}


# We generate the data
set.seed(1234)
y <- rweibull(15, shape = 7, scale = 155)

#######################################
# Plot of the relative log-likelihood #
#######################################

# Define a parameter grid to plot the log-likelihood
gamma <- seq(0.1, 15, length = 100) 
beta <- seq(100, 200, length = 100) 
parvalues <- expand.grid(gamma, beta)

# compute the negative log-likelihood for the values of the grid
n_llikvalues <- apply(parvalues, 1, n_logLik_Weib, data = y)
# return to the log-likelihood and store the results in a matrix 
llikvalues <- matrix(-n_llikvalues, nrow = length(gamma), ncol = length(beta),
                     byrow=FALSE)

par(mfrow=c(1,2))
# contour plot 
# Define the confidence levels (the level of the confidence region)
conf_levels <- c(0,0.5, 0.75, 0.9, 0.95, 0.99)

# Note: maybe at the blackboard I forgot the minus sign
#       when I asked you the asymptotic distribution 
#       of the log-likelihood ratio statistic
 

contour(gamma, beta, llikvalues - max(llikvalues), 
        levels = -qchisq(conf_levels, 2)/2,
        xlab = expression(gamma), 
        ylab = expression(beta),
        labels = as.character(conf_levels))
title("Weibull relative log likelihood")

# image plot
image(gamma, beta, llikvalues - max(llikvalues), zlim = c(-6,0),
      col = terrain.colors(20), xlab = expression(gamma),
      ylab = expression(beta))
title("Weibull relative log likelihood")


# Score function (derivatives of log-likelihood w.r.t. gamma 
#                 after beta_gamma substitution)

logLik_equation <- function(x, data){
 n <- length(data) 
 res <- n/x + sum(log(data)) - n*(sum(data^x*log(data))/(sum(data^x)))
 return(res)
}

#MLE gamma
gammahat <- uniroot(logLik_equation, c(1e-5, 15), data = y)$root
gammahat 
#[1] 7.289321

#MLE beta
betahat <- mean(y^gammahat)^(1/gammahat) 
betahat 
#[1] 155.3338

# Check if the score is (0,0) in the MLE by using numerical first derivative
library(numDeriv)
grad(c(gammahat, betahat), func=n_logLik_Weib, data=y)
#[1]  1.564041e-05 -2.112082e-12


n <- length(y)
# observed information matrix evaluated in the MLE 
jhat <- matrix(NA, 2, 2)
jhat[1,1] <- n/gammahat^2 + sum((y/betahat)^gammahat*(log(y/betahat)^2)) 
jhat[1,2] <- jhat[2,1] <- n/betahat - sum(y^gammahat/(betahat^(gammahat+1))*(gammahat*log(y/betahat)+1))
jhat[2,2] <- -n*gammahat/(betahat^2) + gammahat*(gammahat+1)*sum(y^gammahat)/(betahat^(gammahat+2)) 

jhat
#            [,1]        [,2]
#[1,]  0.60641358 -0.04736006
#[2,] -0.04736006  0.03303187

# Check the hessian in the MLE by using the hessian function of the numDeriv package 
hessian(c(gammahat, betahat), func=n_logLik_Weib, data=y)
#            [,1]        [,2]
#[1,]  0.60641358 -0.04736006
#[2,] -0.04736006  0.03303187

# Estimate of the std.err of the MLE estimators
mle.se <- sqrt(diag(solve(jhat)))
#[1] 1.362709 5.838764

################################################
# Numerical optimization of the log-likelihood #
################################################
# Log-likelihood optimization 

# By using nlm function 
#Staring values near to MLE estimate
weib.nlm_start1 <- nlm(n_logLik_Weib, c(5,160), data=y, hessian = TRUE)
weib.nlm_start1
#$minimum
#[1] 67.4824
#
#$estimate
#[1]   7.289283 155.333712
#
#$gradient
#[1]  1.111246e-06 -1.282633e-07
#
#$hessian
#            [,1]        [,2]
#[1,]  0.60639337 -0.04728086
#[2,] -0.04728086  0.03299776
#
#$code
#[1] 1
#
#$iterations
#[1] 10

#Staring values far to MLE estimate
weib.nlm_start2 <- nlm(f = n_logLik_Weib, p = c(0.1, 0.1), data = y, hessian = TRUE)
weib.nlm_start2

# By using optim function 
#Staring values near to MLE estimate
weib.optim_start1 <- optim(par = c(5, 160), fn= n_logLik_Weib, data = y, hessian = TRUE,
                         method = "L-BFGS-B", lower = rep(1e-7, 2), upper = rep(Inf,2))
weib.optim_start1 
#Staring values far to MLE estimate
weib.optim_start2 <- optim(par = c(0.1, 0.1), fn = n_logLik_Weib, data = y, hessian = TRUE,
                         method = "L-BFGS-B", lower = rep(1e-7, 2), upper = rep(Inf,2))
weib.optim_start2

# What happens if we using c(0,0) as guesstimate 
weib.nlm_start3 <- nlm(f = n_logLik_Weib, p = c(0, 0), data = y, hessian = TRUE)
weib.nlm_start3


# Reparameterization
theta <- function(omega) exp(omega)

# Negative log-likelihood
n_logLik_Weib_rep <- function(param, data) n_logLik_Weib(theta(param), data)

# Optimize the log-likelihood function by using nlm 
# (also here the are some warnings but the algorithm works)
weib_nlm_start3_rep <- nlm(f = n_logLik_Weib_rep, p = c(0, 0), data = y)
weib_nlm_start3_rep

# Check
theta(weib_nlm_start3_rep$estimate)

weib.nlm_start1$estimate

# Try to use also nlminb 


# Working on the reparameterization
# Log-likelihood optimization by using optim:
# starting values = c(1,20), quasi-Newton Method
weib_optim_start3_rep_qn <- optim(par = c(1, 20), fn = n_logLik_Weib_rep,
                             method = "BFGS", data=y)


# starting values = c(1,6), conjugate-gradient Method
weib_optim_start3_rep_CG <- optim(par = c(1, 6), fn = n_logLik_Weib_rep,
                             method = "CG", data = y)

# check
theta(weib_optim_start3_rep_qn$par)

theta(weib_optim_start3_rep_CG$par)

weib.nlm_start1$estimate

# Check the hessian
optimHess(theta(weib_optim_start3_rep_qn$par), n_logLik_Weib, data = y)


jhat


######################
# Profile likelihood #
######################

weib.y.mle<- optim(par = c(1,1), fn = n_logLik_Weib, hessian = TRUE,
                   method = "L-BFGS-B", data = y,
                   lower = rep(1e-7, 2), upper = rep(Inf, 2) )

# We can visualize the profile log-likelihood directly 
# in the contour plot of the log-likelihood
gamma <- seq(0.1, 15, length = 100)
beta <- seq(100, 200, length = 100)
parvalues <- expand.grid(gamma, beta)
llikvalues <- apply(parvalues, 1, n_logLik_Weib, data = y)
llikvalues <- matrix(-llikvalues, nrow = length(gamma),
                     ncol = length(beta), byrow = FALSE)

conf.levels <- c(0, 0.5, 0.75, 0.9, 0.95, 0.99)

par(mfrow=c(1,1))
contour(gamma, beta, llikvalues - max(llikvalues),
        levels = -qchisq(conf.levels, 2)/2,
        xlab = expression(gamma),
        ylab = expression(beta),
        labels = as.character(conf_levels))


beta.gamma <- sapply(gamma, function(x) mean(y^x)^(1/x))
# line of the constrained estimate
lines(gamma, beta.gamma, lty = "dashed", col = 2)
points(weib.y.mle$par[1],weib.y.mle$par[2])
title("Weibull relative log-likelihood with profile log-likelihood")

# profile log-likelihood 
n_logLik_Weib_profile <- function(gamma, data){
 beta.gamma <- mean(data^gamma)^(1/gamma)
 n_logLik_Weib(c(gamma, beta.gamma), data)
}

# vectorize the function with respect to gamma  
n_logLik_Weib_profile_v <- Vectorize(n_logLik_Weib_profile, 'gamma')

# Vectorized function allows to compute the function 
# in a vector of points and get the corresponding output results. So 
n_logLik_Weib_profile(c(5,6,7), data = y) # wrong

#since return the log-likelihood evaluated in c(5,6)
n_logLik_Weib(c(5,6), data=y)

n_logLik_Weib_profile_v(c(5,6,7), data = y) # correct

# Plot the relative profile log-likelihood  
plot(function(x) -n_logLik_Weib_profile_v(x, data = y) + weib.y.mle$value,
     from = 0.1, to =15, xlab = expression(gamma), 
     ylab = "Profile relative log-likelihood", ylim =  c(-8,0))

# choose the confidence level
conf.level <- 0.95
#Obtaining the upper and the lower limits of the deviance confidence interval
abline(h = -qchisq(conf.level,1)/2, lty = "dashed", col = "red")

# Find the numerical values by using the uniroot function
lrt.ci1 <- uniroot(function(x) -n_logLik_Weib_profile_v(x, data = y) + weib.y.mle$value +
                   qchisq(conf.level,1)/2,
                   c(1e-7,weib.y.mle$par[1]))$root
lrt.ci <- c(lrt.ci1, uniroot(function(x) -n_logLik_Weib_profile_v(x, data = y) +  weib.y.mle$value +
                   qchisq(conf.level,1)/2,
                   c(weib.y.mle$par[1],15))$root)

# Plotting some quantities in the relative profile log-likelihood plot 
segments(lrt.ci[1], -qchisq(conf.level,1)/2,
         lrt.ci[1], -n_logLik_Weib_profile_v(lrt.ci[1], data = y), col = "red")
segments(lrt.ci[2], - qchisq(conf.level,1)/2,
         lrt.ci[2], -n_logLik_Weib_profile_v(lrt.ci[2], data = y), col = "red")

points(lrt.ci[1],-qchisq(conf.level,1)/2, pch=16, col="red", cex=1.5)
points(lrt.ci[2],-qchisq(conf.level,1)/2, pch=16, col="red", cex=1.5)

segments(lrt.ci[1], -8.1, lrt.ci[2], -8.1, col = "red", lty = 1, lwd =2 )
text(7,-7.5, "95% Deviance CI", col = 2)