# On the Normal distribution
curve(dnorm(x, mean = 100, sd = 10), xlim = c(70, 130), ylab = "density")
abline(v = 100, lty = "dashed", col = "red")
abline(v = c(90, 110), lty = "dashed", col = "blue")

# Density functions: location shift
curve(dnorm(x, mean = -5, sd = 1), xlim = c(-10, 15), ylab = "density")
curve(dnorm(x, mean = 0, sd = 1), add = TRUE, col = "darkgreen")
curve(dnorm(x, mean = 5, sd = 1), add = TRUE, col = "gold")
curve(dnorm(x, mean = 10, sd = 1), add = TRUE, col = "darkred")

# Density functions: scaling
curve(dnorm(x, mean = 0, sd = 0.5), xlim = c(-10, 10), ylab = "density")
curve(dnorm(x, mean = 0, sd = 1), add = TRUE, col = "darkgreen")
curve(dnorm(x, mean = 0, sd = 2), add = TRUE, col = "gold")
curve(dnorm(x, mean = 0, sd = 5), add = TRUE, col = "darkred")

# Density functions: location shift and scaling
curve(dnorm(x, mean = 0, sd = 1), xlim = c(-10, 10), ylab = "density")
curve(dnorm(x, mean = 0, sd = 2), add = TRUE, col = "darkgreen")
curve(dnorm(x, mean = 5, sd = 1), add = TRUE, col = "gold")
curve(dnorm(x, mean = 5, sd = 2), add = TRUE, col = "darkred")

# Cumulative distribution: location shift and scaling
curve(pnorm(x, mean = 0, sd = 1), xlim = c(-10, 10), ylab = "cumulative distribution function")
curve(pnorm(x, mean = 0, sd = 2), add = TRUE, col = "darkgreen")
curve(pnorm(x, mean = 5, sd = 1), add = TRUE, col = "gold")
curve(pnorm(x, mean = 5, sd = 2), add = TRUE, col = "darkred")


# The dnorm function: graphical check
curve(dnorm(x, mean = 100, sd = 10), xlim = c(80,120), ylab = "density")
dnorm(110, mean = 100, sd = 10)
y <- dnorm(110, mean = 100, sd = 10)
segments(x0 = 110, y0 = 0, x1 = 110, y1 = y, col = "red", lwd = 2)
segments(x0 = 80, y0 = y, x1 = 110, y1 = y, col = "red", lwd = 2)
points(110, y, col = "red", pch = 19)

# The pnorm function: graphical check
curve(pnorm(x, mean = 100, sd = 10), xlim = c(80,120), ylab = "cumulative distribution function")
pnorm(110, mean = 100, sd = 10)

y <- pnorm(110, mean = 100, sd = 10)
segments(x0 = 110, y0 = 0, x1 = 110, y1 = y, col = "red", lwd = 2)
segments(x0 = 80, y0 = y, x1 = 110, y1 = y, col = "red", lwd = 2)
points(110, y, col = "red", pch = 19)

# The qnorm function: graphical check
curve(pnorm(x, mean = 100, sd = 10), xlim = c(80,120), ylab = "cumulative distribution function")
qnorm(c(0.1,0.5,0.9), mean = 100, sd = 10) 
x <- qnorm(c(0.1,0.5,0.9), mean = 100, sd = 10)
segments(x0 = c(0,0,0), y0 = c(0.1, 0.5, 0.9), x1 = x, y1 = c(0.1, 0.5, 0.9), col = "red", lwd = 2)
segments(x0 = x, y0 = 0, x1 = x, y1 = c(0.1, 0.5, 0.9), col = "red", lwd = 2)

# The rnorm funtion: graphical check
set.seed(14)
sim <- rnorm(1000, mean = 170, sd = 10)
hist(sim, breaks = 40, freq = FALSE)
curve(dnorm(x, mean = 170, sd = 10), add = TRUE)

# Confidence Intervals
mu <- 5; sigma <- 2; B <- 1000; n <- 10; alpha <- 0.05
CI <- matrix(0, B, 2)
l <- rep(0, B)

set.seed(23)
q0975 <- qt(1 - alpha/2, n - 1)

for (i in 1 : B){
  x <- rnorm(n, mu, sigma)
  CI[i, ] <- mean(x) + c(-q0975, q0975) * sd(x)/sqrt(n)
  l[i] <- (mu > CI[i,1] & mu < CI[i,2]) 
}

#Empirical coverage probability
mean(l)

# Plot of the (first 100) CIs
plot(1, xlim = c(0,10), ylim = c(0,11), type = "n", 
     xlab = expression(mu), ylab = "", yaxt  = "n", 
     main = paste("100 IC for the mean (unknown variance)"), cex.main = 1.2)
abline(v = mu)

d <- 0
for (i in 1 : 100){
  d <- d + 0.1 
  lines(seq(CI[i, 1], CI[i, 2], length = 100), rep(d, 100), col = (l[i] + 1))
}

# number of intervals (out the 100) including the true parameter value 
sum(l[1 : 100]) 

# Example of hypothesis test
y <- c(497.0621, 497.7690, 497.2631, 498.6688, 494.6227, 496.2185, 498.2635, 502.7199, 497.4339, 494.2883)
t_obs <- (mean(y) - 500)/(sd(y)/sqrt(length(y))) 

curve(dt(x, 9), xlim = c(-5, 5), ylim = c(0, 0.4), 
      main = "Rejection region", col = "blue", 
      lwd = 2, xlab = "",  ylab = expression(t[9]),  yaxs="i")
cord.x0 <- c(-5, seq(-5, qt(0.025, 9), 0.01),  qt(0.025, 9))
cord.y0 <- c(0, dt(seq(-5,qt(0.025, 9), 0.01), 9), 0)
cord.x1 <- c(qt(0.975, 9),seq(qt(0.975, 9), 5, 0.01), 5)
cord.y1 <- c(0, dt(seq(qt(0.975, 9), 5, 0.01), 9), 0)
polygon(cord.x0, cord.y0, col = "darkred", border = NA )
polygon(cord.x1, cord.y1, col = "darkred", border = NA )

abline(v = t_obs, lty = 2, lwd = 2, col = "red")
text(0, 0.2, paste("Accept", expression(H0)))
text(c(-2.7,2.7), 0.08, paste("Reject", expression(H0)), col = "darkred")
text(as.double(t_obs) - 0.15, 0.02, "t", col = "red", cex = 1.2)

curve(dt(x, 9), xlim = c(-5, 5), ylim = c(0, 0.4), 
      main = "P-value", col = "blue", 
      lwd = 2, xlab = "",  ylab = expression(t[9]),  yaxs="i")
cord.x0 <- c(-5, seq(-5, t_obs, 0.01), t_obs)
cord.y0 <- c(0, dt(seq(-5,t_obs, 0.01), 9), 0)
polygon(cord.x0, cord.y0, col = "darkgreen", border = NA )

abline(v = t_obs, lty = 2, lwd = 2, col = "red")
text(as.double(t_obs) - 0.15, 0.02, "t", col = "red", cex = 1.2)


# Likelihood optimization: normal model 
nllik <- function(par, data) {
  mu <- par[1]      
  sigma2 <- par[2]   
  n <- length(data)
  nll <-  0.5 * n * log(sigma2) + sum((data - mu)^2) / (2 * sigma2)
  return(nll)
}

# Generate some data
set.seed(13)
n <- 50
x <- rnorm(n, mean = 10, sd = 2)

# Set the starting values of the algorithm 
start_vals <- c(mu = 0, sigma = 1)

# Fit the model 
fit <- nlminb(start = start_vals,
              objective = nllik,
              data = x,
              lower = c(-Inf, 1e-6))
# Estimated parameters
fit$par

# Check
mean(x)
var(x)*(n-1)/n 
  

# Likelihood optimization: linear regression model 
nllik_lm <- function(par, x, y) {
  beta0 <- par[1]
  beta1 <- par[2]
  sigma2 <- par[3]
  mu <- beta0 + beta1 * x
  n <- length(y)
  nll <-  0.5 * n * log(sigma2) + sum((y - mu)^2) / (2 * sigma2)
  return(nll)
}

# Generate data
n <- 100
x <- runif(n, 0, 10)
beta1 <- 2; beta2 <- 0.8; sigma <- 1.5
y <- beta1 + beta2 * x + rnorm(n, 0, sigma)
plot(x,y)

# Starting values 
start_vals <- c(beta1 = 0, beta2 = 0, sigma2 = 1)
# Fit
fit <- nlminb(
  start = start_vals,
  objective = nllik_lm,
  x = x,
  y = y,
  lower = c(-Inf, -Inf, 1e-6)
)

# LM estimates 
fit$par

# Check 
fitLM <- lm(y ~ x)
fitLM$coefficients

# sigma2 estimate
sum(fitLM$residuals^2)/n

# Test for the nullity of the single coefficients
library(ISLR)
data("Credit")
lmFit <- lm(Balance ~ Limit + Student, data = Credit)
summary(lmFit)$coefficients

X <- model.matrix(Balance ~ Limit + Student, data = Credit)
n <- nrow(X)
p <- ncol(X)
hat_s2 <- sum(residuals(lmFit)^2)/(nrow(Credit) - p)

# observed test statistics
tval <- lmFit$coefficients/(sqrt(hat_s2 * diag(solve(t(X) %*% X))))
tval

# p-values
2*pt(-abs(tval), n - p)

# Test for the nullity of all the regression coefficients 
hat_sigma2 <- sum(residuals(lmFit)^2)/n
tilde_sigma2 <- var(Credit$Balance)*(n-1)/n

# observed test statistics
fval <- ((tilde_sigma2 - hat_sigma2)/(p-1))/((hat_sigma2)/(n-p))
fval
summary(lmFit)$fstatistic


# p-value
pf(fval, p-1, n-p, lower = FALSE)  
summary(lmFit)