############################
# R code Lab 1: 18/10/2021 #
############################

library(MASS)      # hist.scott()
library(skellam)   # dskellam(), pskellam()

####################################
# CLT application: Waterpolo match #
####################################

# W = X - Y, with X ~ Bin(n1, p1) and Y ~ Bin(n2, p2):
# by the CLT, W is approximately normal with the mean and variance below
p1 <- 0.5; p2 <- 0.7; n1 <- 20; n2 <- 20

mean_w <- n1*p1 - n2*p2
sigma2_w <- n1*p1*(1 - p1) + n2*p2*(1 - p2)

Prob_win_Posillipo <- pnorm(0, mean = mean_w, sd = sqrt(sigma2_w), lower.tail = FALSE)
Prob_win_Posillipo

# Alternatively
Prob_win_Posillipo <- 1 - pnorm(0, mean = mean_w, sd = sqrt(sigma2_w))
Prob_win_Posillipo
# [1] 0.09362452

##################
# A summary plot #
##################

plot(1, ylim = c(0, 0.3), xlim = c(-12, 20), ylab = "f(x)", xlab = "x", type = "n")
points(0:n1, dbinom(0:n1, n1, p1), pch = 21, bg = 1)
text(10.25, 0.20, "X", cex = 2, col = "black")
points(0:n2, dbinom(0:n2, n2, p2), pch = 21, bg = 2)
text(14.25, 0.23, "Y", cex = 2, col = "red")
curve(dnorm(x, p1*n1 - p2*n2, sqrt(n1*p1*(1 - p1) + n2*p2*(1 - p2))),
      add = TRUE, col = "blue", lty = "dashed")
text(-4.0, 0.16, "W", cex = 2, col = 4)
segments(0, 0, 0, dnorm(0, mean_w, sqrt(sigma2_w)), col = "black")

##############################################
# Waterpolo games: alternative specification #
##############################################

# goals modelled as Poisson counts: W = X - Y follows a Skellam distribution
lambda1 <- 5; lambda2 <- 7.5

Prob_win_Posillipo_S <- pskellam(0, lambda1, lambda2, lower.tail = FALSE)
Prob_win_Posillipo_S
# [1] 0.1961603

##################
# A summary plot #
##################

plot(1, ylim = c(0, 0.3), xlim = c(-12, 20), ylab = "f(x)", xlab = "x", type = "n")
points(0:n1, dpois(0:n1, lambda1), pch = 21, bg = 1)
text(4.5, 0.25, "X", cex = 2, col = "black")
points(0:n2, dpois(0:n2, lambda2), pch = 21, bg = 2)
text(7, 0.20, "Y", cex = 2, col = "red")
points(-12:20, dskellam(-12:20, lambda1, lambda2), col = "blue")
text(-3.0, 0.16, "W", cex = 2, col = 4)
segments(0, 0, 0, dskellam(0, lambda1, lambda2), col = "black")

###########################
# Inverse sampling method #
###########################

###############################
# Generation from exponential #
###############################

# inverse method: if U ~ U(0,1), then -log(U)/lambda ~ Exp(lambda)
B <- 10000
lambda <- 2
x <- rep(0, B)
for(i in 1:B){
  x[i] <- -log(runif(1, 0, 1))/lambda
}

par(mfrow = c(1, 2))
hist.scott(x, xlab = "x", prob = TRUE)
curve(dexp(x, rate = lambda), from = 1e-16, col = "red", add = TRUE)
plot(ecdf(x))
curve(pexp(x, rate = lambda), col = "red", add = TRUE)

###########################################
# Generation from minimum of exponentials #
###########################################

# the minimum of n iid Exp(lambda) variables is Exp(n * lambda)
B <- 10000
lambda <- 2
n <- 10
z <- rep(0, B)
for(i in 1:B){
  x <- -log(runif(n, 0, 1))/lambda
  z[i] <- min(x)
}

par(mfrow = c(1, 2))
hist.scott(z, xlab = "z", prob = TRUE)
curve(dexp(x, rate = n*lambda), from = 1e-16, col = "red", add = TRUE)
plot(ecdf(z), xlab = "z")
curve(pexp(x, rate = n*lambda), col = "red", add = TRUE)

##################################################
# It's your turn: generate from Gamma(n, lambda) #
##################################################
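# A possible sketch (not the only valid approach), assuming Gamma(n, lambda)
# is parameterised by an integer shape n and rate lambda: such a variable is
# the sum of n independent Exp(lambda) variables, each obtained with the
# inverse method used above.

B <- 10000
lambda <- 2
n <- 10
y <- rep(0, B)
for(i in 1:B){
  y[i] <- sum(-log(runif(n, 0, 1))/lambda)   # sum of n inverse-method exponentials
}

par(mfrow = c(1, 2))
hist.scott(y, xlab = "y", prob = TRUE)
curve(dgamma(x, shape = n, rate = lambda), col = "red", add = TRUE)
plot(ecdf(y), xlab = "y")
curve(pgamma(x, shape = n, rate = lambda), col = "red", add = TRUE)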
######################################
# Normal and Chi-square distribution #
######################################

n <- 10     # n observations
B <- 10000  # B simulations
chisq.mc <- rep(0, B)
set.seed(1234)
for(j in 1:B){
  x <- rnorm(n, 0, 1)
  chisq.mc[j] <- sum(x^2)   # sum of n squared N(0,1) draws: chi-square with n df
}

par(mfrow = c(1, 2))
hist.scott(chisq.mc, prob = TRUE, xlab = "x")
curve(dchisq(x, df = n), add = TRUE, col = "red", lwd = 1.5)
plot(ecdf(chisq.mc), xlim = c(0, 25))
curve(pchisq(x, df = n), add = TRUE, col = "red", lwd = 1)

#####################################################
# It's your turn: compare sample mean, variance and #
# quantile of order 0.01, 0.05, via                 #
# MC simulation with true values                    #
#####################################################
# (a possible sketch for this exercise is given at the end of the script)

###################################
# Distribution of the sample mean #
###################################

n <- 30    # n observations
B <- 1000  # B simulations
set.seed(1234)

# sample generations
samples <- array(0, dim = c(n, 3, B))
for(i in 1:B){
  samples[, 1, i] <- rnorm(n, 0, 1)
  samples[, 2, i] <- rt(n, df = 3)
  samples[, 3, i] <- runif(n, 0, 1)
}

# Sample statistics
samples_stat <- array(0, dim = c(2, 3, B))
for(j in 1:3){
  # sample mean
  samples_stat[1, j, ] <- apply(samples[, j, ], 2, mean)
  # sample variance
  samples_stat[2, j, ] <- apply(samples[, j, ], 2, var)
}

##########
# N(0,1) #
##########

par(mfrow = c(1, 3))
hist.scott(samples_stat[1, 1, ], prob = TRUE, xlab = "x", main = "N(0,1)")
curve(dnorm(x, 0, 1/sqrt(n)), col = "red", lwd = 2, add = TRUE)

########
# t(3) #
########

hist.scott(samples_stat[1, 2, ], prob = TRUE, xlab = "x", main = "t(3)")
curve(dnorm(x, 0, sqrt(3/n)), col = "red", lwd = 2, add = TRUE)

##########
# U(0,1) #
##########

hist.scott(samples_stat[1, 3, ], prob = TRUE, xlab = "x", main = "U(0,1)")
curve(dnorm(x, 1/2, 1/sqrt(12*n)), col = "red", lwd = 2, add = TRUE)

#######################################
# Distribution of the sample variance #
#######################################

sigma2_n <- 1
sigma2_t <- 3     # variance of t(3): 3/(3 - 2)
sigma2_u <- 1/12

par(mfrow = c(1, 3))

##########
# N(0,1) #
##########

hist.scott(samples_stat[2, 1, ], prob = TRUE, xlab = expression(s^2), main = "N(0,1)")
curve((n - 1)/sigma2_n * dchisq(x * (n - 1)/sigma2_n, df = n - 1),
      add = TRUE, col = "red", lwd = 2)

########
# t(3) #
########

hist.scott(samples_stat[2, 2, ], prob = TRUE, xlab = expression(s^2), main = "t(3)",
           xlim = c(0, 25))
curve((n - 1)/sigma2_t * dchisq(x * (n - 1)/sigma2_t, df = n - 1),
      add = TRUE, col = "red", lwd = 2)
# green curve: scaled chi-square matching the mean and the observed variance of s^2
var_sample_var <- var(samples_stat[2, 2, ])
curve(2 * sigma2_t/var_sample_var * dchisq(x * 2 * sigma2_t/var_sample_var,
      df = 2 * sigma2_t^2/var_sample_var),
      add = TRUE, col = "green", lwd = 2)

##########
# U(0,1) #
##########

hist.scott(samples_stat[2, 3, ], prob = TRUE, xlab = expression(s^2), main = "U(0,1)")
curve((n - 1)/sigma2_u * dchisq(x * (n - 1)/sigma2_u, df = n - 1),
      add = TRUE, col = "red", lwd = 2)
var_sample_var <- var(samples_stat[2, 3, ])
curve(2 * sigma2_u/var_sample_var * dchisq(x * 2 * sigma2_u/var_sample_var,
      df = 2 * sigma2_u^2/var_sample_var),
      add = TRUE, col = "green", lwd = 2)

##########################################
# It's your turn: biased sample variance #
##########################################
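# A possible sketch (one of several), assuming the exercise asks to repeat the
# previous comparison with the biased variance estimator, which divides by n
# instead of n - 1: for N(0,1) samples, n * sigma_hat^2 / sigma^2 is
# chi-square(n - 1) distributed.

biased_var <- (n - 1)/n * samples_stat[2, 1, ]   # biased variances, N(0,1) samples
par(mfrow = c(1, 1))
hist.scott(biased_var, prob = TRUE, xlab = expression(hat(sigma)^2), main = "N(0,1)")
curve(n/sigma2_n * dchisq(x * n/sigma2_n, df = n - 1),
      add = TRUE, col = "red", lwd = 2)
# the same rescaling can be applied to the t(3) and U(0,1) samples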
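############################################################
# A possible sketch for the earlier exercise: compare MC   #
# mean, variance and quantiles with the chi-square values  #
############################################################

# assumptions: the comparison refers to chisq.mc (10 observations per sample,
# as in the chi-square section above); it is regenerated here so that this
# block is self-contained
set.seed(1234)
chisq.mc <- replicate(10000, sum(rnorm(10, 0, 1)^2))
c(MC = mean(chisq.mc), exact = 10)        # exact mean of chi-square(10) is 10
c(MC = var(chisq.mc), exact = 2 * 10)     # exact variance is 2 * df
rbind(MC = quantile(chisq.mc, probs = c(0.01, 0.05)),
      exact = qchisq(c(0.01, 0.05), df = 10))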