############################
# R code Lab 1: 18/10/2021 #
############################

library(MASS)      # hist.scott()
library(skellam)   # dskellam(), pskellam()

####################################
# CLT application: Waterpolo match #
####################################

# W = X - Y, with X ~ Bin(n1, p1) and Y ~ Bin(n2, p2):
# by the CLT, W is approximately normal with the mean and variance below
p1 <- 0.5; p2 <- 0.7; n1 <- 20; n2 <- 20

mean_w <- n1*p1 - n2*p2
sigma2_w <- n1*p1*(1 - p1) + n2*p2*(1 - p2)

Prob_win_Posillipo <- pnorm(0, mean = mean_w, sd = sqrt(sigma2_w), lower.tail = FALSE)
Prob_win_Posillipo

# Alternatively
Prob_win_Posillipo <- 1 - pnorm(0, mean = mean_w, sd = sqrt(sigma2_w))
Prob_win_Posillipo
# [1] 0.09362452

##################
# A summary plot #
##################

plot(1, ylim = c(0, 0.3), xlim = c(-12, 20), ylab = "f(x)", xlab = "x", type = "n")
points(0:n1, dbinom(0:n1, n1, p1), pch = 21, bg = 1)
text(10.25, 0.20, "X", cex = 2, col = "black")
points(0:n2, dbinom(0:n2, n2, p2), pch = 21, bg = 2)
text(14.25, 0.23, "Y", cex = 2, col = "red")
curve(dnorm(x, p1*n1 - p2*n2, sqrt(n1*p1*(1 - p1) + n2*p2*(1 - p2))),
      add = TRUE, col = "blue", lty = "dashed")
text(-4.0, 0.16, "W", cex = 2, col = 4)
segments(0, 0, 0, dnorm(0, mean_w, sqrt(sigma2_w)), col = "black")

##############################################
# Waterpolo games: alternative specification #
##############################################

# goals modelled as Poisson counts: W = X - Y follows a Skellam distribution
lambda1 <- 5; lambda2 <- 7.5

Prob_win_Posillipo_S <- pskellam(0, lambda1, lambda2, lower.tail = FALSE)
Prob_win_Posillipo_S
# [1] 0.1961603

##################
# A summary plot #
##################

plot(1, ylim = c(0, 0.3), xlim = c(-12, 20), ylab = "f(x)", xlab = "x", type = "n")
points(0:n1, dpois(0:n1, lambda1), pch = 21, bg = 1)
text(4.5, 0.25, "X", cex = 2, col = "black")
points(0:n2, dpois(0:n2, lambda2), pch = 21, bg = 2)
text(7, 0.20, "Y", cex = 2, col = "red")
points(-12:20, dskellam(-12:20, lambda1, lambda2), col = "blue")
text(-3.0, 0.16, "W", cex = 2, col = 4)
segments(0, 0, 0, dskellam(0, lambda1, lambda2), col = "black")

###########################
# Inverse sampling method #
###########################

###############################
# Generation from exponential #
###############################

# inverse method: if U ~ U(0,1), then -log(U)/lambda ~ Exp(lambda)
B <- 10000
lambda <- 2
x <- rep(0, B)
for(i in 1:B){
  x[i] <- -log(runif(1, 0, 1))/lambda
}

par(mfrow = c(1, 2))
hist.scott(x, xlab = "x", prob = TRUE)
curve(dexp(x, rate = lambda), from = 1e-16, col = "red", add = TRUE)
plot(ecdf(x))
curve(pexp(x, rate = lambda), col = "red", add = TRUE)

###########################################
# Generation from minimum of exponentials #
###########################################

# the minimum of n iid Exp(lambda) variables is Exp(n * lambda)
B <- 10000
lambda <- 2
n <- 10
z <- rep(0, B)
for(i in 1:B){
  x <- -log(runif(n, 0, 1))/lambda
  z[i] <- min(x)
}

par(mfrow = c(1, 2))
hist.scott(z, xlab = "z", prob = TRUE)
curve(dexp(x, rate = n*lambda), from = 1e-16, col = "red", add = TRUE)
plot(ecdf(z), xlab = "z")
curve(pexp(x, rate = n*lambda), col = "red", add = TRUE)

##################################################
# It's your turn: generate from Gamma(n, lambda) #
##################################################
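# A possible sketch (not the only valid approach), assuming Gamma(n, lambda)
# is parameterised by an integer shape n and rate lambda: such a variable is
# the sum of n independent Exp(lambda) variables, each obtained with the
# inverse method used above.

B <- 10000
lambda <- 2
n <- 10
y <- rep(0, B)
for(i in 1:B){
  y[i] <- sum(-log(runif(n, 0, 1))/lambda)   # sum of n inverse-method exponentials
}

par(mfrow = c(1, 2))
hist.scott(y, xlab = "y", prob = TRUE)
curve(dgamma(x, shape = n, rate = lambda), col = "red", add = TRUE)
plot(ecdf(y), xlab = "y")
curve(pgamma(x, shape = n, rate = lambda), col = "red", add = TRUE)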
######################################
# Normal and Chi-square distribution #
######################################

n <- 10     # n observations
B <- 10000  # B simulations
chisq.mc <- rep(0, B)
set.seed(1234)
for(j in 1:B){
  x <- rnorm(n, 0, 1)
  chisq.mc[j] <- sum(x^2)   # sum of n squared N(0,1) draws: chi-square with n df
}

par(mfrow = c(1, 2))
hist.scott(chisq.mc, prob = TRUE, xlab = "x")
curve(dchisq(x, df = n), add = TRUE, col = "red", lwd = 1.5)
plot(ecdf(chisq.mc), xlim = c(0, 25))
curve(pchisq(x, df = n), add = TRUE, col = "red", lwd = 1)

#####################################################
# It's your turn: compare sample mean, variance and #
# quantile of order 0.01, 0.05, via                 #
# MC simulation with true values                    #
#####################################################
# (a possible sketch for this exercise is given at the end of the script)

###################################
# Distribution of the sample mean #
###################################

n <- 30    # n observations
B <- 1000  # B simulations
set.seed(1234)

# sample generations
samples <- array(0, dim = c(n, 3, B))
for(i in 1:B){
  samples[, 1, i] <- rnorm(n, 0, 1)
  samples[, 2, i] <- rt(n, df = 3)
  samples[, 3, i] <- runif(n, 0, 1)
}

# Sample statistics
samples_stat <- array(0, dim = c(2, 3, B))
for(j in 1:3){
  # sample mean
  samples_stat[1, j, ] <- apply(samples[, j, ], 2, mean)
  # sample variance
  samples_stat[2, j, ] <- apply(samples[, j, ], 2, var)
}

##########
# N(0,1) #
##########

par(mfrow = c(1, 3))
hist.scott(samples_stat[1, 1, ], prob = TRUE, xlab = "x", main = "N(0,1)")
curve(dnorm(x, 0, 1/sqrt(n)), col = "red", lwd = 2, add = TRUE)

########
# t(3) #
########

hist.scott(samples_stat[1, 2, ], prob = TRUE, xlab = "x", main = "t(3)")
curve(dnorm(x, 0, sqrt(3/n)), col = "red", lwd = 2, add = TRUE)

##########
# U(0,1) #
##########

hist.scott(samples_stat[1, 3, ], prob = TRUE, xlab = "x", main = "U(0,1)")
curve(dnorm(x, 1/2, 1/sqrt(12*n)), col = "red", lwd = 2, add = TRUE)

#######################################
# Distribution of the sample variance #
#######################################

sigma2_n <- 1
sigma2_t <- 3     # variance of t(3): 3/(3 - 2)
sigma2_u <- 1/12

par(mfrow = c(1, 3))

##########
# N(0,1) #
##########

hist.scott(samples_stat[2, 1, ], prob = TRUE, xlab = expression(s^2), main = "N(0,1)")
curve((n - 1)/sigma2_n * dchisq(x * (n - 1)/sigma2_n, df = n - 1),
      add = TRUE, col = "red", lwd = 2)

########
# t(3) #
########

hist.scott(samples_stat[2, 2, ], prob = TRUE, xlab = expression(s^2), main = "t(3)",
           xlim = c(0, 25))
curve((n - 1)/sigma2_t * dchisq(x * (n - 1)/sigma2_t, df = n - 1),
      add = TRUE, col = "red", lwd = 2)
# green curve: scaled chi-square matching the mean and the observed variance of s^2
var_sample_var <- var(samples_stat[2, 2, ])
curve(2 * sigma2_t/var_sample_var * dchisq(x * 2 * sigma2_t/var_sample_var,
      df = 2 * sigma2_t^2/var_sample_var),
      add = TRUE, col = "green", lwd = 2)

##########
# U(0,1) #
##########

hist.scott(samples_stat[2, 3, ], prob = TRUE, xlab = expression(s^2), main = "U(0,1)")
curve((n - 1)/sigma2_u * dchisq(x * (n - 1)/sigma2_u, df = n - 1),
      add = TRUE, col = "red", lwd = 2)
var_sample_var <- var(samples_stat[2, 3, ])
curve(2 * sigma2_u/var_sample_var * dchisq(x * 2 * sigma2_u/var_sample_var,
      df = 2 * sigma2_u^2/var_sample_var),
      add = TRUE, col = "green", lwd = 2)

##########################################
# It's your turn: biased sample variance #
##########################################
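# A possible sketch (one of several), assuming the exercise asks to repeat the
# previous comparison with the biased variance estimator, which divides by n
# instead of n - 1: for N(0,1) samples, n * sigma_hat^2 / sigma^2 is
# chi-square(n - 1) distributed.

biased_var <- (n - 1)/n * samples_stat[2, 1, ]   # biased variances, N(0,1) samples
par(mfrow = c(1, 1))
hist.scott(biased_var, prob = TRUE, xlab = expression(hat(sigma)^2), main = "N(0,1)")
curve(n/sigma2_n * dchisq(x * n/sigma2_n, df = n - 1),
      add = TRUE, col = "red", lwd = 2)
# the same rescaling can be applied to the t(3) and U(0,1) samples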
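############################################################
# A possible sketch for the earlier exercise: compare MC   #
# mean, variance and quantiles with the chi-square values  #
############################################################

# assumptions: the comparison refers to chisq.mc (10 observations per sample,
# as in the chi-square section above); it is regenerated here so that this
# block is self-contained
set.seed(1234)
chisq.mc <- replicate(10000, sum(rnorm(10, 0, 1)^2))
c(MC = mean(chisq.mc), exact = 10)        # exact mean of chi-square(10) is 10
c(MC = var(chisq.mc), exact = 2 * 10)     # exact variance is 2 * df
rbind(MC = quantile(chisq.mc, probs = c(0.01, 0.05)),
      exact = qchisq(c(0.01, 0.05), df = 10))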