###############################################
#                                             #
#    Standard error of sample Mean vs Median  #
#											  #
# Repeat 1,000 extraction on "N" participants #
#        from a Normal distribution,          #
#        calcultaing meand and median         #
#                                             #
###############################################

sampling.distribution.mean<-c() # the 1,000 random sample means
sampling.distribution.median<-c() # the 1,000 random sample medians

R = 0 # number of replication
set.seed(123456)
repeat{
R=R+1
N = 25 # ...it does not affect the simulation, since we are sampling from normal population
# Sample N values from normal population with parameters mu=10 and s = 2
		X<-rnorm(N,mean=10,sd=2)

# sample mean and median
sampling.distribution.mean<-c(sampling.distribution.mean,mean(X))
sampling.distribution.median<-c(sampling.distribution.median,median(X))

cat("R=",R,"\n")
if(R==1000)break()
	
	
}


#############################################
# Histogram of sampling distribution of mean:
hist(sampling.distribution.mean,prob=TRUE,xlim=c(8,12),ylim=c(0,1.20))
polygon(density(sampling.distribution.mean), col = rgb(0, 0, 1, alpha = 0.35),border="NA")

mean(sampling.distribution.mean)
sd(sampling.distribution.mean) 
2/sqrt(N) # Theoretical value for standard error of the Mean


###############################################
# Histogram of sampling distribution of median:
hist(sampling.distribution.median,prob=TRUE,xlim=c(8,12),ylim=c(0,1.20))
polygon(density(sampling.distribution.median), col = rgb(1, 0, 0, alpha = 0.35),border="NA")

mean(sampling.distribution.median)
sd(sampling.distribution.median) 
1.25*2/sqrt(N) # Theoretical value for standard error of the Median (large sample from normal population)


##########################
# togheter, and overlapped:
hist(sampling.distribution.mean,prob=TRUE,xlim=c(8,12),col="white",border="NA")
polygon(density(sampling.distribution.median), col = rgb(1, 0, 0, alpha = 0.35),border="NA")
polygon(density(sampling.distribution.mean), col = rgb(0, 0, 1, alpha = 0.35),border="NA")
abline(v=10,lwd=4)

sd(sampling.distribution.median)/sd(sampling.distribution.mean) 
# Sample disribution of median approx 1.25 more "variable" than mean


###############################################
#                                             #
#    95% Confidence interval for a proportion #
#											  #
#   Repeat 100 extraction from a binomial     #
#        with n = 100,                        #
#        with p = 0.456                       #
#   construct each time a confidence interval #
#   plot their lower and upper limits         #
###############################################


CI95Inf<-c()
CI95Sup<-c()
R = 0 # number of replication
#set.seed(123456)
repeat{
R=R+1

# Random sample from binomial distribution (n = 100, p = 0.456)   
		X<-rbinom(1,size=100,p=0.456)
        p_hat=X/100
        se=sqrt(p_hat*(1-p_hat)/100)
CI95Inf<-c(CI95Inf,p_hat-1.96*se)
CI95Sup<-c(CI95Sup,p_hat+1.96*se)
cat("R=",R,"\n")
if(R==100)break()
	
	
}
#--------------------------------------------------
plot(1:R,CI95Inf,ylim=c(0,1),type="n")
abline(h=0.456,col="green",lwd=2)
for(i in 1:R){
	
	segments(
	         x0=i, 
	         y0=CI95Inf[i], 
	         x1=i, 
	         y1=CI95Sup[i],
	         col=if(CI95Inf[i]<=0.456 & CI95Sup[i]>=0.456) "black" else"red")
	
}
#--------------------------------------------------


###############################################
#                                             #
#    99% Confidence interval for a proportion #
#											  #
#   Repeat 100 extraction from a binomial     #
#        with n = 100,                        #
#        with p = 0.456                       #
#   construct each time a confidence interval #
#   plot their lower and upper limits         #
###############################################


CI99Inf<-c()
CI99Sup<-c()
R = 0 # number of replication
#set.seed(123456)
repeat{
R=R+1

# Random sample from binomial distribution (n = 100, p = 0.456)   
		X<-rbinom(1,size=100,p=0.456)
        p_hat=X/100
        se=sqrt(p_hat*(1-p_hat)/100)
CI99Inf<-c(CI99Inf,p_hat-2.58*se)
CI99Sup<-c(CI99Sup,p_hat+2.58*se)
cat("R=",R,"\n")
if(R==100)break()
	
	
}
#--------------------------------------------------
plot(1:R,CI99Inf,ylim=c(0,1),type="n")
abline(h=0.456,col="green",lwd=2)
for(i in 1:R){
	
	segments(
	         x0=i, 
	         y0=CI99Inf[i], 
	         x1=i, 
	         y1=CI99Sup[i],
	         col=if(CI99Inf[i]<=0.456 & CI99Sup[i]>=0.456) "black" else"red")
	
}
#--------------------------------------------------


#Example: A Husband Choosing Not to Have Children:
#Of 598 respondents, 366 said yes, and 232 said no.
#Calculate the 99% confidence interval.

n=598
p_hat=366/n
se=se=sqrt(p_hat*(1-p_hat)/n)
CI99Inf=p_hat-2.58*se
CI99Sup=p_hat+2.58*se


#---------------------------------------------------


###############################################
#     t-Distribution simulation.              #
#			           			              #
# Repeat 1,000 extractions from a normal      #
############################################### 

distribution.zmean<-c()
distribution.tmean<-c()

R = 0 # number of replication
set.seed(123)
repeat{
R=R+1
n = 5 # ...try to change N value to obtain different t-distribution with n-1 degree of freedom
# Random sample from normal population    
		X<-rnorm(n,mean=10,sd=2)
        mean=mean(X)
        exact_se=2/sqrt(n)
        se=sd(X)/sqrt(n)
        
distribution.zmean<-c(distribution.zmean,(mean-10)/exact_se)
distribution.tmean<-c(distribution.tmean,(mean-10)/se)
cat("R=",R,"\n")
if(R==1000)break()
	
	
}
#--------------------------------------------------
# Histogram of sampling distribution of z-mean:
plot(density(distribution.zmean),type="l",lwd=2,col="red",bty="n",xlim=c(-5,5))
points(density(distribution.tmean),type="l",lwd=2,col="blue",bty="n",xlim=c(-5,5),)
abline(v=0)
#--------------------------------------------------