############################################### # # # Standard error of sample Mean vs Median # # # # Repeat 1,000 extraction on "N" participants # # from a Normal distribution, # # calcultaing meand and median # # # ############################################### sampling.distribution.mean<-c() # the 1,000 random sample means sampling.distribution.median<-c() # the 1,000 random sample medians R = 0 # number of replication set.seed(123456) repeat{ R=R+1 N = 25 # ...it does not affect the simulation, since we are sampling from normal population # Sample N values from normal population with parameters mu=10 and s = 2 X<-rnorm(N,mean=10,sd=2) # sample mean and median sampling.distribution.mean<-c(sampling.distribution.mean,mean(X)) sampling.distribution.median<-c(sampling.distribution.median,median(X)) cat("R=",R,"\n") if(R==1000)break() } ############################################# # Histogram of sampling distribution of mean: hist(sampling.distribution.mean,prob=TRUE,xlim=c(8,12),ylim=c(0,1.20)) polygon(density(sampling.distribution.mean), col = rgb(0, 0, 1, alpha = 0.35),border="NA") mean(sampling.distribution.mean) sd(sampling.distribution.mean) 2/sqrt(N) # Theoretical value for standard error of the Mean ############################################### # Histogram of sampling distribution of median: hist(sampling.distribution.median,prob=TRUE,xlim=c(8,12),ylim=c(0,1.20)) polygon(density(sampling.distribution.median), col = rgb(1, 0, 0, alpha = 0.35),border="NA") mean(sampling.distribution.median) sd(sampling.distribution.median) 1.25*2/sqrt(N) # Theoretical value for standard error of the Median (large sample from normal population) ########################## # togheter, and overlapped: hist(sampling.distribution.mean,prob=TRUE,xlim=c(8,12),col="white",border="NA") polygon(density(sampling.distribution.median), col = rgb(1, 0, 0, alpha = 0.35),border="NA") polygon(density(sampling.distribution.mean), col = rgb(0, 0, 1, alpha = 0.35),border="NA") abline(v=10,lwd=4) sd(sampling.distribution.median)/sd(sampling.distribution.mean) # Sample disribution of median approx 1.25 more "variable" than mean ############################################### # # # 95% Confidence interval for a proportion # # # # Repeat 100 extraction from a binomial # # with n = 100, # # with p = 0.456 # # construct each time a confidence interval # # plot their lower and upper limits # ############################################### CI95Inf<-c() CI95Sup<-c() R = 0 # number of replication #set.seed(123456) repeat{ R=R+1 # Random sample from binomial distribution (n = 100, p = 0.456)    X<-rbinom(1,size=100,p=0.456) p_hat=X/100 se=sqrt(p_hat*(1-p_hat)/100) CI95Inf<-c(CI95Inf,p_hat-1.96*se) CI95Sup<-c(CI95Sup,p_hat+1.96*se) cat("R=",R,"\n") if(R==100)break() } #-------------------------------------------------- plot(1:R,CI95Inf,ylim=c(0,1),type="n") abline(h=0.456,col="green",lwd=2) for(i in 1:R){ segments( x0=i, y0=CI95Inf[i], x1=i, y1=CI95Sup[i], col=if(CI95Inf[i]<=0.456 & CI95Sup[i]>=0.456) "black" else"red") } #-------------------------------------------------- ############################################### # # # 99% Confidence interval for a proportion # # # # Repeat 100 extraction from a binomial # # with n = 100, # # with p = 0.456 # # construct each time a confidence interval # # plot their lower and upper limits # ############################################### CI99Inf<-c() CI99Sup<-c() R = 0 # number of replication #set.seed(123456) repeat{ R=R+1 # Random sample from binomial distribution (n = 100, p = 0.456)    X<-rbinom(1,size=100,p=0.456) p_hat=X/100 se=sqrt(p_hat*(1-p_hat)/100) CI99Inf<-c(CI99Inf,p_hat-2.58*se) CI99Sup<-c(CI99Sup,p_hat+2.58*se) cat("R=",R,"\n") if(R==100)break() } #-------------------------------------------------- plot(1:R,CI99Inf,ylim=c(0,1),type="n") abline(h=0.456,col="green",lwd=2) for(i in 1:R){ segments( x0=i, y0=CI99Inf[i], x1=i, y1=CI99Sup[i], col=if(CI99Inf[i]<=0.456 & CI99Sup[i]>=0.456) "black" else"red") } #-------------------------------------------------- #Example: A Husband Choosing Not to Have Children: #Of 598 respondents, 366 said yes, and 232 said no. #Calculate the 99% confidence interval. n=598 p_hat=366/n se=se=sqrt(p_hat*(1-p_hat)/n) CI99Inf=p_hat-2.58*se CI99Sup=p_hat+2.58*se #--------------------------------------------------- ############################################### # t-Distribution simulation. # # # # Repeat 1,000 extractions from a normal # ############################################### distribution.zmean<-c() distribution.tmean<-c() R = 0 # number of replication set.seed(123) repeat{ R=R+1 n = 5 # ...try to change N value to obtain different t-distribution with n-1 degree of freedom # Random sample from normal population     X<-rnorm(n,mean=10,sd=2) mean=mean(X) exact_se=2/sqrt(n) se=sd(X)/sqrt(n) distribution.zmean<-c(distribution.zmean,(mean-10)/exact_se) distribution.tmean<-c(distribution.tmean,(mean-10)/se) cat("R=",R,"\n") if(R==1000)break() } #-------------------------------------------------- # Histogram of sampling distribution of z-mean: plot(density(distribution.zmean),type="l",lwd=2,col="red",bty="n",xlim=c(-5,5)) points(density(distribution.tmean),type="l",lwd=2,col="blue",bty="n",xlim=c(-5,5),) abline(v=0) #--------------------------------------------------