## simulate explanatory variable and errors
set.seed(42)  ## optional: fix the seed so results are reproducible
x <- runif(10000, min=-2, max=3)
u <- rnorm(10000, sd=0.2)

## generate the dependent variable according to the DGP:
y <- 6 + 0.5*x + u

## visualize the relationship between y and x in the entire population:
plot(x, y)
abline(a=6, b=0.5, lwd=3, col="green3")  ## the "true" population line

## put x and y into a data.frame (needed to sample x,y pairs jointly
## and to pass the data to lm())
pop <- data.frame(y=y, x=x)

## this is how one extracts a random sample of 20 observations:
mysample <- pop[sample(nrow(pop), 20), ]

## the estimates resulting from *this particular sample* are:
coef(lm(y ~ x, data=mysample))

## repeat 1000 times to see how beta.hat is distributed
mysamples <- vector("list", 1000)
for(i in 1:1000) {
    mysamples[[i]] <- pop[sample(nrow(pop), 20), ]
}

## estimate 1000 models, one per sample, and collect the resulting
## beta.hats (the slope coefficients)
betas <- lapply(mysamples, FUN=function(z) coef(lm(y ~ x, data=z))[2])
betas <- unlist(betas)

## plot the empirical distribution of the beta.hats
## (ylim leaves room for the tighter densities overlaid below)
plot(density(betas), col="green3", lwd=3, ylim=c(0, 30))

## empirical illustration of unbiasedness: the mean of the beta.hats
## will be very close to the "true" beta of 0.5
mean(betas)

## empirical illustration of consistency: larger samples produce a
## distribution of estimates with less variance (an unbiased estimator
## whose variance shrinks to zero converges in probability to the true
## value); take samples of 50 and then 100
mysamples.50 <- vector("list", 1000)
for(i in 1:1000) {
    mysamples.50[[i]] <- pop[sample(nrow(pop), 50), ]
}
betas.50 <- lapply(mysamples.50, FUN=function(z) coef(lm(y ~ x, data=z))[2])
betas.50 <- unlist(betas.50)

mysamples.100 <- vector("list", 1000)
for(i in 1:1000) {
    mysamples.100[[i]] <- pop[sample(nrow(pop), 100), ]
}
betas.100 <- lapply(mysamples.100, FUN=function(z) coef(lm(y ~ x, data=z))[2])
betas.100 <- unlist(betas.100)

lines(density(betas.50), col="cyan", lwd=3)
lines(density(betas.100), col="orange", lwd=3)
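
## a small follow-up sketch (not part of the original walkthrough):
## quantify the variance reduction seen in the plot. Under the DGP
## above, sd(beta.hat) shrinks roughly like 1/sqrt(n), so the three
## standard deviations below should scale roughly as
## 1/sqrt(20) : 1/sqrt(50) : 1/sqrt(100)
c(n20=sd(betas), n50=sd(betas.50), n100=sd(betas.100))

## the means stay close to 0.5 at every sample size, since
## unbiasedness does not depend on n
c(n20=mean(betas), n50=mean(betas.50), n100=mean(betas.100))

## label the three densities on the existing plot; colors match the
## lines drawn above
legend("topright", legend=c("n = 20", "n = 50", "n = 100"),
       col=c("green3", "cyan", "orange"), lwd=3)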