### Estimating the POPULATION MEAN
## To estimate the average price of tranquilizer tablets, we selected a random
## sample of pharmacies.
## We want the estimate to be within ±0.10 of the true mean with 95% confidence.
## From a pilot study, the standard deviation was 0.85.
# How many pharmacies should be selected?
# Formula used below: n = z^2 * sigma^2 / prec^2

z_alpha <- 1.96 # two-sided critical value of a standard Normal for alpha = 0.05
sigma <- 0.85   # estimated variability (standard deviation) of the price
prec <- 0.10    # our precision level (margin of error)

n <- (z_alpha^2 * sigma^2) / prec^2
sample_size <- ceiling(n) # ceiling() rounds n UP to the next whole unit
sample_size               # at least n = 278 pharmacies are required

##########################################################
### Estimating a POPULATION PROPORTION/PREVALENCE
## We want to estimate the prevalence of hypertension in a population.
## We select a random sample and we want a 95% confidence and a 5% precision.
## Suppose we do not know the true prevalence, we consider p = 0.5.
## p = 0.5 maximises p * (1 - p), i.e. it is the scenario with the maximum
## variability and therefore the most conservative one.
# Formula used below: n = z^2 * p * (1 - p) / prec^2

zstar <- qnorm(.975) # critical value of the Normal for a 95% confidence level
p <- 0.5             # expected prevalence of hypertension in the population
prec <- 0.05         # margin of error

n <- zstar^2 * p * (1 - p) / prec^2
minsamp <- ceiling(n)
minsamp              # 385 subjects

## Assume we know from previous studies that the prevalence of hypertension
## is 20% in the population.
zstar <- qnorm(.975) # critical value of the Normal for a 95% confidence level
p <- 0.2             # expected prevalence of hypertension in the population
prec <- 0.05         # margin of error

n <- zstar^2 * p * (1 - p) / prec^2
minsamp <- ceiling(n)
minsamp              # 246 subjects: fewer than with the conservative p = 0.5

########################################################
### Estimating the CONFIDENCE INTERVAL for an INCIDENCE RATE
## The diet dataframe contains data from 337 subjects and 14 variables
## It is a subsample of a larger cohort used to estimate the incidence of CHD
## These subjects had completed a questionnaire on dietary habits: 46 CHD events
## occurred in this sample.

library(Epi)
data(diet)
# Columns are referenced explicitly as diet$<name> instead of attach(diet):
# attach() puts the data frame on the search path, where its columns can be
# silently masked by (or confused with) same-named workspace variables.

# Calculate the follow-up time in years for each subject included in the study.
# cal.yr() converts dox and doe to calendar years, so the difference is the
# follow-up in years (presumably dox = date of exit, doe = date of entry;
# confirm against the Epi `diet` documentation).
y <- cal.yr(diet$dox) - cal.yr(diet$doe)
Y <- sum(y)        # total follow-up of the study (person-years)
D <- sum(diet$chd) # total number of incident cases (46)

rate <- D / Y         # incidence rate
rate_se <- sqrt(D) / Y # standard error of the rate used for the 95% CI

# Point estimate, lower and upper 95% confidence limits (estimate -/+ 1.96 * SE)
results <- c(
  round(rate, digits = 3),
  round(rate - 1.96 * rate_se, digits = 3),
  round(rate + 1.96 * rate_se, digits = 3)
)
results
# Incidence was 0.010 per person-year [95% CI: 0.007-0.013]

############################################################################
### Sample size for an INCIDENCE RATE based on PRECISION
## Assume that, for a specific disease A,
## the incidence rate from previous studies is estimated at
## 50 per 10,000 person-years.
## We want to determine the minimum sample size to estimate, at a confidence
## level of 95%, the incidence rate in that population within
## ± 5 per 10,000 person-years.

# Find the desired SE for the rate.
# We know that a 95% CI is given by: estimate -/+ 1.96 * SE
# As we want a margin of error of 5, we impose: 1.96 * SE = 5, so SE = 5 / 1.96
se.rate <- (5 / 1.96)

# With rate = D / Y and SE = sqrt(D) / Y we get SE = rate / sqrt(D), hence
# D = (expected rate / SE)^2 --> required cases = [50 / (5 / 1.96)]^2
number.cases <- (50 / se.rate)^2
number.cases # 384.16, i.e. at least 385 cases

# The original rate was expressed per 10,000 person-years.
# We need to convert cases into person-time:
# rate = cases / person-years --> person-years = cases / rate
person.years <- number.cases / 50 # 7.68, in units of 10,000 person-years
person.years * 10000              # true person-years (about 76,832)