### Chapter 4 ### Example 4.1 ### Read data and set up data frame nhl <- read.csv("http://www.stat.ufl.edu/~winner/data/nhl_ht_wt.csv") attach(nhl); names(nhl) ### Generate random values (-0.5 to 0.5) to add to Height set.seed(1234) N <- NROW(nhl) Height.dev <- 0.5 - runif(N) Height <- Height + Height.dev ### Compute BMI bmi.nhl <- 703 * Weight / (Height^2) set.seed(98765) num.sim <- 10000 n.sample <- 12 samp.mean <- rep(0, num.sim) samp.sd <- rep(0, num.sim) mu.bmi <- mean(bmi.nhl) std.err.bmi <- sd(bmi.nhl)/sqrt(n.sample) for (i in 1:num.sim) { sample <- sample(1:N, n.sample, replace=FALSE) samp.mean[i] <- mean(bmi.nhl[sample]) samp.sd[i] <- sd(bmi.nhl[sample]) } cbind(samp.mean[1], samp.sd[1]) z_050 <- qnorm(.95, 0, 1) z_025 <- qnorm(.975, 0, 1) z_005 <- qnorm(.99, 0, 1) (cover10a <- sum(samp.mean >= mu.bmi - z_050*std.err.bmi & samp.mean <= mu.bmi + z_050*std.err.bmi) / num.sim) (cover05a <- sum(samp.mean >= mu.bmi - z_025*std.err.bmi & samp.mean <= mu.bmi + z_025*std.err.bmi) / num.sim) (cover01a <- sum(samp.mean >= mu.bmi - z_005*std.err.bmi & samp.mean <= mu.bmi + z_005*std.err.bmi) / num.sim) samp.se <- samp.sd / sqrt(n.sample) (cover10b <- sum(samp.mean >= mu.bmi - z_050*samp.se & samp.mean <= mu.bmi + z_050*samp.se) / num.sim) (cover05b <- sum(samp.mean >= mu.bmi - z_025*samp.se & samp.mean <= mu.bmi + z_025*samp.se) / num.sim) (cover01b <- sum(samp.mean >= mu.bmi - z_005*samp.se & samp.mean <= mu.bmi + z_005*samp.se) / num.sim) t_050 <- qt(.95,11); t_025 <- qt(.975,11); t_005 <- qt(.995,11) (cover10c <- sum(samp.mean >= mu.bmi - t_050*samp.se & samp.mean <= mu.bmi + t_050*samp.se) / num.sim) (cover05c <- sum(samp.mean >= mu.bmi - t_025*samp.se & samp.mean <= mu.bmi + t_025*samp.se) / num.sim) (cover01c <- sum(samp.mean >= mu.bmi - t_005*samp.se & samp.mean <= mu.bmi + t_005*samp.se) / num.sim) ### Example 4.2 rm(list=ls(all=TRUE)) E <- 0.20 sigma <- 1.058 alpha <- 0.05 n <- 1 E.t <- E+1 # Keep increasing $n$ until E.t < E while (E.t >= E) { n <- n+1 E.t <- qt(1-alpha/2,n-1)*sigma/sqrt(n) } cbind(n, E.t) ### Example 4.3 rm(list=ls(all=TRUE)) ## Read data from website and attach data frame and obain variable names rr.mar <- read.csv( "http://www.stat.ufl.edu/~winner/data/rocknroll_marathon_mf2015a.csv") attach(rr.mar); names(rr.mar) male.mph <- mph[Gender == "M"] N <- length(male.mph) mu0 <- mean(male.mph) sigma <- sd(male.mph) set.seed(13579) num.sim <- 10000 num.samp <- 40 cv.lo <- qt(.025,num.samp-1) cv.hi <- qt(.975,num.samp-1) t.stat0 <- rep(0, num.sim) p.value0 <- rep(0, num.sim) for (i in 1:num.sim) { sample <- sample(1:N, num.samp, replace=FALSE) ybar <- mean(male.mph[sample]) s <- sd(male.mph[sample]) t.stat0[i] <- (ybar - mu0) / (s / sqrt(num.samp)) p.value0[i] <- 2*(1-pt(abs(t.stat0[i]),num.samp-1)) } sum(p.value1 <= 0.05) / num.sim par(mfrow=c(1,2)) hist(t.stat0, breaks=50, freq=FALSE, main="t-stat Under H0") xt <- seq(-4,4,.01) lines(xt, dt(xt,num.samp-1)) abline(v=cv.lo,lwd=2) abline(v=cv.hi,lwd=2) hist(p.value0, breaks=50, freq=FALSE, main="P-value Under H0") xp <- seq(0,1,0.01) lines(xp,dbeta(xp,1,1)) mu01 <- 6.0 (Delta1 <- (mu0 - mu01) / (sigma/sqrt(num.samp))) ## Old mu_0 is new mu_A (power1 <- pt(cv.lo, num.samp-1, Delta1) + (1-pt(cv.hi, num.samp-1, Delta1))) mu02 <- 6.5 (Delta2 <- (mu0 - mu02) / (sigma/sqrt(num.samp))) ## Old mu_0 is new mu_A (power2 <- pt(cv.lo, num.samp-1, Delta2) + (1-pt(cv.hi, num.samp-1, Delta2))) set.seed(1234) t.stat1 <- rep(0, num.sim) p.value1 <- rep(0, num.sim) for (i in 1:num.sim) { sample <- sample(1:N, num.samp, replace=FALSE) ybar <- mean(male.mph[sample]) s <- sd(male.mph[sample]) t.stat1[i] <- (ybar - mu01) / (s / sqrt(num.samp)) p.value1[i] <- 2*(1-pt(abs(t.stat1[i]),num.samp-1)) } sum(t.stat1 <= cv.lo | t.stat1 >= cv.hi) / num.sim par(mfrow=c(1,1)) hist(t.stat1, breaks=50, freq=FALSE, main="t-stat Under H0:mu=6") xt <- seq(-4,6,.01) lines(xt, dt(xt,num.samp-1,Delta1)) abline(v=cv.lo,lwd=2) abline(v=cv.hi,lwd=2) set.seed(5678) t.stat2 <- rep(0, num.sim) p.value2 <- rep(0, num.sim) for (i in 1:num.sim) { sample <- sample(1:N, num.samp, replace=FALSE) ybar <- mean(male.mph[sample]) s <- sd(male.mph[sample]) t.stat2[i] <- (ybar - mu02) / (s / sqrt(num.samp)) p.value2[i] <- 2*(1-pt(abs(t.stat2[i]),num.samp-1)) } sum(t.stat2 <= cv.lo | t.stat2 >= cv.hi) / num.sim ### Example 4.4 rm(list=ls(all=TRUE)) n <- 3 alpha <- 0.05 mu_diff <- 0.25 sigma <- 1.058 Delta <- mu_diff/(sigma/sqrt(n)) CV_LO <- qt(alpha/2,n-1) CV_HI <- qt(1-alpha/2,n-1) (power <- pt(CV_LO,n-1,Delta) + (1-pt(CV_HI,n-1,Delta))) while (power <= 0.80) { n <- n+1 Delta <- mu_diff/(sigma/sqrt(n)) CV_LO <- qt(alpha/2,n-1) CV_HI <- qt(1-alpha/2,n-1) power <- pt(CV_LO,n-1,Delta) + (1-pt(CV_HI,n-1,Delta)) } cbind(n, power) par(mfrow=c(2,2)) t <- seq(-6,6,0.01) mu_diff <- 0.25 sigma <- 1.058 for (n in c(10, 30, 70, 150)) { Delta <- mu_diff/(sigma/sqrt(n)) plot(t,dt(t,n-1),type="l",main=paste("Central and Non-Central t, n=",n)) lines(t,dt(t,n-1,Delta),lty=2) abline(v=qt(.025,n-1)) abline(v=qt(.975,n-1)) } ### Example 4.5 rm(list=ls(all=TRUE)) avshotlen <- read.csv( "http://www.stat.ufl.edu/~winner/data/movie_avshotlength.csv") attach(avshotlen); names(avshotlen) median(ASL) sum(ASL > 100) hist(ASL[ASL < 100], breaks=100) abline(v=median(ASL),lwd=2) pbinom(0:20,20,0.5) set.seed(4321) N <- length(ASL) sample1 <- sample(1:N, 20, replace=FALSE) ASL.sample <- ASL[sample1] (ASL.sample.order <- sort(ASL.sample)) ## Sample values sorted cbind(ASL.sample.order[6],ASL.sample.order[15]) ## 6th and 15th selected set.seed(7654) num.sim <- 10000 num.samp <- 100 med.ci <- matrix(rep(0, 2*num.sim),ncol=2) for (i in 1:num.sim) { sample <- sample(1:N, 20, replace=FALSE) med.ci[i,1] <- sort(ASL[sample])[6] med.ci[i,2] <- sort(ASL[sample])[15] } med.pop <- median(ASL) sum(med.ci[,1] <= med.pop & med.ci[,2] >= med.pop) / num.sim ### Example 4.7 rm(list=ls(all=TRUE)) avshotlen <- read.csv( "http://www.stat.ufl.edu/~winner/data/movie_avshotlength.csv") attach(avshotlen); names(avshotlen) (N <- length(ASL)) (mu <- mean(ASL)) (sigma <- sd(ASL)) ## Obtain the original random sample of n=25 set.seed(34567) samp.size <- 25 sample1 <- sample(1:N,samp.size,replace=F) ASL.sample1 <- ASL[sample1] ASL.sample1 mean(ASL.sample1) qqnorm(ASL.sample1); qqline(ASL.sample1) shapiro.test(ASL.sample1) ### Method 1 - Chihara/Hesterberg Section 5.3, pp. 113-114 set.seed(24680) num.boot.inner <- 10000 ASL.mean <- rep(0,num.boot.inner) for (i2 in 1:num.boot.inner) { x <- sample(ASL.sample1, samp.size, replace=T) ASL.mean[i2] <- mean(x) } quantile(ASL.mean,c(.025,.975)) mean(ASL.mean) sd(ASL.mean) mean(ASL.mean) - qt(.975,samp.size-1)*sd(ASL.mean) mean(ASL.mean) + qt(.975,samp.size-1)*sd(ASL.mean) sum(ASL.mean < mean(ASL.mean) - qt(.975,samp.size-1)*sd(ASL.mean)) / num.boot.inner sum(ASL.mean > mean(ASL.mean) + qt(.975,samp.size-1)*sd(ASL.mean)) / num.boot.inner ### Example 4.8 rm(list=ls(all=TRUE)) ### Part 1 avshotlen <- read.csv( "http://www.stat.ufl.edu/~winner/data/movie_avshotlength.csv") attach(avshotlen); names(avshotlen) (N <- length(ASL)) (mu <- mean(ASL)) (sigma <- sd(ASL)) ## Obtain the original random sample of n=25 set.seed(34567) samp.size <- 25 sample1 <- sample(1:N,samp.size,replace=F) ASL.sample1 <- ASL[sample1] mean(ASL.sample1) sd(ASL.sample1) ### Method 2 - Chihara/Hesterberg Section 7.5, pp. 195-198 # ASL.t computes and saves t* set.seed(24680) ybar <- mean(ASL.sample1) num.boot.inner <- 10000 ASL.t <- rep(0,num.boot.inner) ASL.mean <- rep(0,num.boot.inner) for (i2 in 1:num.boot.inner) { x <- sample(ASL.sample1, samp.size, replace=T) ASL.t[i2] <- (mean(x) - ybar) / (sd(x) / sqrt(samp.size)) ASL.mean[i2] <- mean(x) } Q_L <- quantile(ASL.t,0.025) Q_U <- quantile(ASL.t,0.975) mu_L <- ybar - Q_U*sd(ASL.sample1)/sqrt(samp.size) mu_U <- ybar - Q_L*sd(ASL.sample1)/sqrt(samp.size) cbind(Q_L, Q_U, mu_L, mu_U) mean(ASL.mean) - ybar sd(ASL.mean) hist(ASL.t,breaks=40,freq=F, main=expression(paste("Histogram of ",t^"*"," and ",t[24]," Density"))) t.seq <- seq(-4,4,.01) lines(t.seq,dt(t.seq,samp.size-1)) abline(v=c(Q_L,Q_U),lwd=2) ### Part 2 avshotlen <- read.csv( "http://www.stat.ufl.edu/~winner/data/movie_avshotlength.csv") attach(avshotlen); names(avshotlen) (N <- length(ASL)) (mu <- mean(ASL)) (sigma <- sd(ASL)) ## Initialize mean/sd and CI holders - 1000 outer (original) samples set.seed(13579) num.boot.outer <- 1000 ASL.mean.sd <- matrix(rep(0,2*num.boot.outer),ncol=2) ASL.boot1 <- matrix(rep(0,2*num.boot.outer),ncol=2) ASL.boot2 <- matrix(rep(0,2*num.boot.outer),ncol=2) ASL.tnorm <- matrix(rep(0,2*num.boot.outer),ncol=2) samp.size <- 25 sqrt.n <- sqrt(samp.size) t.24 <- qt(c(.025,.975),samp.size-1) ### Begin outer loop for (i1 in 1:num.boot.outer) { sample1 <- sample(1:N,samp.size,replace=F) ASL.sample1 <- ASL[sample1] ### Original Samples ASL.mean.sd[i1,1] <- mean(ASL.sample1) ### Save mean in column 1 ASL.mean.sd[i1,2] <- sd(ASL.sample1) ### Save sd in column 2 ### Begin inner (bootstrap) loop num.boot.inner <- 1000 ASL.mean <- rep(0,num.boot.inner) ASL.t <- rep(0,num.boot.inner) for (i2 in 1:num.boot.inner) { x <- sample(ASL.sample1, samp.size, replace=T) ASL.mean[i2] <- mean(x) ASL.t[i2] <- (mean(x) - ASL.mean.sd[i1,1]) / (sd(x) /sqrt.n) } ### Close inner loop ASL.boot1[i1,] <- quantile(ASL.mean,c(.025,.975)) ASL.boot2[i1,] <- ASL.mean.sd[i1,1] - quantile(ASL.t,c(.975,.025)) * ASL.mean.sd[i1,2]/sqrt.n ASL.tnorm[i1,] <- ASL.mean.sd[i1,1] + t.24 * ASL.mean.sd[i1,2]/sqrt.n } ### Close outer loop ## Obtain coverage probabilities sum(ASL.boot1[,1] <= mu & ASL.boot1[,2] >= mu)/num.boot.outer sum(ASL.boot2[,1] <= mu & ASL.boot2[,2] >= mu)/num.boot.outer sum(ASL.tnorm[,1] <= mu & ASL.tnorm[,2] >= mu)/num.boot.outer