bmi.sim <- read.csv("http://www.stat.ufl.edu/~winner/data/nhl_nba_ebl_bmi.csv", header=TRUE) attach(bmi.sim); names(bmi.sim) N <- rep(0,2) N[1] <- 717 N[2] <- 505 NHL_BMI <- NHL_BMI[1:N[1]] NBA_BMI <- NBA_BMI[1:N[2]] par(mfrow=c(2,1)) hist(NHL_BMI,breaks=25) hist(NBA_BMI,breaks=25) par(mfrow=c(1,1)) sigma <- rep(0,2) (sigma[1] <- sd(NHL_BMI)) (sigma[2] <- sd(NBA_BMI)) mu <- rep(0,2) (mu[1] <- mean(NHL_BMI)) (mu[2] <- mean(NBA_BMI)) sampsz <- rep(2,0) sampsz[1] <- 10 sampsz[2] <- 10 sumssz <- sum(sampsz) N.sim <- 1000 set.seed(4721) ybar1 <- numeric(N.sim) sd1 <- numeric(N.sim) ybar2 <- numeric(N.sim) sd2 <- numeric(N.sim) ranksum.1 <- numeric(N.sim) ranksum.2 <- numeric(N.sim) for (i in 1:N.sim) { samp1 <- sample(1:N[1],sampsz[1],replace=F) samp2 <- sample(1:N[2],sampsz[2],replace=F) y1 <- NHL_BMI[samp1] y2 <- NBA_BMI[samp2] ybar1[i] <- mean(y1); sd1[i] <- sd(y1) ybar2[i] <- mean(y2); sd2[i] <- sd(y2) y1y2 <- c(y1,y2) ranksum.1[i] <- sum(rank(y1y2)[1:sampsz[1]]) ranksum.2[i] <- sum(rank(y1y2)[(sampsz[1]+1):(sampsz[1]+sampsz[2])]) } meandiff12 <- ybar1-ybar2 mean(meandiff12); sd(meandiff12) meandiff.pop <- mu[1] - mu[2] hist(meandiff12, breaks=50) abline(v=meandiff.pop) sp <- sqrt(((sampsz[1]-1)*sd1^2+(sampsz[2]-1)*sd2^2)/(sampsz[1]+sampsz[2]-2)) sediff_ev <- sp*sqrt(1/sampsz[1] + 1/sampsz[2]) sediff_uv <- sqrt(sd1^2/sampsz[1] + sd2^2/sampsz[2]) satter_df1 <- (sd1^2/sampsz[1] + sd1^2/sampsz[1])^2 satter_df2 <- ((sd1^2/sampsz[1])^2 / (sampsz[1]-1)) + ((sd2^2/sampsz[2])^2 / (sampsz[2]-1)) satter_df <- satter_df1 / satter_df2 meandiff_LB_ev <- meandiff12 - qt(.975,(sampsz[1]+sampsz[2]-2))*sediff_ev meandiff_UB_ev <- meandiff12 + qt(.975,(sampsz[1]+sampsz[2]-2))*sediff_ev meandiff_LB_uv <- meandiff12 - qt(.975,satter_df)*sediff_uv meandiff_UB_uv <- meandiff12 + qt(.975,satter_df)*sediff_uv 1 - (sum(meandiff_LB_ev >= meandiff.pop) + sum(meandiff_UB_ev <= meandiff.pop)) / N.sim 1 - (sum(meandiff_LB_uv >= meandiff.pop) + sum(meandiff_UB_uv <= meandiff.pop)) / N.sim t.stat_ev <- meandiff12/sediff_ev t.stat_uv <- meandiff12/sediff_uv sum(t.stat_ev > qt(.975,(sampsz[1]+sampsz[2]-2))) / N.sim sum(t.stat_ev < qt(.025,(sampsz[1]+sampsz[2]-2))) / N.sim sum(t.stat_uv > qt(.975,satter_df)) / N.sim sum(t.stat_uv < qt(.025,satter_df)) / N.sim sum(ranksum.1 < 78) / N.sim sum(ranksum.2 < 78) / N.sim sum(ranksum.1 > 132) / N.sim sum(ranksum.2 > 132) / N.sim ##### Inference Concerning Variances #### Estimating NHL Variance and SD NHL.Sigma2.LB <- (sampsz[1]-1)*sd1^2/qchisq(.975,sampsz[1]-1) NHL.Sigma2.UB <- (sampsz[1]-1)*sd1^2/qchisq(.025,sampsz[1]-1) NHL.Sigma.LB <- sqrt(NHL.Sigma2.LB) NHL.Sigma.UB <- sqrt(NHL.Sigma2.UB) sum((NHL.Sigma2.LB<=sigma[1]^2) & (NHL.Sigma2.UB>=sigma[1]^2)) / N.sim ### Testing Equal Variance - F-test F.obs <- sd1^2/sd2^2 sum(F.obs <= qf(.025,sampsz[1]-1,sampsz[2]-1)) / N.sim sum(F.obs >= qf(.975,sampsz[1]-1,sampsz[2]-1)) / N.sim