> pdf("F:\\sta6208\\yanks1927away_rand.pdf") > > yanksaway <- read.table("F:\\data\\yanks1927away.txt",header=T) > > attach(yanksaway) > > set.seed(1234) > t <- max(opp_id) > N <- length(runs) > ybar_all <- mean(runs) > > n <- rep(0,t) > ybar <- rep(0,t) > teamrange <- matrix(rep(0,2*t),ncol=2) > sumsq_obs <- 0 > > > for (i in 1:t) { + n[i] <- length(runs[opp_id==i]) + if (i==1) { + teamrange[i,1] <- 1 + teamrange[i,2] <- n[i] + } + else { + teamrange[i,1] <- teamrange[(i-1),1]+n[i] + teamrange[i,2] <- teamrange[(i-1),2]+n[i] + } + ybar[i] <- mean(runs[teamrange[i,1]:teamrange[i,2]]) + sumsq_obs <- sumsq_obs + n[i]*(ybar[i]-ybar_all)^2 + } > ybar [1] 7.500000 5.416667 4.750000 8.000000 7.166667 6.416667 4.916667 > sumsq_obs [1] 113.5855 > num_samp <- 10^5-1 > > sumsq_rand <- rep(0,num_samp) > > for (i1 in 1:num_samp) { + randgrp <- sample(N,size=N,replace=FALSE) + sumsq <- 0 + + for (i2 in 1:t) { + ybar[i2] <- mean(runs[randgrp[teamrange[i,1]:teamrange[i,2]]]) + sumsq <- sumsq + n[i2]*(ybar[i2]-ybar_all)^2 + } + + sumsq_rand[i1] <- sumsq + } > > summary(sumsq_rand) Min. 1st Qu. Median Mean 3rd Qu. Max. 0.0513 7.3850 37.3800 81.4400 110.9000 1504.0000 > > (p_rand <- (sum(sumsq_rand >= sumsq_obs)+1)/(num_samp+1)) [1] 0.23743 > > hist(sumsq_rand,xlab="Opponent Sum of Squares", + main="Randomization Distribution for Runs Scored by Opponent") > abline(v=sumsq_obs) > > dev.off()