pdf("philrain.pdf") philrain <- read.fwf("C:\\data\\philrain.dat", width=c(8,8,8,8), col.names=c("year","ymonth","smonth","rain")) philrain <- data.frame(philrain) attach(philrain) ymonth <- factor(ymonth) # Summary Statistics for Rainfall mean(rain) median(rain) var(rain) sd(rain) min(rain) quantile(rain,c(0.25,0.5,0.75)) max(rain) # POPULATION Mean and Variance saved to variables MU.rain <- mean(rain) SIGMA2.rain <- (length(rain)-1)*var(rain)/length(rain) # Summary Statistics for rainfall by Month of Year tapply(rain,ymonth,mean) tapply(rain,ymonth,sd) # Plots of Rainfall # Histogram hist(rain,breaks=seq(0,1600,100)) # Boxplot by Month of Year plot(ymonth,rain) # Time series plot(rain,type="o",xlab="Month", ylab="Rainfall (1/100 inches)",main="Philadelphia Monthly Rain -- 1825-1869") abline(h=mean(rain)) mean <- numeric(10000) var <- numeric(10000) # 10000 Random Samples of size n=30 (Sampling without replacement) for (i in 1:10000) { ransamp <- sample(rain,30) mean[i] <- mean(ransamp) var[i] <- var(ransamp) } # Summary statistics for sample Mean and Variance mean(mean) var(mean) sd(mean) mean(var) var(var) sd(var) min(mean) max(mean) min(var) max(var) # Histogram of Sampling Distribution of Y-bar with Normal Super-Imposed hist(mean,breaks=seq(200,550,10),xlab="ybar",main="Sampling Distribution of Y-bar") lines(225:515,100000*dnorm(225:515,MU.rain,sqrt(SIGMA2.rain/30))) # Histogram of (n-1)S^2/SIGMA^2 with Chi-squared Super-Imposed nu <- 29 ys2lim <- 1.1*20000*max(dchisq(0:200,nu)) hist(29*var/SIGMA2.rain,breaks=seq(0,200,2),ylim=c(0,ys2lim),xlab="S2",main="Sampling Distribution of Scaled S2") lines(seq(0,200,2),20000*dchisq(seq(0,200,2),29)) dev.off()