# Cell phone radiation data: descriptive statistics, empirical probabilities
# by brand, and a simulated sampling distribution of the sample mean.
#
# The script reads a CSV from a URL and uses its `rads` and `brand` columns.
# Top-level expressions (and assignments wrapped in parentheses) are meant to
# print their value when the script is run interactively or with echo on.

cell_rad <- read.csv("http://users.stat.ufl.edu/~winner/data/cellphnrad.csv")
names(cell_rad)

# Fail early with a clear message if the columns used below are missing.
stopifnot(all(c("rads", "brand") %in% names(cell_rad)))

# Pull the two columns out explicitly instead of attach()-ing the data frame;
# the plain names `rads` and `brand` keep the table() dimnames readable.
rads <- cell_rad$rads
brand <- cell_rad$brand

# Part 1: Summarizing Data ----

mean(rads)                      # Mean of rads
sd(rads)                        # Sample standard deviation of rads
(lq <- quantile(rads, 0.25))    # lq is assigned the 25th percentile
(uq <- quantile(rads, 0.75))    # uq is assigned the 75th percentile
(med.rads <- median(rads))      # med.rads is assigned the median
(iqr <- uq - lq)                # iqr is assigned the interquartile range

tapply(rads, brand, mean)       # Mean separately for each brand
tapply(rads, brand, median)     # Median separately for each brand
tapply(rads, brand, sd)         # Std Dev separately for each brand

# Histogram with a kernel density estimate super-imposed.
# freq = FALSE plots densities, so the bar AREAS (not heights) sum to 1.
# dev.new() opens a fresh graphics device so this plot is not overwritten by
# subsequent plots (portable replacement for the Windows-only win.graph()).
dev.new(height = 5.5, width = 7.0)
hist(rads, main = "", xlab = "Radiation", freq = FALSE)
lines(density(rads))

# Part 2: Probability ----

n <- length(rads)               # Total number of cell phone models
sum(rads >= 1.0) / n            # Prob(rads >= 1)

# 0/1 indicator variables for three events
rads.u.8 <- ifelse(rads <= 0.80, 1, 0)             # rads <= 0.8
rads.g.7 <- ifelse(rads >= 0.70, 1, 0)             # rads >= 0.7
rads.u.med <- ifelse(rads <= median(rads), 1, 0)   # rads <= median(rads)

# Cross-tabulation (aka contingency table) for each brand by criteria
(brand.rad.8 <- table(brand, rads.u.8))            # Brand = Rows, 0/1 = Columns
(brand.rad.7 <- table(brand, rads.g.7))
(brand.rad.med <- table(brand, rads.u.med))

# Frequencies converted to various types of proportions

# Event: Rad <= 0.80
prop.table(brand.rad.8, 1)                         # P(0|Brand), P(1|Brand)
margin.table(brand.rad.8, 1) / sum(brand.rad.8)    # P(Brand)
prop.table(brand.rad.8, 2)                         # P(Brand|0), P(Brand|1)
margin.table(brand.rad.8, 2) / sum(brand.rad.8)    # P(0), P(1)
brand.rad.8 / sum(brand.rad.8)                     # P(0,Brand), P(1,Brand)

# Event: Rad >= 0.70
prop.table(brand.rad.7, 1)                         # P(0|Brand), P(1|Brand)
margin.table(brand.rad.7, 1) / sum(brand.rad.7)    # P(Brand)
prop.table(brand.rad.7, 2)                         # P(Brand|0), P(Brand|1)
margin.table(brand.rad.7, 2) / sum(brand.rad.7)    # P(0), P(1)
brand.rad.7 / sum(brand.rad.7)                     # P(0,Brand), P(1,Brand)

# Event: Rad <= Median
prop.table(brand.rad.med, 1)                       # P(0|Brand), P(1|Brand)
margin.table(brand.rad.med, 1) / sum(brand.rad.med)  # P(Brand)
prop.table(brand.rad.med, 2)                       # P(Brand|0), P(Brand|1)
margin.table(brand.rad.med, 2) / sum(brand.rad.med)  # P(0), P(1)
brand.rad.med / sum(brand.rad.med)                 # P(0,Brand), P(1,Brand)

# Part 4: Sampling Distribution of Ybar ----

# POPULATION mean and variance saved to variables. var() divides by (N - 1),
# so rescale by (N - 1) / N to get the population (divide-by-N) variance.
MU.rads <- mean(rads)
SIGMA2.rads <- (length(rads) - 1) * var(rads) / length(rads)

n_sims <- 10000      # Number of random samples to draw
samp_size <- 36      # Size of each random sample

sampmean <- numeric(n_sims)   # Pre-allocated vector to save sample means

# n_sims random samples of size samp_size (sampling without replacement).
# Results vary from run to run; call set.seed() beforehand to reproduce them.
for (i in seq_len(n_sims)) {
  ransamp <- sample(rads, samp_size)   # Take a sample from rads
  sampmean[i] <- mean(ransamp)         # Compute sample mean and save
}

# Summary statistics for the simulated sample means
mean(sampmean)
var(sampmean)
sd(sampmean)
min(sampmean)
max(sampmean)

# Histogram of Sampling Distribution of Y-bar with Normal Super-Imposed.
# Only sample means strictly inside (0.80, 1.30) are plotted so that every
# value falls within the supplied breaks.
# NOTE(review): the normal curve uses SIGMA2 / samp_size with no
# finite-population correction although sampling is without replacement --
# confirm this is the intended comparison.
ybar.grid <- seq(0.8, 1.30, 0.001)
dev.new(height = 5.5, width = 7.0)
hist(sampmean[(sampmean > 0.80) & (sampmean < 1.30)],
     freq = FALSE, breaks = seq(0.80, 1.30, 0.0125), ylim = c(0, 8),
     xlab = "ybar", main = "Sampling Distribution of Y-bar")
lines(ybar.grid, dnorm(ybar.grid, MU.rads, sqrt(SIGMA2.rads / samp_size)))