## Read in NBA Height and Weight data and place in memory nba1 <- read.csv( "http://www.stat.ufl.edu/~winner/data/nba_ht_wt.csv",header=T) attach(nba1); names(nba1) ## Fit Model with X=Height(inches) and Y=Weight(pounds) nba.mod1 <- lm(Weight ~ Height) summary(nba.mod1) ## Plot data, regression line, 95% CI's and PI's Hstar <- seq(min(Height), max(Height), length=100) ymean <- predict(nba.mod1, list(Height=Hstar), int="c") yindiv <- predict(nba.mod1, list(Height=Hstar), int="p") plot(Height, Weight, main="NBA Heights (X) and Weights (Y)", pch=16, cex=0.5, ylim=c(100,300)) abline(nba.mod1, lwd=2) lines(Hstar, ymean[,2], col="blue", lwd=2) lines(Hstar, ymean[,3], col="blue", lwd=2) lines(Hstar, yindiv[,2], col="red", lwd=2) lines(Hstar, yindiv[,3], col="red", lwd=2) legend("topleft", c("Predicted", "Lower CI", "Upper CI", "Lower PI", "Upper PI"), lty=1, col=c("black","blue","blue","red","red"), lwd=2, cex=0.7) ## Save Residuals to "e1" e1 <- resid(nba.mod1) ## Use Shapiro-Wilk Test H0: Errors are normally distributed shapiro.test(e1) qqnorm(e1); qqline(e1) ## ### Breusch-Pagan Test ## H0: Constant Error Variance ## HA: Error variance is related to predictor(s) level(s) ## Direct calculation n <- length(Weight) mod1a <- lm(e1^2 ~ Height) anova(mod1a) SS_Reg.BP <- anova(mod1a)[1,2] X2.BP <- (SS_Reg.BP/2) / (sum(e1^2)/n)^2 BP.out <- cbind(1, X2.BP, qchisq(.95,1), 1-pchisq(X2.BP,1)) colnames(BP.out) <- c("df", "X2(BP)", "X2(.95)", "P>X2(BP)") rownames(BP.out) <- c("Breusch-Pagan Test") round(BP.out, 4) ## install.packages("lmtest") library(lmtest) bptest(Weight ~ Height,studentize=FALSE) ### F-test for Lack of Fit ## H0: mu_j = B0+B1*X_j HA: mu_j ^= B0+B1*X_j ## Fit model with separate means for each X-level (Cell Means Model) nba.mod1a <- lm(Weight ~ factor(Height)-1) summary(nba.mod1a) anova(nba.mod1a) ## Compare the linear model fit (mod1 with 2 parameters) with ## cell means model (mod1a with c=18 parameters) anova(nba.mod1,nba.mod1a) ## Box-Cox Transformation (Goal: Power transformation for Y for: ## normality and/or constant variance and/or linearity) ## Not always achievable library(MASS) bc.mod1 <- boxcox(nba.mod1,plot=T) # Runs series of power transforms and plots print(cbind(bc.mod1$x,bc.mod1$y)) # Print out results (lambda,log-like) print(bc.mod1$x[which.max(bc.mod1$y)]) # Print out "best" lambda ci.bc <- max(bc.mod1$y)-0.5*qchisq(0.95,1) # Obtain cut-off for 95% CI (in log-like) print(bc.mod1$x[bc.mod1$y>= ci.bc]) # Print Values of lambda in 95% CI ### Re-fit Model using log(Weight) as response variable nba.mod2 <- lm(log(Weight) ~ Height) summary(nba.mod2) e2 <- resid(nba.mod2) shapiro.test(e2) library(lmtest) bptest(log(Weight) ~ Height,studentize=FALSE) nba.mod3 <- lm(log(Weight) ~ factor(Height)-1) anova(nba.mod2,nba.mod3)