## Read in data and assign variable names lpga2008 <- read.fwf("http://www.stat.ufl.edu/~winner/data/lpga2008.dat", width=c(30,8,8,8,8,8,8,8,8,8,8), col.names=c("golfer","drive","frwy","grnreg", "puttrnd","sandrnd","sandsv","przrnd","lnprzrnd","rounds","golferid")) attach(lpga2008) # lpga2008 ## Create new variables for analysis that remove outlying golfer(s) drive1 <- drive[grnreg >= 50] frwy1 <- frwy[grnreg >= 50] grnreg1 <- grnreg[grnreg >= 50] puttrnd1 <- puttrnd[grnreg >= 50] ## Create new data frame with only the 4 variables of interest lpga1 <- data.frame(drive1,frwy1,grnreg1,puttrnd1) detach(lpga2008) attach(lpga1) ## Center the variables by subtracting off the mean and save in dataframe drive1c <- drive1 - mean(drive1) frwy1c <- frwy1 - mean(frwy1) grnreg1c <- grnreg1 - mean(grnreg1) puttrnd1c <- puttrnd1 - mean(puttrnd1) lpga1c <- data.frame(drive1c,frwy1c,grnreg1c,puttrnd1c) ## Obtain Variable Means, Covariances (S, S_n), and Correlations - uncentered (n.lpga <- length(drive1)) (mean.lpga <- colMeans(lpga1)) (S_nml.lpga <- cov(lpga1)) (S_n.lpga <- ((n.lpga-1)/n.lpga) * S_nml.lpga) (R.lpga <- cor(lpga1)) ## Basic Scatterplot Matrix - Centered Data plot(lpga1c) ## More Elaborate Scatterplot Matrix - Centered Data (This can be slow) # install.packages("GGally") GGally::ggpairs(lpga1c) ## plot of Fairway versus Drive with original axes and rotated xylims <- 1.2*ceiling(max(abs(drive1c),abs(frwy1c))) plot(drive1c, frwy1c, pch=16, xlim=c(-xylims,xylims), ylim=c(-xylims,xylims), main="Fairway Percent vs Drive Distance with Rotated Axes -21.3 Degrees") abline(v=0, col="red", lty=2, lwd=2) abline(h=0, col="red", lty=1, lwd=2) x1seq <- seq(-xylims, xylims, length=200) x2seq <- seq(-xylims, xylims, length=200) (theta <- acos(eigen(S_n.lpga[1:2,1:2])$vector[1,1])) if (theta > pi/2) theta <- theta - pi x1til <- x1seq * tan(theta) x2til <- x1seq * tan(pi/2 + theta) lines(x1seq, x1til, col="purple", lty=1, lwd=2) lines(x1seq, x2til, col="purple", lty=2, lwd=2) legend(16,-16,c("x1","x2","x1~","x2~"),lty=c(1,2,1,2), col=c("red","red","purple","purple")) ### Plot Bivariate data with ellipse containing a proportion (.80 here) ## of the data points # install.packages("car") library(car) par(mfrow=c(2,1)) xylims <- 1.2*ceiling(max(abs(drive1c),abs(frwy1c))) dataEllipse(drive1c,frwy1c, levels=0.80,xlim=c(-xylims,xylims), ylim=c(-xylims,xylims), main="Fairway Percent vs Drive Distance") xylims <- 1.2*ceiling(max(abs(drive1c),abs(puttrnd1c))) dataEllipse(drive1c,puttrnd1c, levels=0.80,xlim=c(-xylims,xylims), ylim=c(-xylims,xylims), main="Putts per Round vs Drive Distance") par(mfrow=c(1,1)) ########################################################################## #### Ellipses of Points a Constant Statistical Distance from the Center ## #### on Rotated axes ## ########################################################################## #### Points a constant statistical distance from the origin: #### centered drive/fairway Based on Rotated Coordinates ## Set Center Point (Variables are centered, so it is (0,0)) ctr <- c(0,0) ## Set up a series of angles to be used for plotting ellipse angles <- seq(0, 2*pi, length.out=200) ## Use Covariance sub-matrix of S_n for X1,X2(Dist,Fairway) (A <- S_n.lpga[1:2 , 1:2]) # A <- S_n.lpga[c(1,2) , c(1,2)] ## Could be used if not "consecutive" vars ## Obtain Eigenvalues/Eigenvectors of A eigVal <- eigen(A)$values eigVec <- eigen(A)$vectors ## scale eigenvectors to length = square-root of eigenvalue eigScl <- eigVec %*% diag(sqrt(eigVal)) ## Set the points for the major and minor rotated axes xMat <- rbind(ctr[1] + eigScl[1, ], ctr[1] - eigScl[1, ]) yMat <- rbind(ctr[2] + eigScl[2, ], ctr[2] - eigScl[2, ]) ## Create points (at each angle) for normal (Base) and rotated (Rot) ellipses ## ellBase with 200 (# angles) rows and 2 columns (# variables/eigVals) ## ellRot with 2 rows and 200 columns ellBase <- cbind(sqrt(eigVal[1])*cos(angles), sqrt(eigVal[2])*sin(angles)) ellRot <- eigVec %*% t(ellBase) ## Plot the 2 rows from ellRot against each other, adding center points plot((ellRot+ctr)[1, ], (ellRot+ctr)[2, ], asp=1, type="l", lwd=2, xlim=c(-10,10),ylim=c(-10,10), main="Ellipse of Constant Stat Distance from Origin - LPGA Drive/Fairway", xlab="Drive Dist", ylab="Fairway Pct") ## Add lines for the major and minor axes matlines(xMat, yMat, lty=1, lwd=2, col="green") ## Add center point points(ctr[1], ctr[2], pch=4, col="red", lwd=3) #### Points a constant statistical distance from the origin: #### centered drive/putts per round Based on Rotated Coordinates ## Set Center Point (Variables are centered, so it is (0,0) ctr <- c(0,0) ## Set up a series of angles to be used for plotting ellipse angles <- seq(0, 2*pi, length.out=200) ## Use Covariance sub-matrix of S_n for X1,X4(Dist,Putt) (A <- S_n.lpga[c(1,4) , c(1,4)]) ## Obtain Eigenvalues/Eigenvectors of A eigVal <- eigen(A)$values eigVec <- eigen(A)$vectors ## scale eigenvectors to length = square-root of eigenvalue eigScl <- eigVec %*% diag(sqrt(eigVal)) ## Set the points for the major and minor rotated axes xMat <- rbind(ctr[1] + eigScl[1, ], ctr[1] - eigScl[1, ]) yMat <- rbind(ctr[2] + eigScl[2, ], ctr[2] - eigScl[2, ]) ## Create points (at each angle) for normal (Base) and rotated (Rot) ellipses ## Matrices with 200 (# angles) rows and 2 columns (# variables/eigVals) ellBase <- cbind(sqrt(eigVal[1])*cos(angles), sqrt(eigVal[2])*sin(angles)) ellRot <- eigVec %*% t(ellBase) ## Plot the 2 columns from ellRot against each other, adding center points plot((ellRot+ctr)[1, ], (ellRot+ctr)[2, ], asp=1, type="l", lwd=2, xlim=c(-10,10),ylim=c(-10,10), main="Ellipse of Constant Stat Distance from Origin - LPGA Drive/Putt", xlab="Drive Dist", ylab="Putts per Round") ## Add lines for the major and minor axes matlines(xMat, yMat, lty=1, lwd=2, col="blue") ## Add center point points(ctr[1], ctr[2], pch=4, col="orange", lwd=3)