# RPD -- Chapter 7 -- Variable Selection: Linthurst Data linthurst <- read.table("linthurst.dat", row.names=1, col.names=c("obsnum","loc","type","biomass", "salinity","pH","K","Na","Zn")) library(leaps) # provides functions regsubsets and leaps; # may need to install this: install.packages("leaps") linthurst.model <- regsubsets(biomass ~ salinity + pH + K + Na + Zn, data=linthurst, nbest=10, nvmax=5) summary(linthurst.model) plot(linthurst.model, scale="r2") leaps.out <- with(linthurst, leaps(cbind(salinity, pH, K, Na, Zn), biomass, method="r2", nbest=10)) get(getOption("device"))() plot(leaps.out$size, leaps.out$r2, xlab="p' = p + 1", ylab="R-square") leaps.out <- with(linthurst, leaps(cbind(salinity, pH, K, Na, Zn), biomass, method="adjr2", nbest=1)) get(getOption("device"))() plot(leaps.out$size, (1-leaps.out$adjr2)*var(linthurst$biomass), type="b", xlab="p' = p + 1", ylab="MS(Res)") get(getOption("device"))() plot(leaps.out$size, leaps.out$adjr2, type="b", xlab="p' = p + 1", ylab="Adjusted R-square") leaps.out <- with(linthurst, leaps(cbind(salinity, pH, K, Na, Zn), biomass, method="Cp", nbest=1)) get(getOption("device"))() plot(leaps.out$size, leaps.out$Cp, xlab="p' = p + 1", ylab="Cp") abline(0,1) # Cp = p' line leaps.out <- with(linthurst, leaps(cbind(salinity, pH, K, Na, Zn), biomass, method="r2", nbest=1)) n <- nrow(linthurst) SSRes <- (1-leaps.out$r2)*var(linthurst$biomass)*(n-1) AIC <- n*log(SSRes) + 2*leaps.out$size - n*log(n) SBC <- n*log(SSRes) + log(n)*leaps.out$size - n*log(n) get(getOption("device"))() plot(leaps.out$size, AIC, type="b", xlab="p' = p + 1", ylab="AIC") get(getOption("device"))() plot(leaps.out$size, SBC, type="b", xlab="p' = p + 1", ylab="SBC") linthurst.model <- lm(biomass ~ salinity + pH + K + Na + Zn, data=linthurst) # The function step() stops when it can no longer decrease AIC, rather than # using a stopping rule based on SLE and/or SLS. Thus, the final models may # differ from those produced by SAS(R) PROC REG. start.model <- lm(biomass ~ 1, data=linthurst) step(start.model, biomass ~ salinity + pH + K + Na + Zn, direction="forward") step(linthurst.model, direction="backward") step(start.model, biomass ~ salinity + pH + K + Na + Zn, direction="both") library(boot) # provides function cv.glm for cross-validation final.model <- glm(biomass ~ pH + Na, data=linthurst) MSEP.est <- cv.glm(linthurst,final.model)$delta[1] # average squared prediction error # from leave-one-out cross-validation PRESS <- nrow(linthurst)*MSEP.est # PRESS statistic print(PRESS)