# US Highway Fatality Rate (per million vehicle miles) --- 1945-1984 # Data Source: DASL (U.S. and New Mexico Highway Deaths) fatality <- data.frame(year = 1945:1984, rate = c(11.3, 9.8, 8.8, 8.1, 7.5, 7.6, 7.5, 7.4, 7.0, 6.3, 6.4, 6.4, 6.0, 5.6, 5.4, 5.3, 5.2, 5.3, 5.5, 5.7, 5.5, 5.7, 5.5, 5.4, 5.3, 4.9, 4.7, 4.5, 4.3, 3.6, 3.4, 3.3, 3.4, 3.4, 3.5, 3.5, 3.3, 2.9, 2.7, 2.7)) fatality <- transform(fatality, lograte = log(rate)) plot(rate ~ year, type="l", data=fatality) dev.new() plot(lograte ~ year, type="l", data=fatality) y <- fatality$lograte n <- length(y) X <- cbind(1,fatality$year) # Start with OLS analysis: XtXinv <- solve(t(X)%*%X) betahat <- XtXinv%*%(t(X)%*%y) yhat <- X%*%betahat e <- y - yhat s2.betahat <- XtXinv * sum(e^2)/(n-2) se.slope <- sqrt(s2.betahat[2,2]) print(betahat) print(s2.betahat) print(se.slope) # Create lagged residuals for plot and computations: e.t <- e[2:n] e.t.1 <- e[1:(n-1)] # Plot lag-1 residuals for original fit: dev.new() plot(e.t.1, e.t) abline(h=0, v=0) # Perform GLS analysis with estimated AR(1) covariance: rhohat <- drop(t(e.t)%*%e.t.1) / sqrt(sum(e.t^2)*sum(e.t.1^2)) print(rhohat) V <- toeplitz(rhohat^(0:(n-1))) / (1 - rhohat^2) # "toeplitz" creates a symmetric diagonally-banded matrix print(round(V[1:10,1:10],1)) # for display purposes only TT <- t(chol(V)) # one particular TT such that TT %*% t(TT) = V y.star <- solve(TT,y) X.star <- solve(TT,X) XtXinv.G <- solve(t(X.star)%*%X.star) betahat.G <- XtXinv.G%*%(t(X.star)%*%y.star) e.star <- y.star - X.star%*%betahat.G s2.betahat.G <- XtXinv.G * sum(e.star^2)/(n-2) se.slope.G <- sqrt(s2.betahat.G[2,2]) print(betahat.G) print(s2.betahat.G) print(se.slope.G) e.star.t <- e.star[2:n] e.star.t.1 <- e.star[1:(n-1)] rhohat.star <- drop(t(e.star.t)%*%e.star.t.1) / sqrt(sum(e.star.t^2)*sum(e.star.t.1^2)) print(rhohat.star) # Plot lag-1 residuals for transformed fit: dev.new() plot(e.star.t.1, e.star.t) abline(h=0, v=0) yhat.G <- X%*%betahat.G # Note: This is NOT the most computationally efficient method for # autoregressive model computations. Specialized time-series methods # are much more efficient. dev.new() plot(lograte ~ year, type="l", data=fatality) lines(fatality$year, yhat, lty=2, col=2) lines(fatality$year, yhat.G, lty=3, col=3) legend("topright", c("lograte","yhat","yhat.G"), lty=1:3, col=1:3)