options nodate nonumber ps=55 ls=80; title 'US Highway Fatality Rate (per million vehicle miles) --- 1945-1984'; /* Data Source: DASL (U.S. and New Mexico Highway Deaths) */ data fatality; input year rate; lograte = log(rate); datalines; 1945 11.3 1946 9.8 1947 8.8 1948 8.1 1949 7.5 1950 7.6 1951 7.5 1952 7.4 1953 7.0 1954 6.3 1955 6.4 1956 6.4 1957 6.0 1958 5.6 1959 5.4 1960 5.3 1961 5.2 1962 5.3 1963 5.5 1964 5.7 1965 5.5 1966 5.7 1967 5.5 1968 5.4 1969 5.3 1970 4.9 1971 4.7 1972 4.5 1973 4.3 1974 3.6 1975 3.4 1976 3.3 1977 3.4 1978 3.4 1979 3.5 1980 3.5 1981 3.3 1982 2.9 1983 2.7 1984 2.7 ; proc gplot; plot rate*year; run; plot lograte*year; run; proc iml; use fatality; read all; Y = lograte; N = nrow(Y); X = j(N,1)||year; /* Start with OLS analysis: */ XTXINV = inv(X`*X); BETAHAT = XTXINV*(X`*Y); YHAT = X*BETAHAT; E = Y - YHAT; S2_BETAHAT = XTXINV # ssq(E)/(N-2); SE_SLOPE = sqrt(S2_BETAHAT[2,2]); print BETAHAT S2_BETAHAT SE_SLOPE; /* Create data for lagged-residuals plot and correlation estimate: */ E_T = E[2:N]; E_T_1 = E[1:(N-1)]; create lagres var {E_T E_T_1}; append; /* Perform GLS analysis with estimated AR(1) covariance: */ RHOHAT = E_T`*E_T_1 / sqrt(ssq(E_T)#ssq(E_T_1)); print RHOHAT; V = toeplitz(RHOHAT##(0:(N-1))) / (1 - RHOHAT##2); /* "toeplitz" creates a symmetric diagonally-banded matrix */ V10 = V[1:10,1:10]; /* for display purposes only */ print V10 [format = 3.1]; T = root(V)`; /* one particular T such that T*T` = V */ Y_STAR = solve(T,Y); X_STAR = solve(T,X); XTXINV_G = inv(X_STAR`*X_STAR); BETAHAT_G = XTXINV_G*(X_STAR`*Y_STAR); E_STAR = Y_STAR - X_STAR*BETAHAT_G; S2_BETAHAT_G = XTXINV_G # ssq(E_STAR)/(N-2); SE_SLOPE_G = sqrt(S2_BETAHAT_G[2,2]); print BETAHAT_G S2_BETAHAT_G SE_SLOPE_G; E_STAR_T = E_STAR[2:N]; E_STAR_T_1 = E_STAR[1:(N-1)]; RHOHAT_STAR = E_STAR_T`*E_STAR_T_1 / sqrt(ssq(E_STAR_T)#ssq(E_STAR_T_1)); print RHOHAT_STAR; create translagres var {E_STAR_T E_STAR_T_1}; append; YHAT_G = X*BETAHAT_G; create fittedy var {lograte year YHAT YHAT_G}; append; /* Note: This is NOT the most computationally efficient method for autoregressive model computations. Specialized time-series methods are much more efficient. */ quit; /* Plot lag-1 residuals for original fit: */ symbol value=dot interpol=none; proc gplot data=lagres; plot e_t*e_t_1 / href = 0 vref = 0; run; /* Plot lag-1 residuals for transformed fit: */ proc gplot data=translagres; plot e_star_t*e_star_t_1 / href = 0 vref = 0; run; /* Plot data and fitted values: */ symbol interpol=join value=none width=2; proc gplot data=fittedy; plot (lograte yhat yhat_g)*year / overlay legend; run;