options nodate nonumber ps=55 ls=80; title 'RPD -- Exercise 12.7 -- Watershed Flow Data (Box-Cox)'; data watershed; input area areaimperv slope flow absorbency capacity infiltration rainfall raintime peakrate; /* log-transform all independent variables, as suggested in exercise: */ area = log(area); areaimperv = log(areaimperv); slope = log(slope); flow = log(flow); absorbency = log(absorbency); capacity = log(capacity); infiltration = log(infiltration); rainfall = log(rainfall); raintime = log(raintime); datalines; 0.03 0.006 3.0 1 70 1.5 0.25 1.75 2.0 46 0.03 0.006 3.0 1 70 1.5 0.25 2.25 3.7 28 0.03 0.006 3.0 1 70 1.5 0.25 4.00 4.2 54 0.03 0.021 3.0 1 80 1.0 0.25 1.60 1.5 70 0.03 0.021 3.0 1 80 1.0 0.25 3.10 4.0 47 0.03 0.021 3.0 1 80 1.0 0.25 3.60 2.4 112 0.13 0.005 6.5 2 65 2.0 0.35 1.25 0.7 398 0.13 0.005 6.5 2 65 2.0 0.35 2.30 3.5 98 0.13 0.005 6.5 2 65 2.0 0.35 4.25 4.0 191 0.13 0.008 6.5 2 68 0.5 0.15 1.45 2.0 171 0.13 0.008 6.5 2 68 0.5 0.15 2.60 4.0 150 0.13 0.008 6.5 2 68 0.5 0.15 3.90 3.0 331 1.00 0.023 15.0 10 60 1.0 0.20 0.75 1.0 772 1.00 0.023 15.0 10 60 1.0 0.20 1.75 1.5 1268 1.00 0.023 15.0 10 60 1.0 0.20 3.25 4.0 849 1.00 0.023 15.0 10 65 2.0 0.20 1.80 1.0 2294 1.00 0.023 15.0 10 65 2.0 0.20 3.10 2.0 1984 1.00 0.023 15.0 10 65 2.0 0.20 4.75 6.0 900 3.00 0.039 7.0 15 67 0.5 0.50 1.75 2.0 2181 3.00 0.039 7.0 15 67 0.5 0.50 3.25 4.0 2484 3.00 0.039 7.0 15 67 0.5 0.50 5.00 6.5 2450 5.00 0.109 6.0 15 62 1.5 0.60 1.50 1.5 1794 5.00 0.109 6.0 15 62 1.5 0.60 2.75 3.0 2067 5.00 0.109 6.0 15 62 1.5 0.60 4.20 5.0 2586 7.00 0.055 6.5 19 56 2.0 0.50 1.80 2.0 2410 7.00 0.055 6.5 19 56 2.0 0.50 3.25 4.0 1808 7.00 0.055 6.5 19 56 2.0 0.50 5.25 6.0 3024 7.00 0.063 6.5 19 56 1.0 0.50 1.25 2.0 710 7.00 0.063 6.5 19 56 1.0 0.50 2.90 3.4 3181 7.00 0.063 6.5 19 56 1.0 0.50 4.76 5.0 4279 ; proc reg; model peakrate = area areaimperv slope flow absorbency capacity infiltration rainfall raintime; plot residual.*predicted. / nomodel nostat; run; proc iml; use watershed; read all; Y = peakrate; X = j(nrow(Y),1)||area||areaimperv||slope||flow||absorbency|| capacity||infiltration||rainfall||raintime; XTXINV = inv(X`*X); YDOT = exp(sum(log(Y))/nrow(Y)); do LAMBDA = -1.0 to 1.0 by 0.05; if round(LAMBDA,0.05) = 0 then YTRANS = YDOT#log(Y); else YTRANS = (Y##LAMBDA-1)/(LAMBDA*YDOT##(LAMBDA-1)); RESID = YTRANS - X*(XTXINV*(X`*YTRANS)); LAMBDAS = LAMBDAS//LAMBDA; /* "//" concatenates vertically */ SSRES = SSRES//ssq(RESID); /* "ssq" is sum of squares */ end; print LAMBDAS [format=5.2] SSRES [format=9.0]; NU = nrow(Y)-ncol(X); CI_95LIM = min(SSRES) # (1 + tinv(1-0.05/2,NU)##2/NU); print CI_95LIM; create boxcoxres var {LAMBDAS SSRES}; append; quit; proc gplot; plot SSRES*LAMBDAS / vaxis = 0 to 1000000 by 100000 vref = 306525.5; run; /* Log transformation (LAMBDA = 0) is almost within the confidence limit, but not quite. */ data watershed; set watershed; transpeakrate = peakrate**(-0.15); run; proc reg; model transpeakrate = area areaimperv slope flow absorbency capacity infiltration rainfall raintime; plot residual.*predicted. / nomodel nostat; run; /* Note that the transformation is actually a decreasing function. Therefore, a negative coefficient for a variable implies that an increase in that variable is associated with increase in peakrate (all else constant). */