sg1 <- read.csv("http://www.stat.ufl.edu/~winner/data/shotgun_spread.csv")
attach(sg1); names(sg1)

## OLS and Weighted Least Squares for Cartridge Type 1 using lm Function
## Weights = 1/variance of measurements at the distance

Y1 <- sqrtSprd[cartridge==1]
X1 <- range[cartridge==1]
WT1 <- 1/(sdGrp[cartridge==1])^2
n1 <- length(Y1)

# cbind(Y1,X1)

mod1.ols <- lm(Y1 ~ X1)
summary(mod1.ols)
anova(mod1.ols)

mod1.wls <- lm(Y1 ~ X1, weight=WT1)
summary(mod1.wls)
anova(mod1.wls)

## Matrix Form for Weighted Least Squares
## Note that for TSS and SSR, P0=(1/n)J is replaced with 
##        P0*=X0*(X0*'X0*)^(-1)X0*'

W1 <- diag(sqrt(WT1))
X01 <- cbind(rep(1,n1),X1)
Xs1 <- W1 %*% X01
Ys1 <- W1 %*% Y1

beta_WLS <- solve(t(Xs1) %*% Xs1) %*% t(Xs1) %*% Ys1

e_WLS <- Ys1 - Xs1 %*% beta_WLS
SSE_WLS <- t(e_WLS) %*% e_WLS
s2_WLS <- SSE_WLS/(n1-ncol(Xs1))
V_beta_WLS <- s2_WLS[1,1] * solve(t(Xs1) %*% Xs1)
SE_beta_WLS <- sqrt(diag(V_beta_WLS))
t_beta_WLS <- beta_WLS / SE_beta_WLS
p_beta_WLS <- 2*(1-pt(abs(t_beta_WLS),n1-ncol(Xs1)))
LB_beta_WLS <- beta_WLS + qt(.025,n1-ncol(Xs1)) * SE_beta_WLS
UB_beta_WLS <- beta_WLS + qt(.975,n1-ncol(Xs1)) * SE_beta_WLS

WLS_out <- cbind(beta_WLS, SE_beta_WLS, t_beta_WLS, p_beta_WLS,
     LB_beta_WLS, UB_beta_WLS)
colnames(WLS_out) <- c("Estimate","Std. Err.","t","P>|t|","Lower","Upper")
rownames(WLS_out) <- c("Intercept", "RangeFire")
round(WLS_out, 4)

Xs0 <- Xs1[,1]
Ps0 <- Xs0 %*%  solve(t(Xs0) %*% Xs0) %*% t(Xs0)
Ps1 <- Xs1 %*%  solve(t(Xs1) %*% Xs1) %*% t(Xs1)
I1 <- diag(n1)

TSS_WLS <- t(Ys1) %*% (I1 - Ps0) %*% Ys1
SSR_WLS <- t(Ys1) %*% (Ps1 - Ps0) %*% Ys1

df_WLS <- rbind(ncol(Xs1)-1,n1-ncol(Xs1),n1-1)
SS_WLS <- rbind(SSR_WLS,SSE_WLS,TSS_WLS)
MS_WLS <- rbind(SS_WLS[1]/df_WLS[1],SS_WLS[2]/df_WLS[2], NA)
F_WLS <- rbind(MS_WLS[1]/MS_WLS[2],NA,NA)
p_WLS <- rbind(1-pf(F_WLS[1],df_WLS[1],df_WLS[2]),NA,NA)

WLS_anova <- cbind(df_WLS,SS_WLS,MS_WLS,F_WLS,p_WLS)
colnames(WLS_anova) <- c("df","SS","MS","F","P>F")
rownames(WLS_anova) <- c("Regression", "Error", "Total")
round(WLS_anova,4)

## Power Variance function estimate
## V{Y_i} = sigma^2 * mu_i^(2*delta)
## gls function gives sigma and delta (power)

library(nlme)
mod1.gls <- gls(Y1 ~ X1, weights=varPower(form = ~fitted(.)), method="ML")
summary(mod1.gls)
intervals(mod1.gls)

mod2.gls <- gls(Y1 ~ X1, method="ML")
summary(mod2.gls)
anova(mod2.gls, mod1.gls)


## Allow different variances by rangeFire group
rangeFireF <- factor(range[cartridge==1])
mod3.gls <- gls(Y1 ~ X1, weights=varIdent(form = ~ 1|rangeFireF), method="ML")
summary(mod3.gls)
# intervals(mod3.gls)

anova(mod3.gls, mod1.gls)

par(mfrow=c(2,2))
plot(resid(mod1.ols) ~ predict(mod1.ols))
plot(resid(mod1.wls) ~ predict(mod1.wls))
plot(resid(mod1.gls, type="p") ~ predict(mod1.gls))
plot(resid(mod3.gls, type="p") ~ predict(mod3.gls))
par(mfrow=c(1,1))


################################################################
### Matrix form for power model  ###############################
################################################################
### This needs to be changed (muhat^2) see newer program
## Preliminary OLS and starting values for variance parameters
n <- length(Y1)
X <- cbind(rep(1,n1),X1)
Y <- Y1
Vhat <- diag(n)
Vhat_I <- solve(Vhat)
beta_old <- solve(t(X) %*% Vhat_I %*% X) %*% t(X) %*% Vhat_I %*% Y
muhat <- X %*% beta_old
e <- Y - muhat 
log.s2_old <- log(sum(e^2)/n)
delta_old <- 0

## Set up theta vector, g=dl/dtheta vector, and G=d^2l/dtheta^2 matrix
theta.old <- matrix(c(log.s2_old,delta_old),ncol=1)
g.theta <- matrix(rep(0,2),ncol=1)
G.theta <- matrix(rep(0,4),ncol=2)

g.theta[1,1] <- -(n/2) + (1/2)*exp(-log.s2_old) *
    sum(e^2*exp(-delta_old*log(muhat^2)))
g.theta[2,1] <- -(1/2)*sum(log(muhat^2)) + (1/2)*exp(-log.s2_old) *
    sum(e^2*log(muhat^2)*exp(-delta_old*log(muhat^2)))

G.theta[1,1] <-  -(1/2)*exp(-log.s2_old) *
    sum(e^2*exp(-delta_old*log(muhat^2)))
G.theta[1,2] <- -(1/2)*exp(-log.s2_old) *
    sum(e^2*log(muhat^2)*exp(-delta_old*log(muhat^2)))
G.theta[2,1] <- G.theta[1,2]
G.theta[2,2] <- -(1/2)*exp(-log.s2_old)*
    sum(e^2*((log(muhat^2))^2)*exp(-delta_old*log(muhat^2)))

## theta.new <- theta.old - (G^(-1))g
theta.new <- theta.old - solve(G.theta) %*% g.theta

## Update elements of theta, V{Y_i}, V{Y}, inv(V{Y})
log.s2_old <- theta.new[1,1]
s2_old <- exp(theta.new[1,1])
delta_old <- theta.new[2,1]

sigma2.Y <- s2_old * ((muhat^2)^delta_old)
Vhat <- diag(sigma2.Y[,1])
Vhat_I <- solve(Vhat)

## Begin iteration process
delta.beta <- 100              ## Used to measure SS of differences in beta 
num.iter <- 0                  ## Counter for number of iterations

while (delta.beta > 0.000000001) {
num.iter <- num.iter + 1
beta_new <- solve(t(X) %*% Vhat_I %*% X) %*% t(X) %*% Vhat_I %*% Y

muhat <- X %*% beta_new
e <- Y - muhat

theta.old <- matrix(c(log.s2_old,delta_old),ncol=1)
g.theta <- matrix(rep(0,2),ncol=1)
G.theta <- matrix(rep(0,4),ncol=2)

g.theta[1,1] <- -(n/2) + (1/2)*exp(-log.s2_old) *
    sum(e^2*exp(-delta_old*log(muhat^2)))
g.theta[2,1] <- -(1/2)*sum(log(muhat^2)) + (1/2)*exp(-log.s2_old) *
    sum(e^2*log(muhat^2)*exp(-delta_old*log(muhat^2)))

G.theta[1,1] <-  -(1/2)*exp(-log.s2_old) *
    sum(e^2*exp(-delta_old*log(muhat^2)))
G.theta[1,2] <- -(1/2)*exp(-log.s2_old) *
    sum(e^2*log(muhat^2)*exp(-delta_old*log(muhat^2)))
G.theta[2,1] <- G.theta[1,2]
G.theta[2,2] <- -(1/2)*exp(-log.s2_old)*
    sum(e^2*((log(muhat^2))^2)*exp(-delta_old*log(muhat^2)))

theta.new <- theta.old - solve(G.theta) %*% g.theta

log.s2_old <- theta.new[1,1]
s2_old <- exp(theta.new[1,1])
delta_old <- theta.new[2,1]
sigma2.Y <- s2_old * ((muhat^2)^delta_old)
Vhat <- diag(sigma2.Y[,1])
Vhat_I <- solve(Vhat)

delta.beta <- t(beta_new - beta_old) %*% (beta_new - beta_old)
beta_old <- beta_new
}

## Obtain final EWLS estimates and summary
num.iter

beta_wls <- beta_new
muhat <- X %*% beta_wls
e <- Y - muhat 

g.theta <- matrix(rep(0,2),ncol=1)
G.theta <- matrix(rep(0,4),ncol=2)

g.theta[1,1] <- -(n/2) + (1/2)*exp(-log.s2_old) *
    sum(e^2*exp(-delta_old*log(muhat^2)))
g.theta[2,1] <- -(1/2)*sum(log(muhat^2)) + (1/2)*exp(-log.s2_old) *
    sum(e^2*log(muhat^2)*exp(-delta_old*log(muhat^2)))

G.theta[1,1] <-  -(1/2)*exp(-log.s2_old) *
    sum(e^2*exp(-delta_old*log(muhat^2)))
G.theta[1,2] <- -(1/2)*exp(-log.s2_old) *
    sum(e^2*log(muhat^2)*exp(-delta_old*log(muhat^2)))
G.theta[2,1] <- G.theta[1,2]
G.theta[2,2] <- -(1/2)*exp(-log.s2_old)*
    sum(e^2*((log(muhat^2))^2)*exp(-delta_old*log(muhat^2)))

theta.new <- theta.old - solve(G.theta) %*% g.theta
theta_wls <- theta.new

log.s2_new <- theta_wls[1,1]
s2_new <- exp(theta_wls[1,1])
delta_new <- theta_wls[2,1]
sigma2.Y <- s2_new * muhat^(2*delta_new)
Vhat <- diag(sigma2.Y[,1])
Vhat_I <- solve(Vhat)

## When computing V{b_wls}, scale by n/(n-p') as n was denominator in ML
V_b_wls <- (n/(n-ncol(X)))*solve(t(X) %*% Vhat_I %*% X)
SE_b_wls <- sqrt(diag(V_b_wls))
t_b_wls <- beta_wls / SE_b_wls
p_b_wls <- 2*(1-pt(abs(t_b_wls),n-ncol(X)))
sigma <- rbind(sqrt(exp(theta_wls[1,1])), NA)
delta <- rbind(theta_wls[2,1],NA)

power_wls_out <- cbind(beta_wls, SE_b_wls, t_b_wls, p_b_wls, sigma, delta)
colnames(power_wls_out) <- c("Estimate", "Std. Err.", "t", "P>|t|","sigma",
  "delta")
rownames(power_wls_out) <- c("Intercept","Range")
round(power_wls_out,6)

summary(mod1.gls)

Yhat_wls <- X %*% beta_wls

cbind(Yhat_wls, predict(mod1.gls))

V_b_wls
vcov(mod1.gls)


################################################################
### Matrix form for heterogeneous group variance model  ########
################################################################

## Preliminary OLS and starting values for variance parameters
n <- length(X1)
X <- cbind(rep(1,n),X1)
Y <- Y1
rangeFireF <- factor(rangeFire[cartType==1])
c <- length(unique(X1))            ## Number of distinct X-levels
n.grp <- as.vector(tapply(Y, rangeFireF, length))

Vhat <- diag(n)
Vhat_I <- solve(Vhat)
beta_old <- solve(t(X) %*% Vhat_I %*% X) %*% t(X) %*% Vhat_I %*% Y
muhat <- X %*% beta_old
e <- Y - muhat 
log.s2_old <- log(sum(e^2)/n)
log.m2_old <- rep(0,c-1)

## Set up theta vector, g=dl/dtheta vector, and G=d^2l/theta^2 matrix
theta.old <- matrix(c(log.s2_old,log.m2_old),ncol=1)
g.theta <- matrix(rep(0,c),ncol=1)
G.theta <- matrix(rep(0,c^2),ncol=c)

sse <- rep(0, c)
logmj2 <- rep(0,c)
n.cume <- 0
for (i1 in 1:c) {
  sse[i1] <- sum((e[(n.cume+1):(n.cume+n.grp[i1])])^2)
  n.cume <- n.cume+n.grp[i1]
  if (i1 > 1) {logmj2[i1] <- log.m2_old[i1-1]}
}

g.theta[1,1] <- -(n/2) + (1/2)*(exp(-log.s2_old)) * 
      sum(exp(-logmj2) * sse)

for (i2 in 2:c) {
  g.theta[i2,1] <- -(n.grp[i2]/2) + (1/2) * exp(-log.s2_old) *
    exp(-logmj2[i2]) * sse[i2]
}

G.theta[1,1] <-  -(1/2) * (exp(-log.s2_old)) * sum(exp(-logmj2) * sse)
for (i2 in 2:c) {
  G.theta[i2,i2] <- -(1/2) * exp(-log.s2_old) * exp(-logmj2[i2]) * sse[i2]
  G.theta[1,i2] <- G.theta[i2,1] <- G.theta[i2,i2] 
}


## theta.new <- theta.old - (G^(-1))g
theta.new <- theta.old - solve(G.theta) %*% g.theta

## Update elements of theta, V{Y_i}, V{Y}, inv(V{Y})
log.s2_old <- theta.new[1,1]
s2_old <- exp(theta.new[1,1])
log.m2_old <- theta.new[2:c,1]
sigma2.Y <- rep(0, n)

n.cume <- 0
sigma2.Y[1:n.grp[1]] <- s2_old
n.cume <- n.grp[1]

for (i1 in 2:c) {
  sigma2.Y[(n.cume+1):(n.cume+n.grp[i1])] <- s2_old * exp(log.m2_old[i1-1])
  n.cume <- n.cume + n.grp[i1]
}

Vhat <- diag(sigma2.Y)
Vhat_I <- solve(Vhat)

theta.old
g.theta
G.theta
sse
logmj2
theta.new
beta_old

## Begin iteration process
delta.beta <- 100              ## Used to measure SS of differences in beta 
num.iter <- 0                  ## Counter for number of iterations

while (delta.beta > 0.000000001) {
  num.iter <- num.iter + 1
  beta_new <- solve(t(X) %*% Vhat_I %*% X) %*% t(X) %*% Vhat_I %*% Y
#  beta_new <- beta_old

  muhat <- X %*% beta_new
  e <- Y - muhat

  log.s2_old <- theta.new[1,1]
  s2_old <- exp(theta.new[1,1])
  log.m2_old <- theta.new[2:c,1]

## Set up theta vector, g=dl/dtheta vector, and G=d^2l/dtheta^2 matrix
  theta.old <- matrix(c(log.s2_old,log.m2_old),ncol=1)
  g.theta <- matrix(rep(0,c),ncol=1)
  G.theta <- matrix(rep(0,c^2),ncol=c)

  sse <- rep(0, c)
  logmj2 <- rep(0,c)
  n.cume <- 0
  for (i1 in 1:c) {
    sse[i1] <- sum((e[(n.cume+1):(n.cume+n.grp[i1])])^2)
    n.cume <- n.cume+n.grp[i1]
    if (i1 > 1) {logmj2[i1] <- log.m2_old[i1-1]}
  }

  g.theta[1,1] <- -(n/2) + (1/2)*(exp(-log.s2_old)) * 
        sum(exp(-logmj2) * sse)

  for (i2 in 2:c) {
    g.theta[i2,1] <- -(n.grp[i2]/2) + (1/2) * exp(-log.s2_old) *
      exp(-logmj2[i2]) * sse[i2]
  }

  G.theta[1,1] <-  -(1/2) * (exp(-log.s2_old)) * sum(exp(-logmj2) * sse)
  for (i2 in 2:c) {
    G.theta[i2,i2] <- -(1/2) * exp(-log.s2_old) * exp(-logmj2[i2]) * sse[i2]
    G.theta[1,i2] <- G.theta[i2,1] <- G.theta[i2,i2] 
  }

  g.theta
  G.theta
  sse

  ## theta.new <- theta.old - (G^(-1))g
  theta.new <- theta.old - solve(G.theta) %*% g.theta
  theta.new

  log.s2_old <- theta.new[1,1]
  s2_old <- exp(theta.new[1,1])
  log.m2_old <- theta.new[2:c,1]
  sigma2.Y <- rep(0, n)

  n.cume <- 0
  sigma2.Y[1:n.grp[1]] <- s2_old
  n.cume <- n.grp[1]

  for (i1 in 2:c) {
    sigma2.Y[(n.cume+1):(n.cume+n.grp[i1])] <- s2_old * exp(log.m2_old[i1-1])
    n.cume <- n.cume + n.grp[i1]
  }

  Vhat <- diag(sigma2.Y)
  Vhat_I <- solve(Vhat)
  
  beta_new <- solve(t(X) %*% Vhat_I %*% X) %*% t(X) %*% Vhat_I %*% Y
  beta_new

  delta.beta <- t(beta_new - beta_old) %*% (beta_new - beta_old)
  beta_old <- beta_new
}

## Obtain final EWLS estimates and summary
num.iter
beta_wls <- beta_new
muhat <- X %*% beta_wls
e <- Y - muhat 

beta_wls


g.theta <- matrix(rep(0,c),ncol=1)
G.theta <- matrix(rep(0,c^2),ncol=c)

sse <- rep(0, c)
logmj2 <- c(1,rep(0,c-1))
n.cume <- 0
for (i1 in 1:c) {
  sse[i1] <- sum((e[(n.cume+1):(n.cume+n.grp[i1])])^2)
  n.cume <- n.cume+n.grp[i1]
  if (i1 > 1) {logmj2[i1] <- log.m2_old[i1-1]}
}

g.theta[1,1] <- -(n/2) + (1/2)*(exp(-log.s2_old)) * sum(exp(-logmj2) * sse)

for (i2 in 2:c) {
  g.theta[i2,1] <- -(n.grp[i2]/2) + (1/2) * exp(-log.s2_old) *
    exp(-logmj2[i2]) * sse[i2]
}

G.theta[1,1] <-  -(1/2) * (exp(-log.s2_old)) * sum(exp(-logmj2 * sse)
for (i2 in 2:c) {
  G.theta[i2,i2] <- -(1/2) * exp(-log.s2_old) * exp(-logmj2[i2]) * sse[i2]
  G.theta[1,i2] <- G.theta[i2,1] <- G.theta[i2,i2] 
}


## theta.new <- theta.old - (G^(-1))g
theta.new <- theta.old - solve(G.theta) %*% g.theta

log.s2_old <- theta.new[1,1]
s2_old <- exp(theta.new[1,1])
log.m2_old <- theta.new[2:c,1]
sigma2.Y <- rep(0, n)

n.cume <- 0
sigma2.Y[1:n.grp[1]] <- s2_old
n.cume <- n.grp[1]

for (i1 in 2:c) {
  sigma2.Y[(n.cume+1):(n.cume+n.grp[i1])] <- s2_old * exp(log.m2_old[i1-1])
  n.cume <- n.cume + n.grp[i1]
}

Vhat <- diag(sigma2.Y)
Vhat_I <- solve(Vhat)

## When computing V{b_wls}, scale by n/(n-p') as n was denominator in ML
V_b_wls <- (n/(n-ncol(X)))*solve(t(X) %*% Vhat_I %*% X)
SE_b_wls <- sqrt(diag(V_b_wls))
t_b_wls <- beta_wls / SE_b_wls
p_b_wls <- 2*(1-pt(abs(t_b_wls),n1-ncol(X)))
sigma <- rbind(sqrt(exp(theta.new[1,1])), NA)
m.grp <- rbind(t(sqrt(exp(theta.new[2:c,1]))),rep(NA,c-1))

power_wls_out <- cbind(beta_wls, SE_b_wls, t_b_wls, p_b_wls, sigma, 
  m.grp)
colnames(power_wls_out) <- c("Estimate", "Std. Err.", "t", "P>|t|","sigma",
  "m2", "m3", "m4", "m5")
rownames(power_wls_out) <- c("Intercept","RangeFire")
round(power_wls_out,4)

Yhat_wls <- X %*% beta_wls

# cbind(Yhat_wls, predict(mod3.gls))

V_b_wls
vcov(mod3.gls)

summary(mod3.gls)