nfl <- read.csv("http://www.stat.ufl.edu/~winner/sta4702/nfl_cancor.csv")
attach(nfl); names(nfl)

## Standardize all variables
zheight <- (height - mean(height)) / sd(height)
zarmlen <- (armlen - mean(armlen)) / sd(armlen)
zwt <- (wt - mean(wt)) / sd(wt)
zhandlen <- (handlen - mean(handlen)) / sd(handlen)
ztime40 <- (time40 - mean(time40)) / sd(time40)
zbench <- (bench - mean(bench)) / sd(bench)
zvjump <- (vjump - mean(vjump)) / sd(vjump)
zbjump <- (bjump - mean(bjump)) / sd(bjump)

## length(zheight); length(zarmlen); length(zwt); length(zhandlen)
## Z1 = Standardized Body Dimensions, Z2 = Standardized Physical Performance
Z1 <- cbind(zheight,zarmlen,zwt,zhandlen)
Z2 <- cbind(ztime40,zbench,zvjump,zbjump)

## X1 = Body Dimensions, X2 = Physical Performance (Original Scale)
X1 <- nfl[,2:5]
X2 <- nfl[,6:9]
p <- ncol(X1); q <- ncol(X2)

## Obtain canonical correlation analysis
(nfl.cc <- cancor(X1,X2,xcenter=TRUE,ycenter=TRUE))

## Obtain R and the 4 submatrices corresponding to X1 and X2
R <- cor(cbind(X1,X2))
round(R,4)

R11 <- R[1:p,1:p]
R12 <- R[1:p,(p+1):(p+q)]
R21 <- R[(p+1):(p+q),1:p]
R22 <- R[(p+1):(p+q),(p+1):(p+q)]

## Obtain eigenvalues and eigenvectors of R11 
## Obtain R11^(-1/2) and R11^(-1) by Spectral Decomposition
R11.lam <- eigen(R11)$val
R11.e   <- eigen(R11)$vec
R11.m12 <- matrix(rep(0,p^2),ncol=p)
R11.m1  <- matrix(rep(0,p^2),ncol=p)
for (i1 in 1:p) {
R11.m12 <- R11.m12 + (1/sqrt(R11.lam[i1])) * (R11.e[,i1] %*% t(R11.e[,i1]))
R11.m1 <-  R11.m1 + (1/R11.lam[i1]) * (R11.e[,i1] %*% t(R11.e[,i1]))
}
solve(R11.m12 %*% R11.m12)   ## check that R11^(-1/2) works

## Obtain eigenvalues and eigenvectors of R22 
## Obtain R22^(-1/2) and R22^(-1) by Spectral Decomposition
R22.lam <- eigen(R22)$val
R22.e   <- eigen(R22)$vec
R22.m12 <- matrix(rep(0,q^2),ncol=q)
R22.m1 <-  matrix(rep(0,q^2),ncol=q)
for (i1 in 1:q) {
R22.m12 <- R22.m12 + (1/sqrt(R22.lam[i1])) * (R22.e[,i1] %*% t(R22.e[,i1]))
R22.m1  <- R22.m1 + (1/R22.lam[i1]) * (R22.e[,i1] %*% t(R22.e[,i1]))
}

## Compute R11^(-1/2) x R12 x R22^(-1) x R21 x R11^(-1/2)   {1}
## Compute R22^(-1/2) x R21 x R11^(-1) x R12 x R22^(-1/2)   {2}

CC.mat1 <- R11.m12 %*% R12 %*% R22.m1 %*% R21 %*% R11.m12
CC.mat2 <- R22.m12 %*% R21 %*% R11.m1 %*% R12 %*% R22.m12

## Obtain the eigenvalues and eigenvectors of {1} and {2}
## e_k and f_k are the sets of eigenvectors of {1} and {2}, respectively
## rho_1^*2 >= ...>= rho_p^*2 are non-zero eigenvalues of {1}
##    "         "      "      are largest     "        of {2}

CC.mat1.lam <- eigen(CC.mat1)$val; CC.mat1.e <- eigen(CC.mat1)$vec
CC.mat2.lam <- eigen(CC.mat2)$val; CC.mat2.f <- eigen(CC.mat2)$vec

cbind(CC.mat1.lam, sqrt(CC.mat1.lam))
cbind(CC.mat2.lam, sqrt(CC.mat2.lam))

## Set up A matrix with row i being transpose of ith eigenvector of {1}
##  "   " B    "     "    " "   "       "      "  "       "      "  {2}
A <- matrix(rep(0,p^2),ncol=p)     # A_{pxp}
B <- matrix(rep(0,q^2),ncol=q)     # B_{qxq}

for (i1 in 1:p)  {
A[i1,] <- t(t(R11.m12) %*% CC.mat1.e[,i1])
}

for (i1 in 1:q)  {
B[i1,] <- t(t(R22.m12) %*% CC.mat2.f[,i1])
}

A %*% R11 %*% t(A)     # A*R11*A' = I_p
B %*% R22 %*% t(B)     # B*R22*B' = I_q

## Print out A and B
A.out <- A
rownames(A.out) <- c("a1'","a2'","a3'","a4'")
round(A.out,5)

B.out <- B                                                     
rownames(B.out) <- c("b1'","b2'","b3'","b4'")
round(B.out,5)


## U_{nxp} = Z1 * A'   V_{nxq} = Z2 * B'
U <- Z1 %*% t(A)
V <- Z2 %*% t(B)

## COV{U,Z1}, COV{V,Z2}  - Slide 7
(COV_U_Z1 <- A %*% R11)
(COV_V_Z2 <- B %*% R22)
cor(U[,1], Z1[,1]); cor(U[,1], Z1[,2])     ## Brute Force
cor(U[,2], Z1[,1]); cor(U[,2], Z1[,2])     ## Brute Force

## CORR{U1,V1}, CORR{U2,V2}
cor(U[,1], V[,1]); cor(U[,2], V[,2])     ## Brute Force

## COV{U,V} - Slide 12
(COV_U_V <- A %*% R12 %*% t(B))

## Matrix Errors of Approximation - Slide 13
CC.rho <- matrix(rep(0,p*q), ncol=q)  ## Set up diag matrix of canonical corrs
for (i in 1:p)  CC.rho[i,i] <- sqrt(CC.mat1.lam[i])

AI <- solve(A);  BI <- solve(B)
AI1 <- matrix(AI[,1],ncol=1)
AI2 <- matrix(AI[,2],ncol=1)
AI3 <- matrix(AI[,3],ncol=1)
AI4 <- matrix(AI[,4],ncol=1)

BI1 <- matrix(BI[,1],ncol=1)
BI2 <- matrix(BI[,2],ncol=1)
BI3 <- matrix(BI[,3],ncol=1)
BI4 <- matrix(BI[,4],ncol=1)

## This is messed up (Only R12 part)
(AI %*% CC.rho %*% t(BI)) - R12
AI %*% t(AI) - R11
BI %*% t(BI) - R22

R11 - AI1 %*% t(AI1)
R11 - AI1 %*% t(AI1) - AI2 %*% t(AI2)
R11 - AI1 %*% t(AI1) - AI2 %*% t(AI2) - AI3 %*% t(AI3)
R11 - AI1 %*% t(AI1) - AI2 %*% t(AI2) - AI3 %*% t(AI3) - AI4 %*% t(AI4)

R22 - BI1 %*% t(BI1)
R22 - BI1 %*% t(BI1) - BI2 %*% t(BI2)
R22 - BI1 %*% t(BI1) - BI2 %*% t(BI2) - BI3 %*% t(BI3)
R22 - BI1 %*% t(BI1) - BI2 %*% t(BI2) - BI3 %*% t(BI3) - BI4 %*% t(BI4)

## Must add not subtract due to signs - still does not sum to 0
R12 + (sqrt(CC.mat1.lam[1]) * AI1 %*% t(BI1) + 
       sqrt(CC.mat1.lam[2]) * AI2 %*% t(BI2) + 
       sqrt(CC.mat1.lam[3]) * AI3 %*% t(BI3) +
       sqrt(CC.mat1.lam[4]) * AI4 %*% t(BI4))

## Proportions of Sample Variance Explained - (pp. 561-563,Slide 14)
sum(diag(AI1 %*% t(AI1))) / p                                     # R11.1
(sum(diag(AI1 %*% t(AI1))) + sum(diag(AI2 %*% t(AI2)))) / p       # R11.2
(sum(diag(AI1 %*% t(AI1))) + sum(diag(AI2 %*% t(AI2))) + 
     sum(diag(AI3 %*% t(AI3)))) / p                               # R11.3
(sum(diag(AI1 %*% t(AI1))) + sum(diag(AI2 %*% t(AI2))) + 
     sum(diag(AI3 %*% t(AI3))) + sum(diag(AI4 %*% t(AI4)))) / p   # R11.4

sum(diag(BI1 %*% t(BI1))) / q                                     # R22.1
(sum(diag(BI1 %*% t(BI1))) + sum(diag(BI2 %*% t(BI2)))) / q       # R22.2
(sum(diag(BI1 %*% t(BI1))) + sum(diag(BI2 %*% t(BI2))) + 
     sum(diag(BI3 %*% t(BI3)))) / q                               # R22.3
(sum(diag(BI1 %*% t(BI1))) + sum(diag(BI2 %*% t(BI2))) + 
     sum(diag(BI3 %*% t(BI3))) + sum(diag(BI4 %*% t(BI4)))) / q   # R22.4

# Matrix form
AIAI <- t(AI) %*% AI 
BIBI <- t(BI) %*% BI

prop.Z1 <- rep(0,p);  prop.Z2 <- rep(0,q)
prop.Z1[1] <- AIAI[1,1]/p; prop.Z2[1] <- BIBI[1,1]/p
for (r in 2:p) prop.Z1[r] <- sum(diag(AIAI[1:r,1:r]))/p
for (r in 2:q) prop.Z2[r] <- sum(diag(BIBI[1:r,1:r]))/q

prop.out <- cbind(t(prop.Z1),t(prop.Z2))
colnames(prop.out) <- c("Z1,r=1","r=2","r=3","r=4","Z2,r=1","r=2","r=3","r=4")
round(prop.out,4)

#### Large-Sample Tests (pp. 563-566)

## Test  Sigma_12 = Rho_12 = 0
n <- nrow(Z1)

## Bartlett Multiplier
TS1 <- -(n-1-0.5*(p+q+1))

## Product of (1 - rho_i^*2) from 1 - eigenvalues of {1}
TS2 <- 1
for (i in 1:p) TS2 <- TS2*(1-CC.mat1.lam[i])

## Likelihood Ratio Test
TS <- TS1 * log(TS2)
TS.DF <- p*q
TS.CV <- qchisq(.95,TS.DF)
TS.PV <- 1-pchisq(TS,TS.DF)

test.out <- cbind(TS,TS.DF,TS.CV,TS.PV)
colnames(test.out) <- c("Test Stat","DF","X2(.05)","Pr>TS")
round(test.out,4)

## Test  rho_1^* ^= 0,  rho_2^* = .. = rho_p^* = 0
TS2.rho <- rep(1,p)
TS.DF <- rep(0,p)
for (i1 in 1:p) {
TS.DF[i1] <- (p-(i1-1))*(q-(i1-1))
for (i2 in i1:p) {
TS2.rho[i1] <- TS2.rho[i1] * (1-CC.mat1.lam[i2])
}}

TS <- TS1*log(TS2.rho)
TS.CV <- qchisq(.95,TS.DF)
TS.PV <- 1-pchisq(TS,TS.DF)

test.out <- cbind(1:4,TS,TS.DF,TS.CV,TS.PV)
colnames(test.out) <- c("k+1","Test Stat","DF","X2(.05)","Pr>TS")
round(test.out,4)