# Simple linear regression of Gross on Budget, computed step by step with
# matrix algebra (normal equations, hat matrix, ANOVA sums of squares, and
# variances of the estimates / a new prediction).
#
# Notes: t(X) = transpose of X, %*% = matrix multiplication,
#        solve(A) = A^(-1)

bbo <- read.csv(
  "http://www.stat.ufl.edu/~winner/sta4210/mydata/bollywood_boxoffice.csv",
  header = TRUE
)
names(bbo)

# Columns are read explicitly from the data frame instead of attach()-ing it,
# so nothing is left on the search path. Fail fast if a column is missing
# ([[ ]] would otherwise silently return NULL).
stopifnot(all(c("Budget", "Gross") %in% names(bbo)))

X <- bbo[["Budget"]]
Y <- bbo[["Gross"]]
n <- length(Y)

X0 <- rep(1, n)            # Column of 1s for the intercept
X <- cbind(X0, X)          # Form the X-matrix (n=55 rows, 2 cols)
Y <- matrix(Y, ncol = 1)   # Form the Y-vector (n=55 rows, 1 col)

(XX <- t(X) %*% X)         # Obtain X'X matrix (2 rows, 2 cols)
(XY <- t(X) %*% Y)         # Obtain X'Y vector (2 rows, 1 col)
(XXI <- solve(XX))         # Obtain (X'X)^(-1) matrix (2 rows, 2 cols)
(b <- XXI %*% XY)          # Obtain b-vector (2 rows, 1 col)

Y_hat <- X %*% b           # Vector of fitted values (n=55 rows, 1 col)
e <- Y - Y_hat             # Vector of residuals (n=55 rows, 1 col)
print(cbind(Y_hat, e))

H <- X %*% XXI %*% t(X)                  # Hat matrix (n rows, n cols)
J_n <- matrix(1 / n, nrow = n, ncol = n) # (1/n)J matrix (n rows, n cols)
I_n <- diag(n)                           # Identity matrix (n rows, n cols)

# Total Sum of Squares (SSTO)
(SSTO <- t(Y) %*% (I_n - J_n) %*% Y)
# SSTO can also be computed as:
# (SSTO <- (t(Y) %*% Y) - (t(Y) %*% J_n %*% Y))

# Error Sum of Squares (SSE)
(SSE <- t(Y) %*% (I_n - H) %*% Y)
# SSE can also be computed as:
# (SSE <- (t(Y) %*% Y) - (t(b) %*% XY))

# Regression Sum of Squares (SSR)
(SSR <- t(Y) %*% (H - J_n) %*% Y)
# SSR can also be computed as:
# (SSR <- (t(b) %*% XY) - (t(Y) %*% J_n %*% Y))

(MSE <- SSE / (n - 2))     # Obtain MSE = s^2 (kept as a 1x1 matrix)

# Obtain s^2{b}. MSE is a 1x1 matrix, so extract the scalar with MSE[1, 1]
# and use * (scalar multiplication) rather than %*%.
(s2_b <- MSE[1, 1] * XXI)

(X_h <- matrix(c(1, 20), ncol = 1))           # X_h vector for budget=20
(Y_hat_h <- t(X_h) %*% b)                     # Fitted value when budget=20
(s2_yhat_h <- t(X_h) %*% s2_b %*% X_h)        # Obtain s^2{Y_hat_h}
(s2_pred <- MSE + (t(X_h) %*% s2_b %*% X_h))  # Obtain s^2{pred}