Q1: 9.12

S <- 10^(-3)*matrix(c(11.072, 8.019, 8.160,
                      8.019, 6.417, 6.005,
                      8.160, 6.005, 6.773), 
                    nrow = 3, ncol = 3)

Sn <- (23/24)*S # n = 24, Convert to Sn for M.L.E.

\(\mathbf{S_n} = \frac{23}{24} \mathbf{S} = \begin{pmatrix} 0.01061 & 0.00768 & 0.00782 \\ 0.00768 & 0.00615 & 0.00575 \\ 0.00782 & 0.00575 & 0.00649 \\ \end{pmatrix}\)

(a) Specific Variances

L <- c(.1022, .0752, .0765)
LL <- matrix(L, ncol = 1) %*% matrix(L, nrow = 1)
Psi <- diag(Sn - matrix(L, ncol = 1) %*% matrix(L, nrow = 1))

\(\mathbf{S_n} \approx \hat{\mathbf{L}} \hat{\mathbf{L}}^T + \hat{\mathbf{\Psi}}\), where \(diag(\mathbf{S_n}) = diag(\hat{\mathbf{L}} \hat{\mathbf{L}}^T) + diag(\hat{\mathbf{\Psi}})\)

Hence, \(\hat{\mathbf{\Psi}} = diag(\hat{\mathbf{\Psi}}) = diag(\mathbf{S_n} - \hat{\mathbf{L}} \hat{\mathbf{L}}^T)\)

\(\hat{\mathbf{\Psi}} = \begin{pmatrix} 0.000166 & 0.000000 & 0.000000 \\ 0.000000 & 0.000495 & 0.000000 \\ 0.000000 & 0.000000 & 0.000639 \\ \end{pmatrix}\)

(b) Communalities

\(\sigma_{ii} = \ell_{i1}^2 + \ell_{i2}^2 + ~...~ + \ell_{im}^2 + \psi_i\)

\(h_i^2 = \ell_{i1}^2 + \ell_{i2}^2 + ~...~ + \ell_{im}^2 = 0.022\)

(c) Proportion of variance explained by the factor

p <-  sum(L^2) / (diag(Sn) %>% sum())

\(\frac{s_{11} + s_{22} + ~...~ + s_{pp}}{h_i^2} = 0.9441\)

(d) Residual Matrix

Residual <- Sn - LL - Psi 

$ - ^T - =

xm(Residual, 6) %_% "$"
\[\begin{pmatrix} 0.000000 & -0.000166 & -0.000164 \\ -0.000495 & 0.000000 & -0.000493 \\ -0.000637 & -0.000637 & 0.000000 \\ \end{pmatrix}\]

$

Q2: 9.32

library(readr)
library(psych)
bulls <- read_table2("T1-10.dat", col_names = FALSE)
colnames(bulls) <- c("Breed", "SalePr", "YrHgt", 
                     "FtFrBody", "PrctFFB", "Frame",
                     "BkFat", "SaleHt", "SaleWt")
bulls <- bulls[,-c(1,2)]
source("fa_functions.R") # for formating loading mt

(a) S PC

fa_S_pc <- principal(bulls, 
              nfactors = 3, 
              rotate = 'varimax', 
              covar = TRUE)
fa_S_pc[["loadings"]][,] %>% round(3) %>%
    format_loading_matrix(criterion = 30, markdown = T) %>%
    kable("markdown",align = 'c', escape = F)
RC1 RC2 RC3
YrHgt 0.566 0.734 0.870
FtFrBody 31.179 83.935 24.022
PrctFFB 0.601 1.488 2.742
Frame 0.296 0.39 0.415
BkFat 0.014 -0.006 -0.055
SaleHt 1.021 0.934 0.820
SaleWt 122.551 39.068 -17.480

(b) S ML

fa_S_ml <- fa(bulls, nfactors=3, 
              rotate="varimax", covar = T,
              fm="ml", scores="regression")
fa_S_ml[["loadings"]][,] %>% round(3) %>%
    format_loading_matrix(criterion = 30, markdown = T) %>%
    kable("markdown",align = 'c', escape = F)
ML1 ML2 ML3
YrHgt -0.001 0.581 1.629
FtFrBody 21.802 84.458 31.383
PrctFFB -0.253 2.155 1.056
Frame 0.019 0.307 0.817
BkFat 0.033 -0.015 -0.027
SaleHt 0.498 0.841 1.532
SaleWt 119.136 33.925 38.8

(c) R PC

fa_R_pc <- principal(bulls, 
              nfactors = 3, 
              rotate = 'varimax', 
              covar = F)
fa_R_pc[["loadings"]][,] %>% round(3) %>%
    format_loading_matrix(criterion = 0.65, markdown = T) %>%
    kable("markdown",align = 'c', escape = F)
RC1 RC3 RC2
YrHgt 0.941 0.27 -0.082
FtFrBody 0.447 0.794 0.205
PrctFFB 0.262 0.859 -0.295
Frame 0.938 0.219 -0.028
BkFat -0.231 -0.339 0.812
SaleHt 0.833 0.419 0.109
SaleWt 0.352 0.43 0.722

(d) R ML

fa_R_ml <- fa(bulls, nfactors=3, 
              rotate="varimax", covar = F, 
              fm="ml", scores="regression")
fa_R_ml[["loadings"]][,] %>% round(3) %>%
    format_loading_matrix(criterion = 0.6, markdown = T) %>%
    kable("markdown",align = 'c', escape = F)
ML1 ML2 ML3
YrHgt 0.941 0.286 0.164
FtFrBody 0.414 0.505 0.553
PrctFFB 0.231 0.947 0.212
Frame 0.891 0.251 0.18
BkFat -0.256 -0.514 0.273
SaleHt 0.755 0.269 0.434
SaleWt 0.253 -0.05 0.879

(e) Compare S & R

(f) scatter plots of factor2 vs factor1 in (a) and (c)

(a)

library(ggplot2)
fa_S_pc[["scores"]] %>% as.data.frame() %>%
    ggplot()+
    geom_point(aes(x=RC1, y=RC2), size=1)+
    labs(x="Factor 1", y="Factor 2", title="FA with S",
         caption="Principle component method")

(c)

library(ggplot2)
fa_R_pc[["scores"]] %>% as.data.frame() %>%
    ggplot()+
        geom_point(aes(x=RC1, y=RC2), size=1)+
        labs(x="Factor 1", y="Factor 2", title="FA with R",
             caption="Principle component method")

Q3

(a) Appropriate number of factors

library(readr)
drinks <- read_table2("drinks.DAT", col_names = FALSE, skip = 2)
colnames(drinks) <- c("ID", "BRAND",
                      paste("X",1:10, sep=""))
drinks <- drinks[-nrow(drinks),]
library(psych)
fa_R_pc2 <- principal(drinks[,3:12], 
              nfactors = 4, 
              rotate = 'varimax', 
              covar = F)
fa_R_pc2[["values"]] %>% 
    cbind(X=1:10) %>% as.data.frame() %>%
    ggplot()+
        geom_point(aes(x=X, y=.), size=2) +
        scale_x_continuous(breaks = 1:10)+
        labs(x="i", y="eigenvalue",
             title="Scree Plot")

By the unity criterion we use 4 factors1.

source("fa_functions.R")
fa_R_pc2[["loadings"]][,] %>% round(3) %>%
    format_loading_matrix(criterion = 0.5, markdown = T) %>%
    kable("markdown",align = 'c', escape = F)
RC4 RC1 RC2 RC3
X1 0.131 -0.215 0.905 -0.044
X2 0.953 0.212 0.05 -0.023
X3 0.293 0.899 -0.165 -0.043
X4 0.027 -0.091 0.955 0.054
X5 0.933 0.261 0.111 -0.076
X6 -0.083 -0.043 0.041 0.994
X7 0.228 0.918 -0.23 -0.028
X8 0.057 -0.224 0.935 0.051
X9 0.935 0.276 0.073 -0.048
X10 0.273 0.896 -0.199 -0.016

(b) Label the Factors

(c) Average factor scores of each brand

br1 <- which(drinks$BRAND == "1") #index for brand 1
br2 <- which(drinks$BRAND == "2")
br3 <- which(drinks$BRAND == "3")
br4 <- which(drinks$BRAND == "4")
br5 <- which(drinks$BRAND == "5")
br6 <- which(drinks$BRAND == "6")
fs1 <- fa_R_pc2[["scores"]][br1,] %>% colMeans() %>% t() %>%
    as.data.frame()
fs2 <- fa_R_pc2[["scores"]][br2,] %>% colMeans() %>% t() %>%
    as.data.frame()
fs3 <- fa_R_pc2[["scores"]][br3,] %>% colMeans() %>% t() %>%
    as.data.frame() 
fs4 <- fa_R_pc2[["scores"]][br4,] %>% colMeans() %>% t() %>%
    as.data.frame()
fs5 <- fa_R_pc2[["scores"]][br5,] %>% colMeans() %>% t() %>%
    as.data.frame()
fs6 <- fa_R_pc2[["scores"]][br6,] %>% colMeans() %>% t() %>%
    as.data.frame()

avg_fs <- rbind(fs1, fs2, fs3, fs4, fs5, fs6) %>% 
    as.data.frame()
rownames(avg_fs) <-  c("Pepsi", "Coke", "Gatorade",
                    "Allsport", "Lipton tea", "Nestea")

Average factor scores

kable(avg_fs, "markdown", align = 'c',
      digits = 3,
      col.names= c("F1", "F2", "F3", "F4"),
      row.names = T, 
      caption = "Average Factor Scores of each brand")
F1 F2 F3 F4
Pepsi -0.629 -1.008 -0.107 0.354
Coke -1.215 -0.403 0.194 -0.177
Gatorade 0.420 0.928 -0.980 -0.077
Allsport -0.177 0.666 -1.074 -0.181
Lipton tea 0.885 0.085 0.613 0.031
Nestea 0.615 0.122 0.821 -0.044

(d)

avg_fs <- cbind(avg_fs, rownames(avg_fs))
colnames(avg_fs) <- c("F1", "F2", "F3", "F4", "Brand")

### F1, F2
ggplot(avg_fs[,c(1,2, 5)],aes(x=F1, y=F2, color=Brand)) +
    geom_point() +
    geom_text(aes(label=Brand),
              hjust=0.5, vjust=-0.4, size = 3) +
    theme(legend.position = 'none')

### F1, F3
ggplot(avg_fs[,c(1,3, 5)],aes(x=F1, y=F3, color=Brand)) +
    geom_point() +
    geom_text(aes(label=Brand),
              hjust=0.5, vjust=-0.4, size = 3) +
    theme(legend.position = 'none')