First load the package
library(dplyr);
library(data.table)
library(FixedEffectjlr)
JULIA_HOME <- "/Applications/Julia-1.1.app/Contents/Resources/julia/bin/"
FixedEffect_setup(JULIA_HOME)
We are going to use the standard dataset:
dt <- Ecdat::Cigar %>% data.table
IFE is similar to a principal component analysis whenever there is no right hand side parameter. The standard specification is:
ife <-
FixedEffectInteract(dt, lhs = "sales", rhs = "0",
ife = "state+year", rank=1,
fe = "0")
To compare with the principal component procedure, we look at loadings and factors first and then the residuals.
ife_loadings <- unique(ife$dt_augment[, .(state, ife_loadings1=loadings1)])
ife_factors <- unique(ife$dt_augment[, .(year, ife_factors1=factors1)])
Before we can use prcomp
we need to transform the data from long to
wide:
dt_wide <- dt[, .(state, year, sales) ] %>%
dcast(year ~ state, value.var = "sales")
Now we are able to directly run the PC analysis. Note that IFE does not rescale the variable, so we are going to run it with both centering and rescaling options turned off:
pca_res <- prcomp(dt_wide[, -"year"], center=FALSE, scale=FALSE)
# keep loadings and factors separately
pca_loadings <- cbind(unique(dt[, .(state)]),
data.table(pca_res$rotation)[, .(pc_loadings1=PC1, pc_loadings2=PC2) ] )
pca_factors <- cbind(unique(dt[, .(year)]),
data.table(pca_res$x)[, .(pc_factors1 = PC1, pc_factors2 = PC2) ] )
# remerge to estimate residuals
dt_pca <- dt[, .(state, year, sales) ] %>%
merge(., pca_loadings, by = c("state") , all.x = T) %>%
merge(., pca_factors, by = c("year"), all.x = T)
Finally we compare loadings, factors and R2:
# loadings
dt_loadings <- merge(pca_loadings, ife_loadings, by = c("state"))
# are they proportional to each other (difference in scale)
dt_loadings[, .(state, prop_loadings1 = ife_loadings1/pc_loadings1)] %>% as_tibble
## # A tibble: 46 x 2
## state prop_loadings1
## <int> <dbl>
## 1 1 -4721
## 2 3 -4721
## 3 4 -4721
## 4 5 -4721
## 5 7 -4721
## 6 8 -4721
## 7 9 -4721
## 8 10 -4721
## 9 11 -4721
## 10 13 -4721
## # ... with 36 more rows
We do the same thing for factors
# factors
dt_factors <- merge(pca_factors, ife_factors, by = c("year"))
# are they proportional to each other (difference in scale)
dt_factors[, .(year, prop_factors1 = ife_factors1/pc_factors1)] %>% as_tibble
## # A tibble: 30 x 2
## year prop_factors1
## <int> <dbl>
## 1 63 -0.000212
## 2 64 -0.000212
## 3 65 -0.000212
## 4 66 -0.000212
## 5 67 -0.000212
## 6 68 -0.000212
## 7 69 -0.000212
## 8 70 -0.000212
## 9 71 -0.000212
## 10 72 -0.000212
## # ... with 20 more rows
Last note that the proportionality factors are actually the inverse of each other:
dt_loadings[, .(mean(ife_loadings1/pc_loadings1)) ] *
dt_factors[, .(mean(ife_factors1/pc_factors1))]
## V1
## 1: 1
And finally we look to see if residuals are the same:
# residuals for only one factor
dt_residuals <-
merge(ife$dt_augment[, .(state, year, ife_residuals=residuals)],
dt_pca[, .(state, year, pc_residuals=sales-pc_loadings1*pc_factors1)],
by = c("year", "state"))
dt_residuals[, .(ife_residuals = sum(ife_residuals^2),
pc_residuals = sum(pc_residuals^2)) ]
## ife_residuals pc_residuals
## 1: 242566.2 242566.2
We start with the PC analysis. Except that this time we allow for a centering:
pca_res <- prcomp(dt_wide[, -"year"], center=TRUE, scale=FALSE)
# keep loadings and factors separately
pca_loadings <- cbind(unique(dt[, .(state)]),
data.table(pca_res$rotation)[, .(pc_loadings1=PC1, pc_loadings2=PC2) ] )
pca_factors <- cbind(unique(dt[, .(year)]),
data.table(pca_res$x)[, .(pc_factors1 = PC1, pc_factors2 = PC2) ] )
# remerge to estimate residuals
dt_pca <- dt[, .(state, year, sales) ] %>%
merge(., pca_loadings, by = c("state") , all.x = T) %>%
merge(., pca_factors, by = c("year"), all.x = T)
This is similar to removing the within group average, which corresponds to group fixed effects. We obtain the centering in IFE through:
ife <-
FixedEffectInteract(dt, lhs = "sales", rhs = "0",
ife = "state+year", rank=1,
fe = "state")
ife_loadings <- unique(ife$dt_augment[, .(state, ife_loadings1=loadings1)])
ife_factors <- unique(ife$dt_augment[, .(year, ife_factors1=factors1)])
As in the previous example, we merge loadings, factors and compare R2:
# loadings
dt_loadings <- merge(pca_loadings, ife_loadings, by = c("state"))
# are they proportional to each other (difference in scale)
dt_loadings[, .(state, prop_loadings1 = ife_loadings1/pc_loadings1)] %>% as_tibble
## # A tibble: 46 x 2
## state prop_loadings1
## <int> <dbl>
## 1 1 488
## 2 3 488
## 3 4 488
## 4 5 488
## 5 7 488
## 6 8 488
## 7 9 488
## 8 10 488
## 9 11 488
## 10 13 488
## # ... with 36 more rows
# factors
dt_factors <- merge(pca_factors, ife_factors, by = c("year"))
# are they proportional to each other (difference in scale)
dt_factors[, .(year, prop_factors1 = ife_factors1/pc_factors1)] %>% as_tibble
## # A tibble: 30 x 2
## year prop_factors1
## <int> <dbl>
## 1 63 0.00205
## 2 64 0.00205
## 3 65 0.00205
## 4 66 0.00205
## 5 67 0.00205
## 6 68 0.00205
## 7 69 0.00205
## 8 70 0.00205
## 9 71 0.00205
## 10 72 0.00205
## # ... with 20 more rows
We confirm that the proportionality factors are actually the inverse of each other:
dt_loadings[, .(mean(ife_loadings1/pc_loadings1)) ] *
dt_factors[, .(mean(ife_factors1/pc_factors1))]
## V1
## 1: 1.000045
And finally we look to see if residuals are the same:
# residuals for only one factor
dt_residuals <-
merge(ife$dt_augment[, .(state, year, ife_residuals=residuals)],
dt_pca[, .(year, pc_residuals=sales-mean(sales)-pc_loadings1*pc_factors1),
by = .(state) ],
by = c("year", "state"))
dt_residuals[, .(ife_residuals = sum(ife_residuals^2),
pc_residuals = sum(pc_residuals^2)) ]
## ife_residuals pc_residuals
## 1: 174608.2 174608.2