Sala i Martin (1997)

library(LalRUtils)
libreq(tidyverse, data.table, foreach, magrittr, tictoc, readxl, doParallel,
  patchwork)
##      wants        loaded
## [1,] "tidyverse"  TRUE  
## [2,] "data.table" TRUE  
## [3,] "foreach"    TRUE  
## [4,] "magrittr"   TRUE  
## [5,] "tictoc"     TRUE  
## [6,] "readxl"     TRUE  
## [7,] "doParallel" TRUE  
## [8,] "patchwork"  TRUE
theme_set(lal_plot_theme())
set.seed(42)

data prep

if (!file.exists("millions.XLS")){
  download.file("http://www.columbia.edu/~xs23/data/millions.XLS", "millions.XLS")
}
vcodes = read_excel("millions.XLS", sheet = "Variable code") %>% setDT
colnames(vcodes) = c('rawname', 'name')
growth_dat = read_excel("millions.XLS", sheet = "BARROSHO") %>% setDT
growth_dat[, gamma := as.numeric(gamma)]
## Warning in eval(jsub, SDenv, parent.frame()): NAs introduced by coercion
perm = c("X1", "X2", "X3")
others = setdiff(paste0("X", 4:62), "X35")  # because 35 is missing for some reason

workhorse function

Then, for each variable \(z\), compute

\[ \hat{\beta}_z = \sum_{j = 1}^M \omega_{zj} \beta_{zj} \]

where the weights are

\[ \omega_{wj} = \frac{L_{zj}}{\sum_{i=1}^m L_{zi}} \]

the variance is computed analogously.

salaimartin = function(v){
  all_others = others[others != v] # all other vars
  rhses = t(combn(all_others, 3)) # combinations of all other vars
  formulae = apply(rhses, 1, function(x) paste(c(perm, v , x), collapse = " + "))
  # parallelise
  registerDoParallel(4)
  res = foreach(d=1:length(formulae), .combine='rbind') %dopar% {
    f = formulae[[d]]
    m = lm(as.formula(paste0('gamma ~', f)), data = growth_dat)
    β_j = coef(m)[v] ; σ_j = vcov(m)[v,v]
    c(β_j, σ_j, logLik(m))
  }
  stopImplicitCluster()
  res = data.frame(res)
  β = res[, 1]; σ = res[, 2]; L = res[, 3]
  sum_L = sum(L); ω = L / sum(L)
  β_j_bar = weighted.mean(β, ω)
  σ_j_bar = weighted.mean(σ, ω)
  CDF = ecdf(β)
  cdf0 = max(CDF(0), 1 - CDF(0))
  list(c(β_j_bar, σ_j_bar, cdf0), res)
}

Run

# %% run everything
tic()
results = map(others, salaimartin)
toc()

saveRDS(results, 'XSI_regs.rds')
results = readRDS('XSI_regs.rds')
vcodes2 = vcodes[rawname %in% others]
distplotter = function(k){
  out = results[[k]]
  sumstats = out[[1]]
  sims = out[[2]]
  colnames(sims) = c("beta", "SE", "LL")
  p = ggplot(sims, aes(x = beta)) + geom_density() +
    geom_vline(xintercept = sumstats[1], linetype = 'dotted', colour = 'red') +
    geom_vline(xintercept = 0, colour = 'cornflowerblue') +
    labs(title = vcodes2[k, 2]$name, x = '', y = '', subtitle = glue::glue("{round(sumstats[1], 2)}"))
  return(p)
}
map(1:length(results), distplotter) %>%
  wrap_plots(nrow = 12) +
  plot_annotation("Two Million Regressions!")