Entropy Balancing Performance Considerations

library(ebal); library(knitr)

## Loading required package: torch

## ##
## ## ebal: Implements Entropy Balancing.

# Benchmark helper, modified from the dummy example in the ebal documentation.
#
# Simulates 5 * k control rows and 3 * k treated rows with three covariates
# (controls drawn with mean 0, treated with mean 0.5), fits entropy-balancing
# weights with both solvers, and reports covariate balance and elapsed time
# for each.
#
# Args:
#   k: scale factor for the simulated sample size.
#
# Returns a list with:
#   problem_size        total number of rows (n0 + n1)
#   max_imbalance_old   largest absolute gap between treated covariate means
#                       and reweighted control means, GaussNewton weights
#   max_imbalance_new   same quantity for the AutoDiff weights
#   old_time, new_time  elapsed seconds per fit; old_time is NA when the
#                       GaussNewton fit raised an error
trial_fn <- function(k) {
  n0 <- 5 * k
  n1 <- 3 * k
  treatment <- c(rep(0, n0), rep(1, n1))
  # Controls are drawn first, then treated, so the RNG stream is consumed in
  # the same order on every call.
  X <- rbind(replicate(3, rnorm(n0, 0)), replicate(3, rnorm(n1, .5)))

  # drop = FALSE keeps both pieces as matrices even if a group has one row.
  X_treated <- X[treatment == 1, , drop = FALSE]
  X_control <- X[treatment == 0, , drop = FALSE]
  treated_means <- colMeans(X_treated)

  # Largest absolute difference between the treated means and the control
  # means reweighted by `w`, rounded to 3 decimals.
  max_imbalance <- function(w) {
    control_means <- apply(X_control, 2, weighted.mean, w = w)
    round(max(abs(treated_means - control_means)), 3)
  }

  # gauss newton fails often: on error, report the reason and fall back to
  # uniform control weights with a missing timing instead of aborting.
  old_fit <- tryCatch(
    {
      elapsed <- system.time({
        ebout1 <- ebalance(Treatment = treatment, X = X, method = "GaussNewton")
      })[["elapsed"]]
      list(w = ebout1$w, time = elapsed)
    },
    error = function(err) {
      cat("GN did not converge", k, ":", conditionMessage(err), "\n")
      list(w = rep(1, n0), time = NA_real_)
    }
  )

  new_time <- system.time({
    ebout2 <- ebalance(Treatment = treatment, X = X, method = "AutoDiff")
  })[["elapsed"]]

  list(
    problem_size = n0 + n1,
    # balance achieved by the GaussNewton weights (or the uniform fallback)
    max_imbalance_old = max_imbalance(old_fit$w),
    # balance achieved by the AutoDiff weights
    max_imbalance_new = max_imbalance(ebout2$w),
    old_time = round(old_fit$time, 3),
    new_time = round(new_time, 3)
  )
}

# Single trial at k = 100, i.e. 500 control rows plus 300 treated rows.
trial_fn(1e2)

## $problem_size
## [1] 800
## 
## $max_imbalance_old
## [1] 0
## 
## $max_imbalance_new
## [1] 0
## 
## $old_time
## [1] 0.002
## 
## $new_time
## [1] 0.016

Summarize balance and timing as the problem size grows.

# Run one trial per k = 10, 100, ..., 1e7 and print one table row per trial.
trial_results <- sapply(10^(1:7), trial_fn)
# sapply puts each trial in a column; transpose so trials become rows.
results_table <- as.data.frame(t(trial_results))
kable(results_table)

problem_size	max_imbalance_old	old_time	new_time
80	0.001	0.001	0.017
800	0.001	0	0.009
8000	0	0.002	0.026
80000	0	0.027	0.368
8e+05	0	0.477	0.229
8e+06	0	4.255	1.281
8e+07	0	31.117	9.085

Apoorva Lal