Entropy Balancing Performance Considerations
Apoorva Lal
performance_comparisons.rmd
## Loading required package: torch
## ##
## ## ebal: Implements Entropy Balancing.
# Benchmark harness, modified from the dummy example in the documentation.
#
# Simulates a two-group dataset with 5 * k control rows and 3 * k treated
# rows and three normally distributed covariates (mean 0 for controls, mean
# 0.5 for treated), then calls ebalance() once with method = "GaussNewton"
# and once with method = "AutoDiff". For each fit it records the elapsed
# time and the largest absolute gap between the treated covariate means and
# the control covariate means reweighted by the fit's `w`.
#
# Args:
#   k: scale factor for the simulated sample; the problem size is 8 * k.
#
# Returns:
#   A list with elements
#     problem_size      - total number of rows (n0 + n1)
#     max_imbalance_old - max absolute mean gap under GaussNewton weights
#     max_imbalance_new - max absolute mean gap under AutoDiff weights
#     old_time          - elapsed seconds for GaussNewton (NA if it errored)
#     new_time          - elapsed seconds for AutoDiff
#   All numeric summaries are rounded to 3 decimals.
#
# Side effects:
#   Draws from the RNG and prints "GN did not converge <k>" when the
#   GaussNewton call raises an error. An error from the AutoDiff call is
#   not caught and propagates to the caller.
trial_fn = function(k) {
  n0 = 5 * k
  n1 = 3 * k
  treatment = c(rep(0, n0), rep(1, n1))
  X = rbind(replicate(3, rnorm(n0, 0)), replicate(3, rnorm(n1, .5)))

  # drop = FALSE keeps both groups as matrices even when a group has one row,
  # so colMeans() / apply() below never receive a bare vector.
  is_treated = treatment == 1
  X_treated = X[is_treated, , drop = FALSE]
  X_control = X[!is_treated, , drop = FALSE]
  treated_means = colMeans(X_treated)

  # Largest absolute difference between treated means and the control means
  # reweighted by `w`, rounded to 3 decimals.
  max_imbalance = function(w) {
    control_means = apply(X_control, 2, weighted.mean, w = w)
    round(max(abs(treated_means - control_means)), 3)
  }

  # Fit with one method and return the fit together with the elapsed time
  # (third element of system.time()).
  time_fit = function(method) {
    fit = NULL
    elapsed = system.time({
      fit = ebalance(Treatment = treatment, X = X, method = method)
    })[3]
    list(fit = fit, elapsed = elapsed)
  }

  # gauss newton fails often: on error fall back to uniform control weights
  # and report the timing as NA so the comparison can still be tabulated.
  gn = tryCatch(
    time_fit("GaussNewton"),
    error = function(e) {
      cat("GN did not converge", k, "\n")
      list(fit = list(w = rep(1, n0)), elapsed = NA)
    }
  )
  ad = time_fit("AutoDiff")

  list(
    problem_size = n0 + n1,
    # means in reweighted control group data
    max_imbalance_old = max_imbalance(gn$fit$w),
    # estimates from new ebal
    max_imbalance_new = max_imbalance(ad$fit$w),
    old_time = round(unname(gn$elapsed), 3),
    new_time = round(unname(ad$elapsed), 3)
  )
}
trial_fn(1e2)
## $problem_size
## [1] 800
##
## $max_imbalance_old
## [1] 0
##
## $max_imbalance_new
## [1] 0
##
## $old_time
## [1] 0.002
##
## $new_time
## [1] 0.016
Summarize for growing problem size.
| problem_size | max_imbalance_old | max_imbalance_new | old_time | new_time |
|---|---|---|---|---|
| 80 | 0.001 | 0 | 0.001 | 0.017 |
| 800 | 0.001 | 0 | 0 | 0.009 |
| 8000 | 0 | 0 | 0.002 | 0.026 |
| 80000 | 0 | 0 | 0.027 | 0.368 |
| 8e+05 | 0 | 0 | 0.477 | 0.229 |
| 8e+06 | 0 | 0 | 4.255 | 1.281 |
| 8e+07 | 0 | 0 | 31.117 | 9.085 |