Minimal example - simulated data (from ebal documentation)
- solving for weights in ebal
- solving for weights in CVXR
  - compare weights
Entropy Balancing for causal inference

rm(list = ls())
library(LalRUtils)
libreq(CVXR, tidyverse, data.table, ebal, tictoc, knitr)

##      wants        loaded
## [1,] "CVXR"       TRUE  
## [2,] "tidyverse"  TRUE  
## [3,] "data.table" TRUE  
## [4,] "ebal"       TRUE  
## [5,] "tictoc"     TRUE  
## [6,] "knitr"      TRUE

knitr::opts_chunk$set(fig.width=12, fig.height=8,
                      echo=TRUE, warning=FALSE, message=FALSE, cache = TRUE)

CVXR is a disciplined convex programming (DCP) domain-specific-language (DSL) that allows rapid prototyping of estimators that solve convex optimization problems. This writeup illustrates the implementation of Entropy Balancing (Hainmueller 2012) in CVXR.

https://cvxr.rbind.io/

Choose balancing weights \(w_i\) that solves

\[\begin{align*} \max_{\mathbf{w}} H(w) &= - \sum_{i : D_i = 0} w_i \log w_i \\ \text{Balance constraints:} & \sum_{i : D_i = 0} w_i c_{ri}(\mathbf{X}_i) = m_r \text{ with } r \in 1, \dots, R \\ \text{'Proper' weights:} & \sum_{i : D_i = 0} w_i = 1 \; \; \text{and} w_i \geq 0 \; \forall \; \{i: D = 0 \} \end{align*}\]

Hainmueller (2012) solves the dual of this problem in ebal. Here, we implement the math directly to illustrate the rapid translation of math into code.

Minimal example - simulated data (from ebal documentation)

Simulated data where treated observations have mean 0.5, and control obs have mean 0.

# %% # create a toy dataset (50 controls, 30 treated)
d <- c(rep(0,50),rep(1,30))
# 3 covariates normals, treated have higher means
X           <- rbind(replicate(3,rnorm(50, 0)),
                     replicate(3,rnorm(30, .5)))
colnames(X) <- paste("x",1:3,sep="")
df = data.table(d, X)

solving for weights in `ebal`

# %% # entropy balancing - original
eb.out <- ebalance(Treatment=df$d, X=df[, 2:4])

## Converged within tolerance

# %% # means in reweighted control group data match treated
rbind(
  # means in T and C groups
  df[, lapply(.SD, mean), by = d],
  # means in reweighted control group data
  data.table(d = -1, t(apply(df[d==0, 2:4], 2, weighted.mean, w=eb.out$w)))
)

##     d      x1       x2         x3
## 1:  0 0.08333 -0.09338 -0.0007558
## 2:  1 0.42659  0.17099  0.3293803
## 3: -1 0.39624  0.17093  0.3090740

bottom row is reweighted mean of control Xs using ebal weights

solving for weights in `CVXR`

solving for weights in CVXR involves writing the optimisation problem almost verbatim as code, and running it. the entr ‘atom’ is an internal CVXR shortcut to evaluate \(-w_i \log w_i\). Several other similar atoms exist (for computing norms, and so on).

# balance with treatment means
b = df[d == 1, lapply(.SD, mean)][1, 2:4] |> as.numeric()
# control observations
X = df[d == 0, 2:4] |> as.matrix()
# %% manual entropy balancing
ω = Variable(df[d == 0, .N])                  # weights are of length n_0
objective <- Maximize(sum(entr(ω)))           # entropy objective fn
constraints <- list(
    ω >= 0, sum(ω) == 1,  # proper weights
    t(X) %*% ω == b       # balance
  )
prob <- Problem(objective, constraints)
result <- solve(prob, solver = "MOSEK")
ω_hat <- result$getValue(ω)

# %%
rbind(
  # means in T and C groups
  df[, lapply(.SD, mean), by = d],
  # means in reweighted control group data
  data.table(d = -1, t(apply(df[d==0, 2:4], 2, weighted.mean, w=ω_hat)))
)

##     d      x1       x2         x3
## 1:  0 0.08333 -0.09338 -0.0007558
## 2:  1 0.42659  0.17099  0.3293803
## 3: -1 0.42659  0.17099  0.3293803

compare weights

# %% compare solutions
cbind(eb.out$w / sum(eb.out$w), ω_hat)  |> head()

##         [,1]    [,2]
## [1,] 0.06431 0.06844
## [2,] 0.01413 0.01330
## [3,] 0.03443 0.03532
## [4,] 0.01662 0.01609
## [5,] 0.04143 0.04430
## [6,] 0.01488 0.01494

Entropy Balancing for causal inference

libreq(causalsens, glue, lfe)

##      wants        loaded
## [1,] "causalsens" TRUE  
## [2,] "glue"       TRUE  
## [3,] "lfe"        TRUE

data(lalonde.exp); data(lalonde.psid);
dtlalonde = data.table(lalonde.exp)
dtpsid = data.table(lalonde.psid)
y = 're78'
w =  'treat'
x = setdiff(colnames(dtpsid), c(y, w))

using `ebal` weights in regression

# %% ebal reg
eb_reg = function(df, y, w, ctrls){
  setorderv(df, w)
  X = df[, ..ctrls]; w = df[[w]]; y = df[[y]]
  out_eb <- ebalance(Treatment= w, X= X, print.level = -1)
  wt1 = c(out_eb$w, w[w==1])
  eff = felm(y ~ w, weights = wt1) %>% robustify()
  summary(eff)$coefficients[2,1:2]
}

solving for ebal weights in `CVXR`

eb_reg2 = function(df, y, w, x){
  setorderv(df, w)
  # treated and control obs
  ctrl  = df[eval(parse(text = glue("{w} == 0")))]
  treat = df[eval(parse(text = glue("{w} == 1")))]
  # balance constraints - means of all Xs in treated group
  b = treat[, lapply(.SD, mean), .SDcols = x]  |> as.numeric()
  X = ctrl[, ..x] |> as.matrix() # matrix of control obs
  # solve optimisation problem in CVXR
  ω = Variable(nrow(ctrl))                      # weights are of length n_0
  objective <- Maximize(sum(entr(ω)))           # entropy objective fn
  constraints <- list(
      ω >= 0, sum(ω) == 1,  # proper weights
      t(X) %*% ω     == b       # balance
  )
  prob <- Problem(objective, constraints)
  result <- solve(prob, solver = "MOSEK")
  ω_hat <- result$getValue(ω)
  # line up weights (solved weights for control units and 1/n1 for treat units)
  ω_all = c(ω_hat, rep(1/nrow(df), nrow(treat) ) )
  treatment = df[[w]]; y = df[[y]]
  eff = felm(y ~ treatment, weights = ω_all)  |> robustify()
  summary(eff)$coefficients[2,1:2]
}

demo

rbind(
  eb_reg(dtlalonde, y, w, x),
  eb_reg2(dtlalonde, y, w, x)
)

##      Estimate Std. Error
## [1,]     1712      699.2
## [2,]     1712      699.2

rbind(
  eb_reg(dtpsid, y, w, x),
  eb_reg2(dtpsid, y, w, x)
)

##      Estimate Std. Error
## [1,]     2425      876.5
## [2,]     2425      876.5

solutions are identical.

Convex Optimization for reweighting estimators

Apoorva Lal

December 09, 2021

Minimal example - simulated data (from ebal documentation)

solving for weights in `ebal`

solving for weights in `CVXR`

compare weights

Entropy Balancing for causal inference

using `ebal` weights in regression

solving for ebal weights in `CVXR`

demo

Convex Optimization for reweighting estimators

Apoorva Lal

December 09, 2021

Minimal example - simulated data (from ebal documentation)

solving for weights in ebal

solving for weights in CVXR

compare weights

Entropy Balancing for causal inference

using ebal weights in regression

solving for ebal weights in CVXR

demo

solving for weights in `ebal`

solving for weights in `CVXR`

using `ebal` weights in regression

solving for ebal weights in `CVXR`