Processing math: 1%

\newcommand{\var}{\mathrm{Var}} \newcommand{\epsi}{\varepsilon} \newcommand{\phii}{\varphi} \newcommand{\tra}{^{\top}} \newcommand{\sumin}{\sum_{i=1}^n} \newcommand{\sumiN}{\sum_{i=1}^n} \newcommand{\norm}[1]{\left\Vert{#1} \right\Vert} \newcommand\Bigpar[1]{\left( #1 \right )} \newcommand\Bigbr[1]{\left[ #1 \right ]} \newcommand\Bigcr[1]{\left\{ #1 \right \}} \newcommand\SetB[1]{\left\{ #1 \right\}} \newcommand\Sett[1]{\mathcal{#1}} \newcommand{\Data}{\mathcal{D}} \newcommand{\Ubr}[2]{\underbrace{#1}_{\text{#2}}} \newcommand{\Obr}[2]{ \overbrace{#1}^{\text{#2}}} \newcommand{\sumiN}{\sum_{i=1}^N} \newcommand{\dydx}[2]{\frac{\partial #1}{\partial #2}} \newcommand\Indic[1]{\mathds{1}_{#1}} \newcommand{\Realm}[1]{\mathbb{R}^{#1}} \newcommand{\Exp}[1]{\mathbb{E}\left[#1\right]} \newcommand{\Expt}[2]{\mathbb{E}_{#1}\left[#2\right]} \newcommand{\Var}[1]{\mathbb{V}\left[#1\right]} \newcommand{\Covar}[1]{\text{Cov}\left[#1\right]} \newcommand{\Prob}[1]{\mathbf{Pr}\left(#1\right)} \newcommand{\Supp}[1]{\text{Supp}\left[#1\right]} \newcommand{\doyx}{\Prob{Y \, |\,\mathsf{do} (X = x)}} \newcommand{\doo}[1]{\Prob{Y |\,\mathsf{do} (#1) }} \newcommand{\R}{\mathbb{R}} \newcommand{\Z}{\mathbb{Z}} \newcommand{\wh}[1]{\widehat{#1}} \newcommand{\wt}[1]{\widetilde{#1}} \newcommand{\wb}[1]{\overline{#1}} \newcommand\Ol[1]{\overline{#1}} \newcommand\Ul[1]{\underline{#1}} \newcommand\Str[1]{#1^{*}} \newcommand{\F}{\mathsf{F}} \newcommand{\ff}{\mathsf{f}} \newcommand{\Cdf}[1]{\mathbb{F}\left(#1\right)} \newcommand{\Cdff}[2]{\mathbb{F}_{#1}\left(#2\right)} \newcommand{\Pdf}[1]{\mathsf{f}\left(#1\right)} \newcommand{\Pdff}[2]{\mathsf{f}_{#1}\left(#2\right)} \newcommand{\dd}{\mathsf{d}} \newcommand\Normal[1]{\mathcal{N} \left( #1 \right )} \newcommand\Unif[1]{\mathsf{U} \left[ #1 \right ]} \newcommand\Bern[1]{\mathsf{Bernoulli} \left( #1 \right )} \newcommand\Binom[1]{\mathsf{Bin} \left( #1 \right )} \newcommand\Pois[1]{\mathsf{Poi} \left( #1 \right )} \newcommand\BetaD[1]{\mathsf{Beta} \left( #1 \right )} \newcommand\Diri[1]{\mathsf{Dir} \left( #1 \right )} \newcommand\Gdist[1]{\mathsf{Gamma} \left( #1 \right )} \def\mbf#1{\mathbf{#1}} \def\mrm#1{\mathrm{#1}} \def\mbi#1{\boldsymbol{#1}} \def\ve#1{\mbi{#1}} \def\vee#1{\mathbf{#1}} \newcommand{\Mat}[1]{\mathbf{#1}} \newcommand{\eucN}[1]{\norm{#1}} \newcommand{\lzero}[1]{\norm{#1}_0} \newcommand{\lone}[1]{\norm{#1}_1} \newcommand{\ltwo}[1]{\norm{#1}_2} \newcommand{\pnorm}[1]{\norm{#1}_p}

#' weighted OLS (can handle negative weights)
OLSw = \(y, X, w) {
    XtWX = car::wcrossprod(X, w = w)
    XtWy = car::wcrossprod(X, y, w = w)
    solve(XtWX) %*% XtWy
}

#' IPW and AIPW
#' @param y outcome vector
#' @param w treatment dummy
#' @param X1 matrix of covariates for propensity score
#' @param X2 matrix of covariates in outcome model
#' @param f family for propensity score

drHT = \(y, w, X1, X2 = cbind(1, X1), f = binomial(), estimand = "ATE"){
    # solve for pscore weights
    pihat = glm.fit(X1, w, family = f)$fitted.values
    if(estimand == "ATE"){
        wt = sqrt(w/pihat + (1-w)/(1-pihat))
    } else if(estimand == "ATT"){
        rho = mean(w)
        wt = sqrt(
            w +                                       # all treat obs weighted as 1
            ((1-w) * pihat) / (rho * (1-pihat) )      # control obs weights
        )
    }
    if(is.null(X2)) X2 = matrix(rep(1, nrow(X1)))
    # run weighted regression
    OLSw(y, as.matrix(cbind(w, X2)), wt)[1,1]
}

Average Treatment Effect (ATE)

To implement AIPW as a weighted regression, we fit

Y_i = \alpha + \tau W_i + X_i ' \beta + \epsilon_i

with weights

\lambda_i = \sqrt{ \frac{W_i}{\pi(X_i)} + \frac{1-W_i}{1- \pi(X_i)} }

data(lalonde.exp); data = setDT(lalonde.exp)
yn = 're78'; wn = 'treat'; xn = setdiff(colnames(data), c(yn, wn))
y = data[[yn]]; w = data[[wn]]; X = data[, ..xn]

drHT(y, w, X, X2 = NULL)
##    w 
## 1674
drHT(y, w, X)
##    w 
## 1642

inference using bootstrap

bootf = \(d, i, yn, wn, xn){
    y = d[[yn]][i]; w = d[[wn]][i]; X = as.matrix(d[i, ..xn])
    c(  # naive
        mean(y[w==1]) - mean(y[w==0]),
        # ipw
        drHT(y, w, X, X2 = NULL),
        # aipw
        drHT(y, w, X)
    )
}

boot(
    lalonde.exp,
    bootf,
    yn = 're78', wn = 'treat', xn = setdiff(colnames(data), c(yn, wn)),
    R = 1000,
    parallel = "multicore", ncpus = 6
)
## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = lalonde.exp, statistic = bootf, R = 1000, yn = "re78", 
##     wn = "treat", xn = setdiff(colnames(data), c(yn, wn)), parallel = "multicore", 
##     ncpus = 6)
## 
## 
## Bootstrap Statistics :
##     original  bias    std. error
## t1*     1794  -6.055       673.4
## t2*     1674  -5.217       659.9
## t3*     1642  -3.170       688.1

Average Treatment effect on the Treated (ATT )

Next, we give all treatment units weights of 1 and control units receive weights

\lambda_i = \sqrt{ \frac{\pi(\vee{X}_i)}{\rho} \frac{1 - W_i}{1 - \pi(\vee{X}_i)} }

where \rho = \Prob{W_i = 1}.

data(lalonde.psid); data = setDT(lalonde.psid)
yn = 're78'; wn = 'treat'; xn = setdiff(colnames(data), c(yn, wn))
y = data[[yn]]; w = data[[wn]]; X = data[, ..xn]

drHT(y, w, X, estimand = "ATT")
##    w 
## 1958