In [1]:
options(digits=2)
# R Imports Boilerplate
rm(list=ls())
In [23]:
library(LalRUtils)
load_or_install(c('tidyverse','data.table', 'stargazer','lfe', 'estimatr', 'systemfit', 
  'magrittr','Hmisc', 'rio', 'texreg', 'knitr', 'foreach', 'doParallel', 'gridExtra',
  'rdrobust', 'rdd', 'foreign'))
theme_set(lal_plot_theme())

exp_type = 'text'
  1. TRUE
  2. TRUE
  3. TRUE
  4. TRUE
  5. TRUE
  6. TRUE
  7. TRUE
  8. TRUE
  9. TRUE
  10. TRUE
  11. TRUE
  12. TRUE
  13. TRUE
  14. TRUE
  15. TRUE
  16. TRUE
  17. TRUE
In [3]:
root = '/home/alal/Dropbox/0_GradSchool/0_classes/450b/applied-econometrics-notes/'
data = paste0(root, 'data/')
In [4]:
#%%
# Read the data into a dataframe
pums = read.table(paste0(data, 'asciiqob.txt'),
                  header           = FALSE,
                  stringsAsFactors = FALSE)
names(pums) <- c('lwklywge', 'educ', 'yob', 'qob', 'pob')
pums$age <- 80 - pums$yob

Instrumental Variables

Structural Equations view

Endogeneity : $Cov(X_i, \epsilon_i) \neq 0$

Instrumental Variable requirements:

  • Relevance : $Cov(X_i, Z_i) \neq 0$
  • Exclusion Restriction: $Cov(Z_i, \epsilon_i) = 0$

IV estimation

$$ \beta_{IV} = \frac{Cov(Y_i,Z_i)}{Cov(X_i, Z_i)} = \frac{\beta_{RF}}{\beta_{FS}} $$

General form:

$$ \hat{\beta}_{IV} = (X'P_z X)^{-1}X'P_z Y $$

where $P_z$ is the projection matrix of Z:= $Z(Z'Z)^{-1}Z'$

More general estimation strategies:

  • 2 Stage Least Squares:

    • Use OLS to regress X on Z and get $\hat{X} = P_z X$
    • Use OLS to regress y on $\hat{X}$ to get $\hat{\beta}_{iv}$
  • Control Function approach:

    • Use OLS (or some other estimation strategy) to regress X on Z and get estimated residuals $\hat{v} = I - P_z X = M_z X$
    • Use OLS to regress y on X and $\hat{v}$ to get $\hat{\beta}_{iv}$

IV estimation with covariates

conditional independence assumption

$$ {Y_{1i}, Y_{0i}, D_{1i}, D_{0i}} \coprod Z_i | X_i $$

Angrist and Krueger - Compulsory Schooling / Quarter of Birth Instrument

Visual test of Instrument Relevance

Z = Quarter of Birth , Y = wages, X = education

In [15]:
pums.qob.means <- pums %>% group_by(yob, qob) %>% summarise_all(funs(mean))
#%%
# Add dates
pums.qob.means$yqob <- lubridate::ymd(paste0("19",
                                  pums.qob.means$yob,
                                  pums.qob.means$qob * 3),
                           truncated = 2)
In [16]:
# Function for plotting data
plot.qob <- function(ggplot.obj, ggtitle, ylab) {
  gg.colours <- c("firebrick", rep("black", 3), "white")
  ggplot.obj + geom_line()                                              +
               geom_point(aes(colour = factor(qob)),
                              size = 5)                                 +
               geom_text(aes(label = qob, colour = "white"),
                         size  = 3,
                         hjust = 0.5, vjust = 0.5,
                         show_guide = FALSE)                            +
               scale_colour_manual(values = gg.colours, guide = FALSE)  +
               ggtitle(ggtitle)                                         +
               xlab("Year of birth")                                    +
               ylab(ylab)
}
# Plot
p.educ     <- plot.qob(ggplot(pums.qob.means, aes(x = yqob, y = educ)),
                       "A. Average education by quarter of birth (first stage)",
                       "Years of education")
p.lwklywge <- plot.qob(ggplot(pums.qob.means, aes(x = yqob, y = lwklywge)),
                       "B. Average weekly wage by quarter of birth (reduced form)",
                       "Log weekly earnings")

p.ivgraph  <- grid.arrange(p.educ, p.lwklywge)
Warning message:
“`show_guide` has been deprecated. Please use `show.legend` instead.”Warning message:
“`show_guide` has been deprecated. Please use `show.legend` instead.”
In [19]:
### Replication (Table 4.1.1 in MHE)
pums        <- fread(paste0(data, 'asciiqob.txt'),
                     header           = FALSE,
                     stringsAsFactors = FALSE)
names(pums) <- c('lwklywge', 'educ', 'yob', 'qob', 'pob')

qobs      <- unique(pums$qob)
qobs.vars <- sapply(qobs, function(x) paste0('qob', x))
pums[, (qobs.vars) := lapply(qobs, function(x) 1*(qob == x))]
#%%
# Column 1: OLS
col1 <- felm(lwklywge ~ 1|0|(educ~educ)|0, pums)
# Column 2: OLS with YOB, POB dummies
col2 <- felm(lwklywge ~ 1| factor(yob) + factor(pob)|(educ~educ)|0, pums)
#%%
col3 <- felm(lwklywge ~ 1 | 0 | (educ~qob1) |0, data=pums)
# Column 3: 2SLS with instrument QOB = 1
#%%
# Column 4: 2SLS with YOB, POB dummies and instrument QOB = 1
col4 <- felm(lwklywge ~ 1 | 0 |(educ~qob1 + qob2 + qob3)|0 , data=pums)
#%%
# Column 5: 2SLS with YOB, POB dummies and instrument (QOB = 1)
col5 <- felm(lwklywge ~ 1 | factor(yob) + factor(pob) |
                        (educ ~ qob1)|0,data=pums)
# Column 6: 2SLS with YOB, POB dummies and full QOB dummies
col6 <- felm(lwklywge ~ 1 | factor(yob) + factor(pob) |
                        (educ ~ factor(qob))|0,data=pums)
# Column 7: 2SLS with YOB, POB dummies and full QOB dummies interacted with YOB
col7 <- felm(lwklywge ~ 1 | factor(yob) + factor(pob) |
                       (educ ~ factor(qob) * factor(yob)),
              data = pums)
Warning message in chol.default(mat, pivot = TRUE, tol = tol):
“the matrix is either rank-deficient or indefinite”Warning message in chol.default(mat, pivot = TRUE, tol = tol):
“the matrix is either rank-deficient or indefinite”Warning message in chol.default(mat, pivot = TRUE, tol = tol):
“the matrix is either rank-deficient or indefinite”
In [20]:
stargazer(col1,col2,col3,col4,col5,col6,col7,
    keep.stat="n",
    covariate.labels=c('Years of education'),
    add.lines = list(
        c("YOB fixed effects", '', 'X','','','X','X','X' ),
        c("POB fixed effects", '', 'X','','','X','X','X' ),
        c("Instruments",       '', '','','','','' ),
        c("QOB=1",             '', '','X','X','X','X' ),
        c("QOB=2",             '', '','','X','','X' ),
        c("QOB=3",             '', '','','X','','X' ),
        c("QOB X YOB dummies", '', '','','','','','X' )),
    omit= "Constant",
    type=exp_type)
=================================================================================
                                        Dependent variable:                      
                   --------------------------------------------------------------
                                              lwklywge                           
                     (1)      (2)      (3)      (4)      (5)      (6)      (7)   
---------------------------------------------------------------------------------
Years of education 0.071*** 0.067*** 0.100*** 0.100*** 0.100*** 0.110*** 0.087***
                   (0.0003) (0.0003) (0.024)  (0.020)  (0.026)  (0.020)  (0.016) 
                                                                                 
---------------------------------------------------------------------------------
YOB fixed effects              X                          X        X        X    
POB fixed effects              X                          X        X        X    
Instruments                                                                      
QOB=1                                   X        X        X        X             
QOB=2                                            X                 X             
QOB=3                                            X                 X             
QOB X YOB dummies                                                           X    
Observations       329,509  329,509  329,509  329,509  329,509  329,509  329,509 
=================================================================================
Note:                                                 *p<0.1; **p<0.05; ***p<0.01

The Wald Estimator

Structural equation:

$$ Y_i = \alpha + \rho S_i + \eta_i $$

$Cov(\eta_i, s_i) \neq 0$. Let $Z_i \in {0,1}$ s.t. $P(Z_i=1) = p$, $$ Cov(Y_i, Z_i) = \{ E[Y_i|Z_i = 1] - E[Y_i|Z_i = 0]\}p(1-p) $$

Then, $$ \rho = \frac{E[Y_i|Z_i = 1] - E[Y_i|Z_i = 0]}{E[s_i|Z_i = 1] - E[s_i|Z_i=0]} $$

In [23]:
pums$z <- (pums$qob == 3 | pums$qob == 4) * 1

# Compare means (and differences)
ttest.lwklywge <- t.test(lwklywge ~ z, pums)
ttest.educ     <- t.test(educ ~ z, pums)

# Compute Wald estimate
sur  <- systemfit(list(first  = educ ~ z,
                       second = lwklywge ~ z),
                  data   = pums,
                  method = "SUR")
wald <- deltaMethod(sur, "second_z / first_z")

wald.estimate <- (mean(pums$lwklywge[pums$z == 1]) - mean(pums$lwklywge[pums$z == 0])) /
                 (mean(pums$educ[pums$z == 1]) - mean(pums$educ[pums$z == 0]))
# wald.se       <- wald.estimate^2 * ()

# OLS estimate
ols <- lm(lwklywge ~ educ, pums)

# Construct table
lwklywge.row <- c(ttest.lwklywge$estimate[1],
                  ttest.lwklywge$estimate[2],
                  ttest.lwklywge$estimate[2] - ttest.lwklywge$estimate[1])
educ.row     <- c(ttest.educ$estimate[1],
                  ttest.educ$estimate[2],
                  ttest.educ$estimate[2] - ttest.educ$estimate[1])
wald.row.est <- c(NA, NA, wald$Estimate)
wald.row.se  <- c(NA, NA, wald$SE)

ols.row.est <- c(NA, NA, summary(ols)$coef['educ' , 'Estimate'])
ols.row.se  <- c(NA, NA, summary(ols)$coef['educ' , 'Std. Error'])

table           <- rbind(lwklywge.row, educ.row,
                         wald.row.est, wald.row.se,
                         ols.row.est, ols.row.se)
colnames(table) <- c("Born in the 1st or 2nd quarter of year",
                     "Born in the 3rd or 4th quarter of year",
                     "Difference")
rownames(table) <- c("ln(weekly wage)",
                     "Years of education",
                     "Wald estimate",
                     "Wald std error",
                     "OLS estimate",
                     "OLS std error")
In [24]:
table
Born in the 1st or 2nd quarter of yearBorn in the 3rd or 4th quarter of yearDifference
ln(weekly wage) 5.9 5.9 0.01198
Years of education12.7 12.8 0.10569
Wald estimate NA NA 0.11339
Wald std error NA NA 0.02153
OLS estimate NA NA 0.07085
OLS std error NA NA 0.00034

The Local Average Treatment Effect (LATE)

Types of validity in research design:

  • Internal Validity - whether a given design successfully uncovers causal effects for the population being studied
  • External Validity - predictive value of the study's finding in a different context

Setup for LATE THM

Populations in IV

  • Compliers : Subpopulation with $D_{1i} > D_{0i} ; D_{1i} = 1 \land D_{0i} = 0$
  • Defiers: Subpopulation with $D_{1i} = 0 \land D_{0i} = 1$ ; Violates monotonicity - should be $\emptyset$
  • Always takers: Subpopulation with $D_{1i} = D_{0i} = 1$
  • Never takers: Subpopulation with $D_{1i} = D_{0i} = 0$

Define 4 potential outcomes

$$ Y(D = 1, Z = 1); Y(D = 1, Z = 0) ; Y(D = 0, Z = 1); Y(D = 0, Z = 0) $$

LATE Theorem (Angrist, Imbens, Rubin 1996)

Assumptions

  • SUTVA

  • Ignorability of Instrument - Instrument is as good as randomly assigned - it is independent of the vector of potential outcomes and potential treatment assignments $$ [\{ Y_i(d,z); \forall d, z \}, D_{1i}, D_{0i}] \bot z_i $$

  • Exclusion Restriction: $Y(D, 0 ) = Y(D, 1) \; \forall D \in \{0, 1\}$ - no direct effect of $Z$ on $Y$ except through $D$.

$Y_i(d,z) =f(d)$,i.e. only a function of $d$ ; $Y_i(d,0) = Y_i(d,1) \equiv Y_{di}$ for $d \in {0,1}$

Combining these, we can write the ignorability condition as

$$ \left( Y _ { 0 } , Y _ { 1 } , D _ { 0 } , D _ { 1 } \right) \perp Z $$
  • Nonzero Average Effect (Relevance) - First Stage - $Cov(d,z) \neq 0$ ; $E[D_{1i}- D_{0i}] \neq 0$
  • Monotonicity Either $\pi_{1i} \geq 0 \; \forall \; i \lor \pi_{1i} \leq 0 \forall i$ (everyone affected by the instrument are affected in the same way - red flag for jackknife instruments) $\iff D_1 \geq D_0$

Sufficient for a causal interpretation of the reduced form

Given the above conditions, The wald estimand can be interpreted as the causal effect of the endogenous variable of interest on the outcome on those whose treatment status can be changed by the instrument

$$ \begin{aligned} \rho & = \frac{E[Y_i|Z_i = 1] - E[Y_i|Z_i = 0]}{E[s_i|Z_i = 1] - E[s_i|Z_i=0]} \\ & = E[Y_{1i} - Y_{0i} | D_{1i} > D_{0i}] \\ & = E[\rho_i | \pi_{1i} > 0] \end{aligned} $$

LATE is the effect of the treatment on the population of compliers. Not informative about never-takers and always-takers

Treatment Effects

  • Average Treatment Effect (ATE) = $E[Y_{1i}-Y_{0i}]$
  • Intent to Treat (ITT) = $E[Y_{i}|Z_i = 1] - E[Y_{i}|Z_i = 0]$ - difference in outcomes between those who were offered treatment and those who weren't
  • Treatment on Treated (TOT) = Weighted average of effect on always- takers and effect on compliers = $E[Y_{1i} - Y_{0i}|D_i=1]$ - mean effect for those who actually participated in the program
  • Local Average Treatment Effect (LATE) : $E[Y_{1i} - Y_{0i} | D_{1i} > D_{0i}]$ for compliers

Under randomisation and perfect compliance, ITT == ATE

In a randomised trial with one-sided non-compliance: Bloom Result

$$ ToT = E[Y_{1i} - Y_{0i}|D_i = 1] = \frac{E[Y_i|z_i = 1] - E[Y_i|Z_i=0]}{E[D_i|z_i =1]} = \frac{RF}{FS} = \frac{ITT}{Compliance} $$
  • When $D$ is randomised, $Z = D$ and every individual is a complier
  • One-sided noncompliance $D_0 = 0$ - those who are assigned to control do not get treatment

Standard errors

Manually running IV using 2 stages does not give you correct standard errors.

OLS residual variance is the variance of $\eta + \rho(s_i - \hat{s_i})$ while for proper 2SLS standard errors, we want the variance of $\eta_i$ only.

Multiple Instruments

2 mutually exclusive dummy instruments ($z_{1i}, z_{2i}$). Resulting 2SLS_estimate of $\rho$ is a weighted average of causal effects for instrument-specific compliant subpopulations

$$ \rho_j = \frac{Cov(Y_i, z_{ji})}{Cov(D_i, z_{ji})} , j = 1,2 $$

First stage fitted values are $\hat{D_i} = \pi_{11}z_{1i} + \pi_{12}z_{2i}$

$$\rho_{2sls} = \psi \rho_1 + (1-\psi) \rho_2 $$

where

$$ \psi = \frac{\pi_{11} Cov(D_1, Z_{1i})}{\pi_{11}Cov(D_1, Z_{1i})+ \pi_{21}Cov(D_1, Z_{2i})} $$

$\psi$ is a number between 0 and 1 that depends on the relative strength of each instrument in the first stage

2SLS vs LIML

Only relevant When the model is overidentified. 2SLS estimation is done by fitting the first stage and reduced form separately; at each step, $\beta$ and $\pi$ are estimated to minimise errors in that equation. On the other hand, limited information maximum-likelihood jointly minimises the sum of squared errors in both stages.

Overidentification

Basic idea: when model is overidentified, we can test exclusion restriction ($Cov(z, e)=0$) by estimating the model with the two instruments separately and testing whether $Cov(z_1, e_2)=0$ (where $e_2$ is the error term from the structural equation when $x$ is instrumented by $z_2$) and vice versa.

Steps:

  • estimate the model with all available instruments, keep residual $\hat{e}$
  • regress $\hat{e}$ on $z_1$ and $z_2$, store $R^2$.
  • Under the null that overid restrictions are satisfied, $nR^2 \sim \chi^2(q)$, where $q$ is the number of overidentifying restrictions (1 in this case)

Hausman Test

IV comes at a price of less precision in estimates, so better to use OLS_unless it can be shown that IV significantly changes results. Define:

$$ H = \frac{(\hat{\beta}_{2sls} - \hat{\beta}_{OLS})^2} {\hat{V}(\hat{\beta}_{2sls}) - \hat{V}(\hat{\beta}_{OLS})} $$

Under the null that OLS is consistent, $H \sim \chi^2_1$. This is equivalent to testing $\gamma = 0$ in the equation $y = \beta x + \gamma \hat{u} + \epsilon$, where $\hat{u}$ are the residuals from the first stage of IV (regression of x on z).

Bartik

In [36]:
set.seed(6152011)
# Number of cities
L <- 1000
# Number of industries
K <- 3
# National growth rates by  industry
g.raw <- runif(K)
g.k <- g.raw/sum(g.raw)
#%%
CreateCity <- function(g.k, eta.s){
    #  Give each city some industry shares
    K <- length(g.k)
    z.raw <- runif(K)
    z <- z.raw / sum(z.raw)
    # Give each city some idiosyncratic growth by industry
    g.kl.tilde <- g.raw/sum(g.raw)
    # Change in employement is inner product of the national
    # + idiosyncratic component with industry share
    x <- (g.kl.tilde + g.k) %*% z
    # Bartik instrument
    # (national growth & city industry share inner product)
    B <- g.k %*% z
    # Realized change in wage
    y <- (1/eta.s) * x + rnorm(1,0,1/10)
    list("y" = y, "B" = B, "x" = x)
}
# Create a city data frame
city.data <- rep(CreateCity(g.k, 1), L)
df <- data.frame(matrix(unlist(city.data), ncol = 3))
colnames(df) <- c("y", "B", "x")
# Estimates
m.ols <- felm(y ~ x | 0 | 0 | 0, data = df)
m.iv <- felm(y ~ 1 | 0 | (x ~ B) | 0, data = df)

stargazer(m.ols, m.iv, type = exp_type)
===========================================================
                                   Dependent variable:     
                               ----------------------------
                                            y              
                                    (1)            (2)     
-----------------------------------------------------------
x                                -0.500***                 
                                  (0.027)                  
                                                           
`x(fit)`                                        1.000***   
                                                 (0.110)   
                                                           
Constant                          1.000***       -0.0004   
                                  (0.020)        (0.077)   
                                                           
-----------------------------------------------------------
Observations                       1,000          1,000    
R2                                 0.250         -2.000    
Adjusted R2                        0.250         -2.000    
Residual Std. Error (df = 998)     0.180          0.350    
===========================================================
Note:                           *p<0.1; **p<0.05; ***p<0.01

Weak Instruments

Given first stage $s_i = \pi z_i + \epsilon_i$ and structural equation $y_i = \rho s_i + \eta_i$, define $\sigma_{\epsilon \eta} = Cov(\epsilon, \eta)$ and $\sigma_{\epsilon}^2 = Var(\epsilon)$. Let $F$ be the F statistic of the first-stage. Then, bias in IV term is:

$$ \rho_{2sls} - \rho = \frac{\sigma_{\epsilon,\eta}}{\sigma_{\epsilon}^2} \frac{1}{F+1} $$

So, as $F \rightarrow \infty$, bias $\rightarrow 0$.

Bound, Jaeger, and Baker (1995) IV may be biased / inconsistent if the instruments are only weakly correlate with the endogenous variables

$$ \rho = \frac{Cov(Y_i, Z_i)}{Cov(D_i,Z_i)} + \frac{Cov(\eta_i, Z_i)}{Cov(D_i, Z_i)} = \alpha_D + \frac{Cov(\eta_i, Z_i)}{Cov(D_i, Z_i)} $$

The second term gets inflated if the instrument is weak the moment there is slight failures of exogeneity.

$$ plim \hat{\beta}_{iv} = \beta + \frac{plim \Delta \bar{\epsilon}}{plim \Delta \bar{x}} $$$$ \frac{plim \hat{\beta}_{iv} - \beta}{plim \hat{\beta}_{ols} - \beta} = \frac{cov(\hat{x}, \epsilon) / cov(x,\epsilon)}{R^2_{x,z}} $$

Staiger, Stock (1997) : rule of thumb - first-stage F statistic $\geq$ 10 (for 1 endogenous variable)

Montiel-Olea, Pflueger (2013) - construct Effective F Statistic

$$ \hat{F}_{eff} = \frac{1}{S} \frac{x'ZZ'x}{tr(\hat{W}_2)} $$

Inference

Power is linear in compliance ratio.

$$ S E _ { \widetilde { L A T E } } \approx \frac { S E _ { \widehat {I T T } } } { \text { Compliance Ratio } } $$

With 10% compliance, LATE SE is 10x the ITT SE

Abadie's Kappa

Likelihood that Complier has a given value of characteristic X relative to the rest of the population

$$ \frac{E[D | Z=1, X=x]-E[D | Z=0, X=x]}{E[D | Z=1]-E[D | Z=0]} $$

Abadie's $\kappa$

Let $g$ be any measurable function whose conditional expectation is defined

$$ \kappa=1-\frac{D \cdot(1-Z)}{P(Z=0 | X)}-\frac{(1-D) \cdot Z}{P(Z=1 | X)} $$

Then, $$ E[g(Y, D, X) | D_{1}>D_{0}]=\frac{E[\kappa \cdot g(Y, D, X)]}{E[\kappa]} $$

Used to calculate the average of $X$ in complier popn:

$$ E[X | D_{1}>D_{0}]=\frac{E[\kappa \cdot X]}{E[\kappa]} $$
  • Estimate $\kappa$
  • Average $X$ with $\kappa$ weights

Replication of Angrist 1991

In [36]:
draft = import(paste0(data, 'draft.txt'))
colnames(draft) = c('lwage', 'age', 'elig', 'vet')
draft %>% group_by(vet) %>% summarise(lw = mean(lwage)) %>% .$lw -> w
wdiff = w[2] - w[1]
m1 = lm(lwage ~ vet, draft)
In [37]:
stargazer(m1, m1,
  se = list(NULL, robust_se(m1)), 
  add.lines = list(c('SE', 'HC0', 'HC2')),
  type = exp_type, header = F, df = F
)
================================================
                        Dependent variable:     
                    ----------------------------
                               lwage            
                         (1)            (2)     
------------------------------------------------
vet                     -0.021        -0.021    
                       (0.017)        (0.017)   
                                                
Constant               5.400***      5.400***   
                       (0.008)        (0.008)   
                                                
------------------------------------------------
SE                       HC0            HC2     
Observations            10,100        10,100    
R2                      0.0001        0.0001    
Adjusted R2             0.0001        0.0001    
Residual Std. Error     0.700          0.700    
F Statistic             1.500          1.500    
================================================
Note:                *p<0.1; **p<0.05; ***p<0.01
In [38]:
freq_table(draft, 'elig') %>% kable(.)
freq_table(draft, 'vet') %>% kable(.)

| elig|    n| prop|
|----:|----:|----:|
|    0| 7320| 0.72|
|    1| 2780| 0.28|

| vet|    n| prop|
|---:|----:|----:|
|   0| 7863| 0.78|
|   1| 2237| 0.22|
In [39]:
draft %>% group_by(elig) %>% summarise(mu = mean(vet)) %>% .$mu -> mu
always_takers = nrow(filter(draft, elig != 1 & vet == 1)) / nrow(filter(draft, elig != 1))
never_takers = nrow(filter(draft,  elig == 1 & vet == 0)) / nrow(filter(draft, elig == 1))
compliers = mu[2] - mu[1]

shares = data.frame(subpop = c('always takers', 'never takers', 'compliers'),
           share  = c(always_takers, never_takers, compliers))
shares %>% kable()

|subpop        | share|
|:-------------|-----:|
|always takers |  0.19|
|never takers  |  0.69|
|compliers     |  0.12|
In [41]:
ivmod = felm(lwage ~ 1 | 0 | (vet ~ elig) | 0, draft)
stargazer(ivmod, header = F, model.names = F, type = exp_type, df = F,
  column.labels = c('2SLS Estimates'),
  covariate.labels = c('Veteran Status'), keep.stat = c('n'))
==========================================
                   Dependent variable:    
               ---------------------------
                          lwage           
                     2SLS Estimates       
------------------------------------------
Veteran Status           -0.230*          
                         (0.130)          
                                          
Constant                5.500***          
                         (0.029)          
                                          
------------------------------------------
Observations             10,100           
==========================================
Note:          *p<0.1; **p<0.05; ***p<0.01
In [42]:
draft %>% filter(age >= 52) -> a52
fml = as.formula('lwage ~ 1 | 0 | (vet ~ elig) | 0')
fs_all = felm(fml, draft)$coefficients[2]
fs_a52 = felm(fml, a52)$coefficients[2]

(lik = fs_a52/fs_all)
1.62564729249267
In [43]:
# logit for selection
selmod = glm(vet ~ elig, family=binomial(link=logit), draft)
draft$dhat = selmod$fitted.values

draft %<>% mutate(
  κ = 1 - ((vet * (1-elig))/(1-dhat)) - ((1-vet)*elig/dhat))
avg_complier_age = weighted.mean(draft$age, draft$κ)
avg_complier_age
51.3718645391492

Regression Discontinuity

Estimand (Hahn et al 2001)

$$ \alpha_{SRDD} = E[Y_{1}-Y_{0} | X=c] =\lim_{x \downarrow c} E[Y_{1} | X=x]- \lim_{x \uparrow c} E[Y_{0} | X=x] $$

Sharp RD

Treatment changes discontinuously at some particular value $x_0$ of x

$$ D_i = \begin{cases} 1 \; \text{ if} \; x_i \geq x_0, \\ 0 \; \text{ if} \; x_i < x_0, \end{cases} $$

This value $x$ is likely to be arbitrary in the neighbourhood of $x_0$

RD_takes advantage of a discrete change in treatment in response to a small change in the running variable x. Causal inference relies on the (local) arbitrariness of whether unit $i$ has $x_i < x_0 | x_i > x_0$

Formally,

\begin{align*} E[Y_{0i}|x_i] & = \alpha + \beta x_i \\ Y_{1i} = Y_{0i} + \rho \end{align*}

which leads to the regression:

$$ Y_i = \alpha + \beta x_i + \tau D_i + \eta_i $$

To permit nonlinearity, we estimate:

$$ Y_i = f(x_i) + \tau D_i + \eta_i $$

General Setup with normalised running/forcing variable

Define $c := x_0$. The treatment effect can be uncovered by running a pooled regression

$$ Y = \alpha_l +_\tau D + f(X - c) + \epsilon $$

It is recommended to let the regression function differ on the two sides of the cutoff by including interaction terms between D and X.

Then, the pooled regression is

$$ Y = \alpha_l + \tau D + \beta_l (X - c) + (\beta_r - \beta_l) D (X-c) + \epsilon $$

Higher order polynomials can be included to better approximate the regression functions (per Stone Weirstrass thm).

In [27]:
# Generate series
nobs      = 100
x         <- runif(nobs)
y.linear  <- x + (x > 0.5) * 0.25 + rnorm(n = nobs, mean = 0, sd = 0.1)
y.nonlin  <- 0.5 * sin(6 * (x - 0.5)) + 0.5 + (x > 0.5) * 0.25 + rnorm(n = nobs, mean = 0, sd = 0.1)
y.mistake <- 1 / (1 + exp(-25 * (x - 0.5))) + rnorm(n = nobs, mean = 0, sd = 0.1)
rd.series <- data.frame(x, y.linear, y.nonlin, y.mistake)

# Make graph with ggplot2
g.data   <- ggplot(rd.series, aes(x = x, group = x > 0.5))

p.linear <- g.data + geom_point(aes(y = y.linear))  +
                     stat_smooth(aes(y = y.linear),
                                 method = "lm",
                                 se     = FALSE)    +
                     geom_vline(xintercept = 0.5)   +
                     ylab("Outcome")                +
                     ggtitle(bquote('A. Linear E[' * Y["0i"] * '|' * X[i] * ']'))

p.nonlin <- g.data + geom_point(aes(y = y.nonlin))  +
                     stat_smooth(aes(y = y.nonlin),
                                 method  = "lm",
                                 formula = y ~ poly(x, 2),
                                 se      = FALSE)   +
                     geom_vline(xintercept = 0.5)   +
                     ylab("Outcome")                +
                     ggtitle(bquote('B. Nonlinear E[' * Y["0i"] * '|' * X[i] * ']'))

f.mistake <- function(x) {1 / (1 + exp(-25 * (x - 0.5)))}
p.mistake <- g.data + geom_point(aes(y = y.mistake))     +
                      stat_smooth(aes(y = y.mistake),
                                  method = "lm",
                                  se     = FALSE)        +
                      stat_function(fun      = f.mistake,
                                    linetype = "dashed") +
                      geom_vline(xintercept = 0.5)       +
                      ylab("Outcome")                    +
                      ggtitle('C. Nonlinearity mistaken for discontinuity')

grid.arrange(p.linear, p.nonlin, p.mistake, ncol = 1)

Lee (2008)

Regression Formulation

$$ Y_{i t+1} = \alpha + \beta_0 \tilde{X}_{it} + \tau D_{it} + \beta_1 \tilde{X}_{it} D_{it} + \epsilon_i $$
In [5]:
lee = import(paste0(data, 'Lee.dta'))

lee %<>% filter(party == 100) # subset to dems
lee %<>%
  mutate(
    win = 1 * (origvote == highestvote),
    vote_share = origvote / totvote,
    margin = ifelse(win == 1, # if win
                    (origvote - sechighestvote)/totvote, # margin if win
                    (origvote - highestvote)/totvote) # margin if loss
    ) %>%
  arrange(distid, yearel) %>%
  group_by(distid) %>%
  mutate(last_margin = dplyr::lag(margin, 1), # margin in last election
         incumbent   = 1 * (dplyr::lag(win, 1) == 1)
  ) %>%
  ungroup()

Testing Balance across discontinuity

In [31]:
ggplot(lee, aes(x = last_margin)) + geom_histogram(bins = 50) + geom_vline(xintercept = 0)

McCrary (2008)

In [32]:
DCdensity(lee$last_margin, verbose = T)
Assuming cutpoint of zero.
Using calculated bin size:  0.008 
Using calculated bandwidth:  0.200 
Log difference in heights is  0.001  with SE  0.076 
  this gives a z-stat of  0.010 
  and a p value of  0.992 
0.991765996299681

Treatment Assignment

In [6]:
ggplot(lee, aes(x = last_margin, y = incumbent, colour = as.factor(incumbent))) +
  geom_point(alpha = 0.5, size = 0.3) +
  labs(title = 'Incumbency as function of past margin',
      y = bquote(incumbent[t+1]), x = bquote(margin[t]))
Warning message:
“Removed 1664 rows containing missing values (geom_point).”
In [8]:
lee %>% filter(abs(last_margin) <= 0.25) -> cut_25
m1 = lm(vote_share ~ incumbent*last_margin, cut_25)
yhat = predict(m1, cut_25)
stargazer(m1, header = F, type = exp_type, se = list(robust_se(m1)),
  model.names = F,
  keep = c('incumbent$'),  dep.var.caption = 'Vote Share',
  dep.var.labels.include = FALSE,  multicolumn = F,
  omit.stat = c('f', 'adj.rsq', 'ser'))
========================================
                     Vote Share         
             ---------------------------
----------------------------------------
incumbent             0.086***          
                       (0.006)          
                                        
----------------------------------------
Observations            4,133           
R2                      0.410           
========================================
Note:        *p<0.1; **p<0.05; ***p<0.01
In [9]:
df = data.frame(yhat, vote_share = cut_25$vote_share,
  margin_last = cut_25$last_margin, inc = cut_25$incumbent) %>% na.omit()

ggplot(df, aes(x = margin_last)) +
  geom_vline(xintercept = 0, linetype = 'dashed') +
  geom_point(aes(y = vote_share, colour = as.factor(inc)),
    alpha = 0.4, size = 0.4) +
  geom_line(aes(y = yhat, colour = as.factor(inc))) +
  theme(legend.position = "none") +
  labs(title = 'RD estimates of the Incumbency Advantage',
    y = bquote(VoteShare[t+1]), x = bquote(margin[t]))