options(digits=2)
rm(list=ls())
library(LalRUtils)
load_or_install(c('tidyverse','data.table', 'stargazer','lfe','lmtest', 'panelView', 'gsynth',
'magrittr', 'gridExtra', 'rio', 'plm', 'lme4', 'Rcpp', 'foreach', 'doParallel',
'GGally', 'abind'))
theme_set(theme_bw())
exp_type = 'text'
root = '/home/alal/Dropbox/0_GradSchool/0_classes/450b/applied-econometrics-notes/'
data = paste0(root, 'data/')
Notation: let $Y_d(t)$ denote the potential outcome under treatment status $d \in \{0, 1\}$ in period $t \in \{0, 1\}$, and let $D$ indicate membership in the treated group (treated only in period 1).
Given this, we can write the DiD estimand:
$$ E\left[ Y_1(1) - Y_0(1) | D = 1 \right] = \{ E[Y(1)|D=1] - E[Y(1)|D=0] \} - \{ E[Y(0)|D=1] - E[Y(0)|D=0] \} $$

Rearranging the parallel trends assumption yields:
$$ E\left[ Y_0(1) | D = 0 \right] = E\left[ Y_0(1) | D = 1 \right] - E\left[ Y_0(0) | D = 1 \right] + E\left[ Y_0(0) | D = 0 \right] $$

# Import data
njmin <- read.table(paste0(data, 'public.dat'),
                    header = FALSE,
                    stringsAsFactors = FALSE,
                    na.strings = c("", ".", "NA"))
#%%
names(njmin) <- c('SHEET', 'CHAIN', 'CO_OWNED', 'STATE', 'SOUTHJ', 'CENTRALJ',
'NORTHJ', 'PA1', 'PA2', 'SHORE', 'NCALLS', 'EMPFT', 'EMPPT',
'NMGRS', 'WAGE_ST', 'INCTIME', 'FIRSTINC', 'BONUS', 'PCTAFF',
'MEALS', 'OPEN', 'HRSOPEN', 'PSODA', 'PFRY', 'PENTREE', 'NREGS',
'NREGS11', 'TYPE2', 'STATUS2', 'DATE2', 'NCALLS2', 'EMPFT2',
'EMPPT2', 'NMGRS2', 'WAGE_ST2', 'INCTIME2', 'FIRSTIN2', 'SPECIAL2',
'MEALS2', 'OPEN2R', 'HRSOPEN2', 'PSODA2', 'PFRY2', 'PENTREE2',
'NREGS2', 'NREGS112')
njmin %>% head()
#%%
# Calculate FTE employment (full-time + 0.5 * part-time + managers)
njmin$FTE <- njmin$EMPFT + 0.5 * njmin$EMPPT + njmin$NMGRS
njmin$FTE2 <- njmin$EMPFT2 + 0.5 * njmin$EMPPT2 + njmin$NMGRS2
#%%
# Create function for calculating standard errors of mean
semean <- function(x, na.rm = FALSE) {
n <- ifelse(na.rm, sum(!is.na(x)), length(x))
sqrt(var(x, na.rm = na.rm) / n)
}
#%%
# Calculate means
summary.means <- njmin[ , c("FTE", "FTE2", "STATE")] %>%
group_by(STATE) %>%
summarise_all(funs(mean(., na.rm = TRUE)))
summary.means <- as.data.frame(t(summary.means[ , -1]))
#%%
colnames(summary.means) <- c("PA", "NJ")
summary.means$dSTATE <- summary.means$NJ - summary.means$PA
summary.means <- rbind(summary.means,
summary.means[2, ] - summary.means[1, ])
row.names(summary.means) <- c("FTE employment before, all available observations",
"FTE employment after, all available observations",
"Change in mean FTE employment")
#%%
# Calculate standard errors of the means
summary.semeans <- njmin[ , c("FTE", "FTE2", "STATE")] %>%
group_by(STATE) %>%
summarise_all(funs(semean(., na.rm = TRUE)))
summary.semeans <- as.data.frame(t(summary.semeans[ , -1]))
#%%
colnames(summary.semeans) <- c("PA", "NJ")
summary.semeans$dSTATE <- sqrt(summary.semeans$NJ^2 + summary.semeans$PA^2) # SE of a difference in means: sqrt(se1^2 + se2^2)
njmin <- njmin[ , c("FTE", "FTE2", "STATE")]
njmin <- melt(njmin,
id.vars = c("STATE"),
variable.name = "Period",
value.name = "FTE")
summary.means <- njmin %>%
mutate(STATE = dplyr::recode(STATE, `0` = 'PA (control)',
`1` = 'NJ (treatment)')) %>%
group_by(STATE, Period) %>%
summarise_all(
funs(
mean(., na.rm = TRUE),
semean(., na.rm = TRUE)
))
difftable = data.table::dcast(setDT(summary.means),
STATE ~ Period, value.var = c('mean','semean'))
difftable[, diff:=mean_FTE2 - mean_FTE]
knitr::kable(difftable)
difftable[1,'diff'] - difftable[2,'diff'] # DiD estimate: NJ (row 1, sorted alphabetically) - PA (row 2)
emps <- summary.means %>%
dplyr::select(state = STATE, period = Period, employment = mean)
ggplot(data=emps,
aes(x=period, y=employment, group=state, colour=state)) +
geom_point() +
geom_line()
Decompose the error into two terms:

$$ e_i = \mathbf{1}_i \theta_i + \epsilon_i $$

Observation level:

$$ y_{it} = x_{it}'\beta + \theta_i + \epsilon_{it} $$

Individual level:

$$ y_i = \mathbf{X}_i \beta + \mathbf{1}_i \theta_i + \epsilon_i $$

One-way fixed effects and random effects both use this form, although they make different assumptions about $\theta_i$.
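To see what is at stake in those assumptions, here is a minimal simulation sketch (using only packages loaded above; all object names are illustrative): when $\theta_i$ is correlated with the regressor, pooled OLS is biased while the within (FE) estimator recovers $\beta$.
set.seed(94305)
N <- 500; Tp <- 5
panel <- CJ(id = 1:N, t = 1:Tp)                # balanced panel skeleton
panel[, theta := rnorm(1), by = id]            # individual effect theta_i
panel[, x := 0.8 * theta + rnorm(.N)]          # x correlated with theta_i
panel[, y := 2 * x + theta + rnorm(.N)]        # true beta = 2
coef(lm(y ~ x, panel))['x']                    # pooled OLS: biased upward
coef(felm(y ~ x | id, panel))['x']             # within estimator: close to 2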
Define an individual fixed effect for individual $i$:

$$ A_i = \begin{cases} 1 & \text{if the observation involves unit } i \\ 0 & \text{otherwise} \end{cases} $$

and define the same for each time period for panel data.
If $D_{it}$ is as good as randomly assigned conditional on $A_i$:

$$ E[Y_{0it}|A_i, X_{it}, t, D_{it}] = E[Y_{0it}|A_i, X_{it}, t] $$

Then, assuming $A_i$ enters linearly,

$$ E[Y_{0it}|A_i, X_{it}, t, D_{it}] = \alpha + \lambda_t + A_i'\gamma + X_{it}'\beta $$

Assuming the causal effect of the treatment is additive and constant,

$$ E[Y_{1it}|A_i, X_{it}, t] = E[Y_{0it}|A_i, X_{it}, t] + \rho $$

where $\rho$ is the causal effect of interest.
These restrictions (linearity in $A_i$; an additive, constant treatment effect) let us write:

$$ \begin{aligned} Y_{it} & = \alpha_i + \lambda_t + \rho D_{it} + X_{it}' \beta + \epsilon_{it} \\ \epsilon_{it} & = Y_{0it} - E[Y_{0it}|A_i, X_{it}, t] \\ \alpha_i & = \alpha + A_i'\gamma \end{aligned} $$

Include individual-specific time trends for more flexible estimation.
Strict exogeneity: $E[\epsilon_{it} | A_i, X_{i1}, \dots, X_{iT}] = 0$, i.e. errors are uncorrelated with the regressors in all periods, not just period $t$.
Calculate individual averages by averaging the FE equation:

$$ \bar{Y}_i = \alpha_i + \bar{\lambda} + \rho \bar{D}_i + \bar{X}_i'\beta + \bar{\epsilon}_i $$

then subtract from the FE equation:

$$ Y_{it} - \bar{Y}_i = \lambda_t - \bar{\lambda} + \rho (D_{it} - \bar{D}_i) + (X_{it} - \bar{X}_i)'\beta + (\epsilon_{it} - \bar{\epsilon}_i) $$

which yields the same $\rho$.
Alternatively, demeaning can be done by premultiplying by the individual-specific demeaning operator

$$ \mathbf{M}_i = \mathbf{I}_i - \mathbf{1}_i (\mathbf{1}_i ' \mathbf{1}_i)^{-1} \mathbf{1}_i' $$

Demeaned regressors: $\tilde{Y}_i = \mathbf{M}_i Y_i$ and $\tilde{X}_i = \mathbf{M}_i X_i$.
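As a quick check (reusing the simulated `panel` from the sketch above), manual demeaning, the dummy-variable (LSDV) regression, and `felm`'s absorbed factor all give the same point estimate; only the naive degrees of freedom differ.
panel[, `:=`(y_dm = y - mean(y), x_dm = x - mean(x)), by = id]
coef(lm(y_dm ~ x_dm - 1, panel))               # OLS on demeaned data
coef(lm(y ~ x + factor(id), panel))['x']       # LSDV: same slope
coef(felm(y ~ x | id, panel))['x']             # absorbed FE: same slope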
Define $\Delta Y_{it} = Y_{it} - Y_{i,t-1}$ and the corresponding first differences for $D$ and $X$. The transformation is

$$ \Delta y_{it} = y_{it} - y_{i,t-1} $$

At the individual level, this is written as:

$$ \Delta y_i = \mathbf{F}_i y_i \qquad \Delta X_i = \mathbf{F}_i X_i $$

where $\mathbf{F}_i$ is the $(T_i - 1) \times T_i$ differencing operator

$$ \mathbf{F}_i = \begin{bmatrix} -1 & 1 & 0 & \dots & 0 & 0 \\ 0 & -1 & 1 & \dots & 0 & 0 \\ \vdots & & \ddots & & \vdots & \vdots \\ 0 & 0 & 0 & \dots & -1 & 1 \\ \end{bmatrix} $$

Then, the first-differences estimating equation is:

$$ \Delta Y_{it} = \Delta \lambda_t + \rho \Delta D_{it} + \Delta X_{it}' \beta + \Delta \epsilon_{it} $$

The FD estimator is numerically identical to the within (FE) estimator for $T=2$, but not otherwise. Both are consistent; under serially uncorrelated, homoskedastic errors, FE is efficient.
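A sketch of the $T = 2$ equivalence with `plm` (again reusing the simulated data from above): the within estimator with period effects and the FD estimator, whose intercept absorbs the common time change, return the same slope.
pnl2 <- pdata.frame(as.data.frame(panel[t <= 2]), index = c('id', 't'))
coef(plm(y ~ x, pnl2, model = 'within', effect = 'twoways'))  # within, T = 2
coef(plm(y ~ x, pnl2, model = 'fd'))['x']                     # FD: same slope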
Assume $\theta_i \coprod X_i$ (a strong assumption). In other words, the entire error term $e_{it} = \nu_{it} + \theta_{i}$ is independent of $X$.
When there is autocorrelation in time series (i.e. $\epsilon_t$ s are correlated over time ), GLS estimates can be obtained by estimating OLS on quasi-differenced data. This allows us to estimate the effects of time-invariant characteristics (assuming the independence condition is met).
Equi-correlation model:

$$ \begin{aligned} E (e_{it} | \mathbf{X}_i) & = 0 \\ E (e_{it}^2 | \mathbf{X}_i) & = \sigma^2 \\ E (e_{ij} \; e_{it} | \mathbf{X}_i) & = \rho \sigma^2 \quad (j \neq t) \\ \end{aligned} $$

This implies the following:
Implementation
$$ y_{it} - \lambda \bar{y}_i = (x_{it} - \lambda \bar{x}_i)'\beta + (1-\lambda)\theta_i + \nu_{it} - \lambda \bar{\nu}_i $$

where

$$ \lambda = 1 - \left[ \frac{\sigma_\nu^2} {\sigma_\nu^2 + T \sigma_\theta^2} \right]^{\frac{1}{2}} $$
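`plm` estimates these variance components when fitting the random effects model; a minimal sketch (simulated data from above; `plm` labels the quasi-demeaning parameter `theta`):
pnl <- pdata.frame(as.data.frame(panel), index = c('id', 't'))
re <- plm(y ~ x, pnl, model = 'random')
re$ercomp$theta   # plm's 'theta' is the lambda in the equation above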
If the error component $\theta$ is correlated with $x$, RE estimates are not consistent. Perform a Hausman test for random vs. fixed effects: under the null, $Cov(\theta_i, x_{it}) = 0$, so both estimators are consistent and RE is efficient.
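`plm::phtest` implements the test; continuing the sketch (since $\theta_i$ was built to be correlated with $x$, the test should reject here):
fe <- plm(y ~ x, pnl, model = 'within')
phtest(fe, re)   # large statistic => reject RE in favour of FE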
freak = import(paste0(data, 'prison.dta'))
freak %>% head()
sumvars = c(
'pris', 'criv', 'crip', 'polpc', 'incpc', 'unem',
'final1',
'black', 'metro', 'ag0_14', 'ag15_17', 'ag18_24', 'ag25_34'
)
sumlabs = c(
'Prison Population', 'Violent Crime', 'Property Crime',
'Police Employees', 'Per Capita Income', 'Unemployment Rate',
'Final Decision (Litigation)',
'Share Black', 'Metro Areas', 'Age 0-14', 'Age 15-17',
'Age 18-24', 'Age 25-34'
)
stargazer(freak[, sumvars], type = exp_type, header=F,
covariate.labels=sumlabs, digits = 2,
title = 'Summary Statistics: Levitt 1996',
notes = c('Per 100,000 residents where applicable',
paste0(nrow(freak), ' observations'),
'Levitt (1996) has 1173 observations'),
summary.stat = c('mean', 'sd', 'min', 'max')
)
Pooled OLS, Random Effects, and Fixed Effects
freak %<>% group_by(state) %>%
mutate(lag_pris = dplyr::lag(pris, order_by = year)) %>% ungroup()
outcome = 'log(criv)'
covars = c('lag_pris', 'log(incpc)', 'unem', 'log(polpc)', 'black', 'metro',
'ag0_14', 'ag15_17', 'ag18_24', 'ag25_34')
#formulas for felm / plm
base = formula_stitcher(outcome, covars)
clust = formula_lfe(outcome, covars, C = ('state'))
m1 = robustify(felm(base, freak)) # pooled OLS
m2 = felm(clust, freak) # pooled OLS with clustered standard errors
# random effects
m3 <- plm(base, data=freak, index = c("state", "year"), model="random")
# Cluster by state
m3_se = coeftest(m3, vcov=vcovHC(m3,type="HC2",cluster="group"))[, 2]
# fixed effects
femod = formula_lfe(outcome, covars, D = c('state'), C = ('state'))
m4 = felm(femod, freak)
# export
sumlabs2 = c( 'Lagged Prison Population', 'Log Per Capita Income',
'Unemployment Rate', 'Log Police', 'Share Black', 'Metro Areas',
'Age 0-14', 'Age 15-17', 'Age 18-24', 'Age 25-34' )
stargazer(m1, m2, m3, m4, type = exp_type, header=F,
title = "Levitt 1996 Replication",
column.separate = c(2, 1, 1),
column.labels = c("Pooled OLS", "Random Effects", "Fixed Effects"),
se = list(NULL, NULL, m3_se, NULL),
dep.var.caption = "Outcome: Log Crime", model.names = F,
dep.var.labels.include = FALSE, multicolumn = F,
omit.stat = c('f', 'adj.rsq', 'ser'),
covariate.labels=sumlabs2, digits = 2, df=F,
notes = c('SE in Col 1: Robust (HC2)',
'SE in Columns 2-4: Cluster Robust by State')
)
swiss = import(paste0(data, 'swissnat_PS6.Rdata'))
# time trends
swiss$t = swiss$year - min(swiss$year)
swiss$t2 = swiss$t^2
swiss$year2 = factor(swiss$year)
swiss$ort_name= factor(swiss$ort_name)
# for formulae
outcome = 'nat_rate_ord'
treat = 'DirDem'
muni = 'ort_name'
time = 'year2'
swiss <- pdata.frame(swiss, index = c(muni, time))
(fm1 = formula_lfe(outcome, treat, C = muni)) # pooled OLS
(fm2 = formula_lfe(outcome, treat, D = muni, C = muni)) # muni fixed effects
(fm3 = formula_lfe(outcome, c(treat, 't'), # muni FE + common time trend
D = muni, C = muni))
(fm4 = formula_lfe(outcome, treat, # muni FE + year FE
D = c(muni, time), C = muni))
# fit models
m1 = felm(fm1, swiss)
m2 = felm(fm2, swiss)
m3 = felm(fm3, swiss)
m4 = felm(fm4, swiss)
m5 = felm(fm4, swiss %>% filter(sprachreg == 'G'))
panelView(nat_rate_ord ~ DirDem, data = swiss, index = c('ort_name', 'year'),
          legend.labs = c('Direct Democracy', 'Committee'), by.treatment = TRUE)
# export
sumlabs3 = c( 'Direct Democracy', 'Time trend')
stargazer(m1, m2, m3, m4, m5, type = exp_type, header=F,
title = "Hainmueller Hangartner (2015) Replication",
dep.var.caption = "Outcome: Naturalisation Rate", model.names = F,
dep.var.labels.include = FALSE, multicolumn = F,
keep = c('DirDem'),
omit.stat = c('f', 'adj.rsq', 'ser'),
column.separate = c(4, 1),
column.labels = c("All Municipalities", "German Speaking"),
add.lines = list(
c('Municipality Fixed Effects', '', 'X', 'X', 'X', 'X'),
c('Year Fixed Effects', '', '', '', 'X', 'X'),
c('Common Linear Time Trend', '', '', 'X', '', '')
),
notes = c('Standard Errors clustered by Municipality'),
covariate.labels=sumlabs3, digits = 2, df=F
)
(fm5 = formula_lfe(outcome, treat, # muni FE + year FE + muni linear time trends
D = c(muni, time, 'ort_name:t'), C = muni))
(fm6 = formula_lfe(outcome, treat, # muni FE + year FE + muni linear and quadratic time trends
D = c(muni, time, 'ort_name:t', 'ort_name:t2'), C = muni))
m6 = felm(fm5, swiss)
m8 = felm(fm6, swiss)
m7 = felm(fm5, swiss %>% filter(sprachreg == 'G'))
m9 = felm(fm6, swiss %>% filter(sprachreg == 'G'))
stargazer(m6, m8, m7, m9, type = exp_type, header=F,
title = "Hainmueller Hangartner (2015) Replication with Municipality Time Trends",
dep.var.caption = "Outcome: Naturalisation Rate", model.names = F,
dep.var.labels.include = FALSE, multicolumn = F,
keep = c('DirDem'),
omit.stat = c('f', 'adj.rsq', 'ser'),
column.separate = c(2, 2),
column.labels = c("All Municipalities", "German Speaking"),
add.lines = list(
c('Muni Fixed Effects', 'X', 'X', 'X', 'X'),
c('Year Fixed Effects', 'X', 'X', 'X', 'X'),
c('Muni Linear Time Trend','X', 'X', 'X', 'X'),
c('Muni Quadratic Time Trend','', 'X', '', 'X')
),
notes = c('Standard Errors clustered by Municipality'),
covariate.labels=sumlabs3, digits = 2, df=F
)
The test maintains that $\beta_{FE}$ is consistent; because that premise is untested, the test is often abused.
Causes should happen before consequences, and not vice versa.
Define the switching indicator $D_{it}$ as 1 if $i$ switched from control to treatment between $t-1$ and $t$.
Check whether, conditional on state and year fixed effects, past $D_{st}$ predicts $Y_{ist}$ while future $D_{st}$ does not. If $D_{st}$ causes $Y_{ist}$ but not vice versa, then dummies for future policy changes should not matter in an equation like:

$$ Y_{ist} = \gamma_s + \lambda_t + \sum_{\tau=0}^{m} \delta_{-\tau} D_{s,t-\tau} + \sum_{\tau=1}^{q} \delta_{+\tau} D_{s,t+\tau} + X_{ist}'\beta + \epsilon_{ist} $$

where the sums on the RHS allow for $m$ lags (post-treatment effects) and $q$ leads (pre-treatment effects). This is implemented on the Autor (2003) data below.
autor <- import(paste0(data, 'table7/autor-jole-2003.dta'))
# Log total employment: from BLS employment & earnings
autor$lnemp <- log(autor$annemp)
# Non-business-service sector employment from CBP
autor$nonemp <- autor$stateemp - autor$svcemp
autor$lnnon <- log(autor$nonemp)
autor$svcfrac <- autor$svcemp / autor$nonemp
# Total business services employment from CBP
autor$bizemp <- autor$svcemp + autor$peremp
autor$lnbiz <- log(autor$bizemp)
# Restrict sample
autor <- autor[which(autor$year >= 79 & autor$year <= 95), ]
autor <- autor[which(autor$state != 98), ]
# State dummies, year dummies, and state*time trends
autor$t <- autor$year - 78
autor$t2 <- autor$t^2
# Generate more aggregate demographics
autor$clp <- autor$clg + autor$gtc
autor$a1624 <- autor$m1619 + autor$m2024 + autor$f1619 + autor$f2024
autor$a2554 <- autor$m2554 + autor$f2554
autor$a55up <- autor$m5564 + autor$m65up + autor$f5564 + autor$f65up
autor$fem <- autor$f1619 + autor$f2024 + autor$f2554 + autor$f5564 + autor$f65up
autor$white <- autor$rs_wm + autor$rs_wf
autor$black <- autor$rs_bm + autor$rs_bf
autor$other <- autor$rs_om + autor$rs_of
autor$married <- autor$marfem + autor$marmale
# Modify union variable (1. Don't interpolate 1979, 1981; 2. Rescale into percentage)
autor$unmem[79 == autor$year | autor$year == 81] <- NA
autor$unmem <- autor$unmem * 100
# Create state and year factors
autor$state <- factor(autor$state)
autor$year <- factor(autor$year)
# Diff-in-diff regression
did <- felm(lnths ~ lnemp + admico_2 + admico_1 + admico0 + admico1 + admico2 +
admico3 + mico4 + admppa_2 + admppa_1 + admppa0 + admppa1 +
admppa2 + admppa3 + mppa4 + admgfa_2 + admgfa_1 + admgfa0 +
admgfa1 + admgfa2 + admgfa3 + mgfa4
| state + year + state:t | 0 | state, data = autor)
Estimated impact of implied-contract exceptions to the employment-at-will doctrine on the use of temporary workers
# Plot results
lags_leads <- c("admico_2", "admico_1", "admico0",
"admico1" , "admico2" , "admico3",
"mico4")
labels <- c("2 yr prior", "1 yr prior", "Yr of adopt",
"1 yr after", "2 yr after", "3 yr after",
"4+ yr after")
results.did <- data.frame(label = factor(labels, levels = labels),
coef = summary(did)$coef[lags_leads, "Estimate"] * 100,
se = summary(did)$coef[lags_leads, "Cluster s.e."] * 100)
g <- ggplot(results.did, aes(label, coef, group = 1))
p <- g + geom_point() +
geom_line(linetype = "dotted") +
geom_errorbar(aes(ymax = coef + 1.96 * se,
ymin = coef - 1.96 * se)) +
geom_hline(yintercept = 0) +
ylab("Log points") +
xlab(paste("Time passage relative to year of",
"adoption of implied contract exception"))
p
For FE to be valid, we rely on strict exogeneity, wherein $E(\nu_{it}|x_i)= E(\nu_{it}|x_{i1}, \cdots, x_{iT})=0$ (i.e. errors are uncorrelated with lags and leads of $x$). This fails when one of the $x$'s is a lagged dependent variable, e.g.

$$ y_{it} = x_{it}'\beta + \delta y_{i,t-1} + \theta_i + \nu_{it} $$

Within-demeaning this equation yields a correlation between the RHS variable $y_{i,t-1}$ and the error term of the form:

$$ Cov\left( y_{i,t-1} - \bar{y}_{i,-1}, \; \nu_{it} - \bar{\nu}_i \right) = -\frac{2}{T}\sigma_\nu^2 + \frac{T-1}{T^2}\sigma_\eta^2 $$

where $\bar{y}_{i,-1}$ is the individual mean of the lagged outcome. This covariance $\rightarrow 0$ as $T \rightarrow \infty$.
One fix: use lagged values of $x$ and $y$ (e.g. $x_{i,t-1}$ and/or $y_{i,t-2}$) as instruments for $\Delta y_{i,t-1}$ in the first-differenced equation. $y_{i,t-2}$ will not be a valid instrument if $\nu_{it}$ is serially correlated.
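A sketch of the GMM version of this idea using `plm::pgmm` (Arellano-Bond, which generalises the lagged-level instruments) on `plm`'s bundled `EmplUK` data; the specification is illustrative, not a replication of anything above.
data("EmplUK", package = "plm")
# first-differenced dynamic panel: instrument the lagged outcome
# with levels dated t-2 and earlier
ab <- pgmm(log(emp) ~ lag(log(emp), 1) + lag(log(wage), 0:1) | lag(log(emp), 2:99),
           data = EmplUK, effect = "twoways", model = "twosteps")
summary(ab)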
Let $i$ index individuals, $j$ index birth cohorts, and $t$ index time. Then, without a true panel to estimate the regression

$$ y_{ijt} = x_{ijt}'\beta + \theta_{ij} + \delta_{t} + \nu_{ijt} $$

we can estimate the cohort-level regression on repeated cross-sections:

$$ \bar{y}_{jt} = \bar{x}_{jt}' \beta + \bar{\theta}_j + \delta_t + \bar{\nu}_{jt} $$
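A minimal simulation sketch of the cohort-average idea (all names illustrative): no individual is observed twice, but collapsing to cohort-year cells and absorbing cohort and year effects recovers $\beta$.
set.seed(1)
cs <- data.table(cohort = sample(1950:1959, 5000, replace = TRUE),
                 year   = sample(1990:1999, 5000, replace = TRUE))
cs[, theta_j := 0.5 * (cohort - 1955)]          # cohort effect
cs[, x := rnorm(.N) + theta_j]                  # x correlated with theta_j
cs[, y := 2 * x + theta_j + rnorm(.N)]          # true beta = 2
cells <- cs[, .(ybar = mean(y), xbar = mean(x), n = .N), by = .(cohort, year)]
felm(ybar ~ xbar | cohort + year, cells, weights = cells$n)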
With interactive fixed effects, the error contains a factor structure $\lambda_i' f_t$, where the loadings $\lambda_i$ and factors $f_t$ are $F \times 1$ vectors. $F$ is chosen based on factor analysis of the residuals, and is usually small ($1$, $2$, or $3$).
data(gsynth)
head(simdata)
system.time(
  out <- gsynth(Y ~ D + X1 + X2, data = simdata, index = c("id", "time"),
                force = "two-way", CV = TRUE, r = c(0, 5), se = TRUE,
                inference = "parametric", nboots = 1000,
                parallel = TRUE, cores = detectCores())
)
print(out)
plot(out) # default: gap plot of the estimated ATT over time
Construct a synthetic control group by finding optimal weights for control regions so that pre-trends between the treated region ($r=1$) and control regions ($r = 2, \cdots, R$) are best matched. If the treatment occurs at $t=0$ and we have data that goes back to $-\tau$, we minimise:

$$ \min_{w_r} \sum_{t=-\tau}^{0} \left( \Delta Y_{r=1,t} - \sum_{r=2}^R w_r \Delta Y_{rt} \right)^2 $$

More generally, the weights are chosen to minimise the distance between pre-treatment characteristics of the treated unit ($X_1$) and of the weighted controls ($X_0 W$):

$$ \left\|X_{1}-X_{0} W\right\|_{V}=\sqrt{\left(X_{1}-X_{0} W\right)^{\prime} V\left(X_{1}-X_{0} W\right)} $$
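A minimal sketch with the `Synth` package (not in the `load_or_install` list above, so treat it as an assumption) and its bundled `basque` data; the predictor set here is deliberately abbreviated and illustrative.
library(Synth)
data(basque)
dp <- dataprep(foo = basque,
               predictors = c("school.illit", "school.prim", "invest"),
               predictors.op = "mean",
               time.predictors.prior = 1964:1969,
               dependent = "gdpcap",
               unit.variable = "regionno",
               unit.names.variable = "regionname",
               time.variable = "year",
               treatment.identifier = 17,        # Basque Country
               controls.identifier = c(2:16, 18),
               time.optimize.ssr = 1960:1969,
               time.plot = 1955:1997)
out <- synth(dp)                                 # solves for W given V
path.plot(synth.res = out, dataprep.res = dp)    # treated vs synthetic path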