options(digits=2)
# R Imports Boilerplate
rm(list=ls())
library(LalRUtils)
load_or_install(c('tidyverse','data.table', 'stargazer','lfe', 'estimatr', 'systemfit',
'magrittr','Hmisc', 'rio', 'texreg', 'knitr', 'foreach', 'doParallel', 'gridExtra',
'rdrobust', 'rdd', 'foreign', 'car', 'lubridate')) # car for deltaMethod(), lubridate for dates
theme_set(lal_plot_theme())
exp_type = 'text'
root = '/home/alal/Dropbox/0_GradSchool/0_classes/450b/applied-econometrics-notes/'
data = paste0(root, 'data/')
#%%
# Read the data into a dataframe
pums = read.table(paste0(data, 'asciiqob.txt'),
header = FALSE,
stringsAsFactors = FALSE)
names(pums) <- c('lwklywge', 'educ', 'yob', 'qob', 'pob')
pums$age <- 80 - pums$yob
Endogeneity: $Cov(X_i, \epsilon_i) \neq 0$.
Instrumental Variable requirements:
- Relevance: $Cov(Z_i, X_i) \neq 0$
- Exogeneity / exclusion: $Cov(Z_i, \epsilon_i) = 0$
General form:
$$ \hat{\beta}_{IV} = (X'P_Z X)^{-1}X'P_Z Y $$
where $P_Z := Z(Z'Z)^{-1}Z'$ is the projection matrix of $Z$.
More general estimation strategies:
- Two-Stage Least Squares (2SLS)
- Control Function approach
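As a quick illustration, here is a minimal sketch on simulated data (all names illustrative) showing that the projection-matrix formula, manual two-stage estimation, and the control-function approach return identical point estimates:

```r
# Simulated data: one endogenous regressor x, one instrument z, true beta = 2
set.seed(42)
n <- 1000
z <- rnorm(n)                         # instrument
u <- rnorm(n)                         # unobserved confounder
x <- z + u + rnorm(n)                 # endogenous regressor
y <- 2 * x + u + rnorm(n)             # structural equation

Z <- cbind(1, z); X <- cbind(1, x)
Pz <- Z %*% solve(crossprod(Z)) %*% t(Z)             # projection matrix of Z
beta_iv   <- solve(t(X) %*% Pz %*% X, t(X) %*% Pz %*% y)[2]
beta_2sls <- coef(lm(y ~ fitted(lm(x ~ z))))[2]      # 2SLS by hand
beta_cf   <- coef(lm(y ~ x + resid(lm(x ~ z))))['x'] # control function
c(beta_iv, beta_2sls, beta_cf)        # identical, approximately 2
```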
Conditional independence assumption:
$$ \{Y_{1i}, Y_{0i}, D_{1i}, D_{0i}\} \perp Z_i | X_i $$
pums.qob.means <- pums %>% group_by(yob, qob) %>% summarise_all(mean)
#%%
# Add dates
pums.qob.means$yqob <- lubridate::ymd(paste0("19",
pums.qob.means$yob,
pums.qob.means$qob * 3),
truncated = 2)
# Function for plotting data
plot.qob <- function(ggplot.obj, ggtitle, ylab) {
gg.colours <- c("firebrick", rep("black", 3), "white")
ggplot.obj + geom_line() +
geom_point(aes(colour = factor(qob)),
size = 5) +
geom_text(aes(label = qob, colour = "white"),
size = 3,
hjust = 0.5, vjust = 0.5,
show.legend = FALSE) +
scale_colour_manual(values = gg.colours, guide = "none") +
ggtitle(ggtitle) +
xlab("Year of birth") +
ylab(ylab)
}
# Plot
p.educ <- plot.qob(ggplot(pums.qob.means, aes(x = yqob, y = educ)),
"A. Average education by quarter of birth (first stage)",
"Years of education")
p.lwklywge <- plot.qob(ggplot(pums.qob.means, aes(x = yqob, y = lwklywge)),
"B. Average weekly wage by quarter of birth (reduced form)",
"Log weekly earnings")
p.ivgraph <- grid.arrange(p.educ, p.lwklywge)
### Replication (Table 4.1.1 in MHE)
pums <- fread(paste0(data, 'asciiqob.txt'),
header = FALSE,
stringsAsFactors = FALSE)
names(pums) <- c('lwklywge', 'educ', 'yob', 'qob', 'pob')
qobs <- unique(pums$qob)
qobs.vars <- sapply(qobs, function(x) paste0('qob', x))
pums[, (qobs.vars) := lapply(qobs, function(x) 1*(qob == x))]
#%%
# Column 1: OLS (instrumenting educ with itself reproduces OLS in felm's IV syntax)
col1 <- felm(lwklywge ~ 1|0|(educ~educ)|0, pums)
# Column 2: OLS with YOB, POB dummies
col2 <- felm(lwklywge ~ 1| factor(yob) + factor(pob)|(educ~educ)|0, pums)
#%%
# Column 3: 2SLS with instrument QOB = 1
col3 <- felm(lwklywge ~ 1 | 0 | (educ~qob1) |0, data=pums)
#%%
# Column 4: 2SLS with instruments QOB = 1, 2, 3
col4 <- felm(lwklywge ~ 1 | 0 |(educ~qob1 + qob2 + qob3)|0 , data=pums)
#%%
# Column 5: 2SLS with YOB, POB dummies and instrument (QOB = 1)
col5 <- felm(lwklywge ~ 1 | factor(yob) + factor(pob) |
(educ ~ qob1)|0,data=pums)
# Column 6: 2SLS with YOB, POB dummies and full QOB dummies
col6 <- felm(lwklywge ~ 1 | factor(yob) + factor(pob) |
(educ ~ factor(qob))|0,data=pums)
# Column 7: 2SLS with YOB, POB dummies and full QOB dummies interacted with YOB
col7 <- felm(lwklywge ~ 1 | factor(yob) + factor(pob) |
(educ ~ factor(qob) * factor(yob)),
data = pums)
stargazer(col1,col2,col3,col4,col5,col6,col7,
keep.stat="n",
covariate.labels=c('Years of education'),
add.lines = list(
c("YOB fixed effects", '', 'X', '', '', 'X', 'X', 'X'),
c("POB fixed effects", '', 'X', '', '', 'X', 'X', 'X'),
c("Instruments:", '', '', '', '', '', '', ''),
c("QOB=1", '', '', 'X', 'X', 'X', 'X', 'X'),
c("QOB=2", '', '', '', 'X', '', 'X', 'X'),
c("QOB=3", '', '', '', 'X', '', 'X', 'X'),
c("QOB x YOB dummies", '', '', '', '', '', '', 'X')),
omit= "Constant",
type=exp_type)
Structural equation:
$$ Y_i = \alpha + \rho s_i + \eta_i $$
with $Cov(\eta_i, s_i) \neq 0$. Let $Z_i \in \{0,1\}$ s.t. $P(Z_i=1) = p$. Then
$$ Cov(Y_i, Z_i) = \{ E[Y_i|Z_i = 1] - E[Y_i|Z_i = 0]\}p(1-p) $$
and
$$ \rho = \frac{E[Y_i|Z_i = 1] - E[Y_i|Z_i = 0]}{E[s_i|Z_i = 1] - E[s_i|Z_i=0]} $$
pums$z <- (pums$qob == 3 | pums$qob == 4) * 1
# Compare means (and differences)
ttest.lwklywge <- t.test(lwklywge ~ z, pums)
ttest.educ <- t.test(educ ~ z, pums)
# Compute Wald estimate
sur <- systemfit(list(first = educ ~ z,
second = lwklywge ~ z),
data = pums,
method = "SUR")
wald <- deltaMethod(sur, "second_z / first_z")
wald.estimate <- (mean(pums$lwklywge[pums$z == 1]) - mean(pums$lwklywge[pums$z == 0])) /
(mean(pums$educ[pums$z == 1]) - mean(pums$educ[pums$z == 0]))
# wald.se <- wald.estimate^2 * ()
# OLS estimate
ols <- lm(lwklywge ~ educ, pums)
# Construct table
lwklywge.row <- c(ttest.lwklywge$estimate[1],
ttest.lwklywge$estimate[2],
ttest.lwklywge$estimate[2] - ttest.lwklywge$estimate[1])
educ.row <- c(ttest.educ$estimate[1],
ttest.educ$estimate[2],
ttest.educ$estimate[2] - ttest.educ$estimate[1])
wald.row.est <- c(NA, NA, wald$Estimate)
wald.row.se <- c(NA, NA, wald$SE)
ols.row.est <- c(NA, NA, summary(ols)$coef['educ' , 'Estimate'])
ols.row.se <- c(NA, NA, summary(ols)$coef['educ' , 'Std. Error'])
table <- rbind(lwklywge.row, educ.row,
wald.row.est, wald.row.se,
ols.row.est, ols.row.se)
colnames(table) <- c("Born in the 1st or 2nd quarter of year",
"Born in the 3rd or 4th quarter of year",
"Difference")
rownames(table) <- c("ln(weekly wage)",
"Years of education",
"Wald estimate",
"Wald std error",
"OLS estimate",
"OLS std error")
table
Types of validity in research design: internal validity (does the design identify the causal effect in the study population?) and external validity (do the estimates generalise to other settings and populations?).
Define 4 potential outcomes
Define 4 potential outcomes:
$$ Y(D = 1, Z = 1); \; Y(D = 1, Z = 0); \; Y(D = 0, Z = 1); \; Y(D = 0, Z = 0) $$
Assumptions:
SUTVA
Ignorability of Instrument - the instrument is as good as randomly assigned, i.e. independent of the vector of potential outcomes and potential treatment assignments: $$ [\{ Y_i(d,z); \forall d, z \}, D_{1i}, D_{0i}] \perp Z_i $$
Exclusion Restriction: $Y(D, 0 ) = Y(D, 1) \; \forall D \in \{0, 1\}$ - no direct effect of $Z$ on $Y$ except through $D$.
$Y_i(d,z) = f(d)$, i.e. only a function of $d$; $Y_i(d,0) = Y_i(d,1) \equiv Y_{di}$ for $d \in \{0,1\}$.
Combining these, we can write the ignorability condition as
$$ (Y_0, Y_1, D_0, D_1) \perp Z $$
This is sufficient for a causal interpretation of the reduced form.
Given the above conditions, the Wald estimand can be interpreted as the causal effect of the endogenous variable on the outcome for those whose treatment status is changed by the instrument.
LATE is the effect of the treatment on the population of compliers; it is not informative about never-takers and always-takers.
Under randomisation and perfect compliance, ITT = ATE.
In a randomised trial with one-sided non-compliance: Bloom Result
$$ ToT = E[Y_{1i} - Y_{0i}|D_i = 1] = \frac{E[Y_i|Z_i = 1] - E[Y_i|Z_i=0]}{E[D_i|Z_i =1]} = \frac{RF}{FS} = \frac{ITT}{\text{Compliance}} $$
Manually running IV in two stages does not give correct standard errors.
The manual second-stage OLS residual variance is the variance of $\eta_i + \rho(s_i - \hat{s}_i)$, while proper 2SLS standard errors require the variance of $\eta_i$ only.
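A minimal simulated illustration of this point (names illustrative): the manual second stage and `felm`'s 2SLS agree on the point estimate but not on the standard error:

```r
# Manual two-stage OLS vs. proper 2SLS standard errors
set.seed(42)
n <- 1000
z <- rnorm(n); u <- rnorm(n)
s <- z + u + rnorm(n)
y <- 1 + 0.5 * s + u + rnorm(n)
df <- data.frame(y, s, z)

df$s_hat <- fitted(lm(s ~ z, df))
manual <- lm(y ~ s_hat, df)                    # residual is eta + rho*(s - s_hat)
proper <- felm(y ~ 1 | 0 | (s ~ z) | 0, df)
c(manual = summary(manual)$coefficients[2, 2],
  proper = summary(proper)$coefficients[2, 2]) # same estimate, different SEs
```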
With 2 mutually exclusive dummy instruments ($z_{1i}, z_{2i}$), the resulting 2SLS estimate of $\rho$ is a weighted average of causal effects for the instrument-specific complier subpopulations:
$$ \rho_j = \frac{Cov(Y_i, z_{ji})}{Cov(D_i, z_{ji})}, \; j = 1,2 $$
First-stage fitted values are $\hat{D}_i = \pi_{11}z_{1i} + \pi_{12}z_{2i}$, and
$$ \rho_{2sls} = \psi \rho_1 + (1-\psi) \rho_2 $$
where
$$ \psi = \frac{\pi_{11} Cov(D_i, z_{1i})}{\pi_{11}Cov(D_i, z_{1i})+ \pi_{12}Cov(D_i, z_{2i})} $$
$\psi$ is a number between 0 and 1 that depends on the relative strength of each instrument in the first stage.
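The weighting identity holds exactly in sample; a sketch on simulated data (names illustrative):

```r
# 2SLS with two mutually exclusive dummy instruments equals the
# psi-weighted average of the instrument-specific IV estimates
set.seed(42)
n  <- 3000
z1 <- rbinom(n, 1, 1/3)
z2 <- (1 - z1) * rbinom(n, 1, 1/2)    # mutually exclusive with z1
u  <- rnorm(n)
D  <- 0.8 * z1 + 0.4 * z2 + 0.5 * u + rnorm(n)
Y  <- D + u + rnorm(n)
df <- data.frame(Y, D, z1, z2)

rho1 <- cov(Y, z1) / cov(D, z1)       # instrument-specific estimates
rho2 <- cov(Y, z2) / cov(D, z2)
fs   <- coef(lm(D ~ z1 + z2, df))     # first stage
psi  <- fs['z1'] * cov(D, z1) /
  (fs['z1'] * cov(D, z1) + fs['z2'] * cov(D, z2))
c(weighted = unname(psi * rho1 + (1 - psi) * rho2),
  tsls     = unname(coef(felm(Y ~ 1 | 0 | (D ~ z1 + z2) | 0, df))[2]))
```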
Only relevant when the model is overidentified: 2SLS estimation is done by fitting the first stage and reduced form separately; at each step, $\pi$ and $\beta$ are estimated to minimise errors in that equation. Limited-information maximum likelihood (LIML), on the other hand, jointly minimises the sum of squared errors in both stages.
Basic idea: when the model is overidentified, we can probe the exclusion restriction ($Cov(z, e)=0$) by estimating the model with the two instruments separately and testing whether $Cov(z_1, e_2)=0$ (where $e_2$ is the error term from the structural equation when $x$ is instrumented by $z_2$), and vice versa.
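A minimal sketch of this check on simulated data where both instruments are valid (names illustrative); the formal version is the Sargan overidentification test:

```r
# Overidentification check: instrument x with z2 only, then test whether
# the implied structural residuals are uncorrelated with z1
set.seed(42)
n  <- 2000
z1 <- rnorm(n); z2 <- rnorm(n); u <- rnorm(n)
x  <- z1 + z2 + u + rnorm(n)
y  <- 0.5 * x + u + rnorm(n)
df <- data.frame(y, x, z1, z2)

iv_z2 <- felm(y ~ 1 | 0 | (x ~ z2) | 0, df)
e2    <- df$y - coef(iv_z2)[1] - coef(iv_z2)[2] * df$x  # structural residuals
cor.test(df$z1, e2)     # fails to reject when z1 is a valid instrument
```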
IV comes at the price of less precise estimates, so OLS is preferred unless it can be shown that IV significantly changes the results. Define:
$$ H = \frac{(\hat{\beta}_{2sls} - \hat{\beta}_{OLS})^2} {\hat{V}(\hat{\beta}_{2sls}) - \hat{V}(\hat{\beta}_{OLS})} $$
Under the null that OLS is consistent, $H \sim \chi^2_1$. This is equivalent to testing $\gamma = 0$ in the equation $y = \beta x + \gamma \hat{u} + \epsilon$, where $\hat{u}$ are the residuals from the first stage of IV (the regression of $x$ on $z$).
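A sketch of the regression-based version on simulated data (names illustrative): the t-test on the first-stage residuals implements the test of $\gamma = 0$:

```r
# Hausman test via the control-function regression
set.seed(42)
n <- 2000
z <- rnorm(n); u <- rnorm(n)
x <- z + u + rnorm(n)            # endogenous: correlated with the error
y <- 0.5 * x + u + rnorm(n)

uhat <- resid(lm(x ~ z))         # first-stage residuals
summary(lm(y ~ x + uhat))        # significant uhat => reject consistency of OLS
```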
set.seed(6152011)
# Number of cities
L <- 1000
# Number of industries
K <- 3
# National growth rates by industry
g.raw <- runif(K)
g.k <- g.raw/sum(g.raw)
#%%
CreateCity <- function(g.k, eta.s){
# Give each city some industry shares
K <- length(g.k)
z.raw <- runif(K)
z <- z.raw / sum(z.raw)
# Give each city some idiosyncratic growth by industry
# (drawn fresh per city; the original reused the national g.raw)
g.tilde.raw <- runif(K)
g.kl.tilde <- g.tilde.raw / sum(g.tilde.raw)
# Change in employment is the inner product of the national
# + idiosyncratic component with the industry shares
x <- (g.kl.tilde + g.k) %*% z
# Bartik instrument
# (national growth & city industry share inner product)
B <- g.k %*% z
# Realized change in wage
y <- (1/eta.s) * x + rnorm(1,0,1/10)
list("y" = y, "B" = B, "x" = x)
}
# Create a city data frame (one independent draw per city;
# rep() would recycle a single draw L times)
city.data <- replicate(L, CreateCity(g.k, 1))
df <- data.frame(matrix(unlist(city.data), ncol = 3, byrow = TRUE))
colnames(df) <- c("y", "B", "x")
# Estimates
m.ols <- felm(y ~ x | 0 | 0 | 0, data = df)
m.iv <- felm(y ~ 1 | 0 | (x ~ B) | 0, data = df)
stargazer(m.ols, m.iv, type = exp_type)
Given first stage $s_i = \pi z_i + \epsilon_i$ and structural equation $y_i = \rho s_i + \eta_i$, define $\sigma_{\epsilon \eta} = Cov(\epsilon_i, \eta_i)$ and $\sigma_{\epsilon}^2 = Var(\epsilon_i)$. Let $F$ be the F-statistic of the first stage. Then the bias of 2SLS is approximately:
$$ \rho_{2sls} - \rho = \frac{\sigma_{\epsilon\eta}}{\sigma_{\epsilon}^2} \frac{1}{F+1} $$
So, as $F \rightarrow \infty$, the bias $\rightarrow 0$.
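A simulation sketch of this relationship (illustrative parameters; 20 instruments, so the mean-bias approximation applies):

```r
# 2SLS bias shrinks with the first-stage F statistic
set.seed(42)
sim_2sls <- function(pi, n = 500, K = 20) {
  Z   <- matrix(rnorm(n * K), n, K)
  eta <- rnorm(n)
  eps <- 0.8 * eta + rnorm(n)               # sigma_{eps,eta} = 0.8
  s   <- as.numeric(Z %*% rep(pi, K) + eps)
  y   <- s + eta                            # true rho = 1
  s_hat <- fitted(lm(s ~ Z))
  c(F    = unname(summary(lm(s ~ Z))$fstatistic[1]),
    bias = cov(y, s_hat) / cov(s, s_hat) - 1)
}
rowMeans(replicate(200, sim_2sls(pi = 0.01)))  # weak: F near 1, large bias
rowMeans(replicate(200, sim_2sls(pi = 0.30)))  # strong: large F, bias near 0
```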
Bound, Jaeger, and Baker (1995): IV may be biased and inconsistent if the instruments are only weakly correlated with the endogenous variables.
$$ \rho = \frac{Cov(Y_i, Z_i)}{Cov(D_i,Z_i)} + \frac{Cov(\eta_i, Z_i)}{Cov(D_i, Z_i)} = \alpha_D + \frac{Cov(\eta_i, Z_i)}{Cov(D_i, Z_i)} $$
The second term gets inflated when the instrument is weak, even under slight failures of exogeneity.
$$ \text{plim} \; \hat{\beta}_{iv} = \beta + \frac{\text{plim} \; \Delta \bar{\epsilon}}{\text{plim} \; \Delta \bar{x}} $$
$$ \frac{\text{plim} \; \hat{\beta}_{iv} - \beta}{\text{plim} \; \hat{\beta}_{ols} - \beta} = \frac{Cov(\hat{x}, \epsilon) / Cov(x,\epsilon)}{R^2_{x,z}} $$
Staiger and Stock (1997) rule of thumb: first-stage F-statistic $\geq$ 10 (for 1 endogenous variable).
Montiel Olea and Pflueger (2013) construct an Effective F-statistic:
$$ \hat{F}_{eff} = \frac{1}{S} \frac{x'ZZ'x}{tr(\hat{W}_2)} $$
Power is linear in the compliance ratio:
$$ SE_{\widehat{LATE}} \approx \frac{SE_{\widehat{ITT}}}{\text{Compliance Ratio}} $$
With 10% compliance, the LATE standard error is 10x the ITT standard error.
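Back-of-the-envelope illustration with hypothetical numbers:

```r
# SE of the LATE implied by a given ITT standard error and compliance ratio
se_itt     <- 0.02                  # hypothetical ITT standard error
compliance <- c(1, 0.5, 0.1)
data.frame(compliance, se_late = se_itt / compliance)
```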
Likelihood that a complier has a given value of characteristic $X$, relative to the rest of the population:
$$ \frac{E[D | Z=1, X=x]-E[D | Z=0, X=x]}{E[D | Z=1]-E[D | Z=0]} $$
Let $g$ be any measurable function whose conditional expectation is defined, and define
$$ \kappa=1-\frac{D \cdot(1-Z)}{P(Z=0 | X)}-\frac{(1-D) \cdot Z}{P(Z=1 | X)} $$
Then,
$$ E[g(Y, D, X) | D_{1}>D_{0}]=\frac{E[\kappa \cdot g(Y, D, X)]}{E[\kappa]} $$
This can be used to calculate the average of $X$ in the complier population:
$$ E[X | D_{1}>D_{0}]=\frac{E[\kappa \cdot X]}{E[\kappa]} $$
draft = import(paste0(data, 'draft.txt'))
colnames(draft) = c('lwage', 'age', 'elig', 'vet')
draft %>% group_by(vet) %>% summarise(lw = mean(lwage)) %>% .$lw -> w
wdiff = w[2] - w[1]
m1 = lm(lwage ~ vet, draft)
stargazer(m1, m1,
se = list(NULL, robust_se(m1)),
add.lines = list(c('SE', 'Classical', 'Robust')),
type = exp_type, header = F, df = F
)
freq_table(draft, 'elig') %>% kable(.)
freq_table(draft, 'vet') %>% kable(.)
draft %>% group_by(elig) %>% summarise(mu = mean(vet)) %>% .$mu -> mu
always_takers = nrow(filter(draft, elig != 1 & vet == 1)) / nrow(filter(draft, elig != 1))
never_takers = nrow(filter(draft, elig == 1 & vet == 0)) / nrow(filter(draft, elig == 1))
compliers = mu[2] - mu[1]
shares = data.frame(subpop = c('always takers', 'never takers', 'compliers'),
share = c(always_takers, never_takers, compliers))
shares %>% kable()
ivmod = felm(lwage ~ 1 | 0 | (vet ~ elig) | 0, draft)
stargazer(ivmod, header = F, model.names = F, type = exp_type, df = F,
column.labels = c('2SLS Estimates'),
covariate.labels = c('Veteran Status'), keep.stat = c('n'))
draft %>% filter(age >= 52) -> a52
# complier characteristic ratio: first stage among age >= 52 vs. full sample
fml = as.formula('vet ~ elig')
fs_all = lm(fml, draft)$coefficients[2]
fs_a52 = lm(fml, a52)$coefficients[2]
(lik = fs_a52/fs_all)
# logit for the instrument propensity P(Z=1|X), with X = age,
# as required by the kappa formula above
selmod = glm(elig ~ age, family = binomial(link = "logit"), draft)
draft$pz = selmod$fitted.values
draft %<>% mutate(
κ = 1 - ((vet * (1 - elig)) / (1 - pz)) - ((1 - vet) * elig / pz))
avg_complier_age = weighted.mean(draft$age, draft$κ)
avg_complier_age
Treatment changes discontinuously at some particular value $x_0$ of the running variable $x$:
$$ D_i = \begin{cases} 1 \; \text{if} \; x_i \geq x_0 \\ 0 \; \text{if} \; x_i < x_0 \end{cases} $$
Treatment assignment is likely to be arbitrary in the neighbourhood of $x_0$.
RD takes advantage of a discrete change in treatment in response to a small change in the running variable $x$. Causal inference relies on the (local) arbitrariness of whether unit $i$ has $x_i < x_0$ or $x_i \geq x_0$.
Formally,
\begin{align*}
E[Y_{0i}|x_i] & = \alpha + \beta x_i \\
Y_{1i} & = Y_{0i} + \tau
\end{align*}
which leads to the regression:
$$ Y_i = \alpha + \beta x_i + \tau D_i + \eta_i $$
To permit nonlinearity, we estimate:
$$ Y_i = f(x_i) + \tau D_i + \eta_i $$
Define $c := x_0$. The treatment effect can be uncovered by running a pooled regression:
$$ Y = \alpha_l + \tau D + f(X - c) + \epsilon $$
It is recommended to let the regression function differ on the two sides of the cutoff by including interaction terms between $D$ and $X$. The pooled regression is then:
$$ Y = \alpha_l + \tau D + \beta_l (X - c) + (\beta_r - \beta_l) D (X-c) + \epsilon $$
Higher-order polynomials can be included to better approximate the regression functions (per the Stone-Weierstrass theorem).
# Generate series
nobs = 100
x <- runif(nobs)
y.linear <- x + (x > 0.5) * 0.25 + rnorm(n = nobs, mean = 0, sd = 0.1)
y.nonlin <- 0.5 * sin(6 * (x - 0.5)) + 0.5 + (x > 0.5) * 0.25 + rnorm(n = nobs, mean = 0, sd = 0.1)
y.mistake <- 1 / (1 + exp(-25 * (x - 0.5))) + rnorm(n = nobs, mean = 0, sd = 0.1)
rd.series <- data.frame(x, y.linear, y.nonlin, y.mistake)
# Make graph with ggplot2
g.data <- ggplot(rd.series, aes(x = x, group = x > 0.5))
p.linear <- g.data + geom_point(aes(y = y.linear)) +
stat_smooth(aes(y = y.linear),
method = "lm",
se = FALSE) +
geom_vline(xintercept = 0.5) +
ylab("Outcome") +
ggtitle(bquote('A. Linear E[' * Y["0i"] * '|' * X[i] * ']'))
p.nonlin <- g.data + geom_point(aes(y = y.nonlin)) +
stat_smooth(aes(y = y.nonlin),
method = "lm",
formula = y ~ poly(x, 2),
se = FALSE) +
geom_vline(xintercept = 0.5) +
ylab("Outcome") +
ggtitle(bquote('B. Nonlinear E[' * Y["0i"] * '|' * X[i] * ']'))
f.mistake <- function(x) {1 / (1 + exp(-25 * (x - 0.5)))}
p.mistake <- g.data + geom_point(aes(y = y.mistake)) +
stat_smooth(aes(y = y.mistake),
method = "lm",
se = FALSE) +
stat_function(fun = f.mistake,
linetype = "dashed") +
geom_vline(xintercept = 0.5) +
ylab("Outcome") +
ggtitle('C. Nonlinearity mistaken for discontinuity')
grid.arrange(p.linear, p.nonlin, p.mistake, ncol = 1)
lee = import(paste0(data, 'Lee.dta'))
lee %<>% filter(party == 100) # subset to dems
lee %<>%
mutate(
win = 1 * (origvote == highestvote),
vote_share = origvote / totvote,
margin = ifelse(win == 1, # if win
(origvote - sechighestvote)/totvote, # margin if win
(origvote - highestvote)/totvote) # margin if loss
) %>%
arrange(distid, yearel) %>%
group_by(distid) %>%
mutate(last_margin = dplyr::lag(margin, 1), # margin in last election
incumbent = 1 * (dplyr::lag(win, 1) == 1)
) %>%
ungroup()
ggplot(lee, aes(x = last_margin)) + geom_histogram(bins = 50) + geom_vline(xintercept = 0)
DCdensity(na.omit(lee$last_margin), verbose = T) # drop NAs created by lag()
ggplot(lee, aes(x = last_margin, y = incumbent, colour = as.factor(incumbent))) +
geom_point(alpha = 0.5, size = 0.3) +
labs(title = 'Incumbency as function of past margin',
y = bquote(incumbent[t+1]), x = bquote(margin[t]))
lee %>% filter(abs(last_margin) <= 0.25) -> cut_25
m1 = lm(vote_share ~ incumbent*last_margin, cut_25)
yhat = predict(m1, cut_25)
stargazer(m1, header = F, type = exp_type, se = list(robust_se(m1)),
model.names = F,
keep = c('incumbent$'), dep.var.caption = 'Vote Share',
dep.var.labels.include = FALSE, multicolumn = F,
omit.stat = c('f', 'adj.rsq', 'ser'))
df = data.frame(yhat, vote_share = cut_25$vote_share,
margin_last = cut_25$last_margin, inc = cut_25$incumbent) %>% na.omit()
ggplot(df, aes(x = margin_last)) +
geom_vline(xintercept = 0, linetype = 'dashed') +
geom_point(aes(y = vote_share, colour = as.factor(inc)),
alpha = 0.4, size = 0.4) +
geom_line(aes(y = yhat, colour = as.factor(inc))) +
theme(legend.position = "none") +
labs(title = 'RD estimates of the Incumbency Advantage',
y = bquote(VoteShare[t+1]), x = bquote(margin[t]))
bws = seq(.05, .95, .05)
coefs = c()
ses = c()
for(bw in bws) {
m = lm(vote_share ~ incumbent*last_margin,
lee %>% filter(abs(last_margin) <= bw))
coefs = c(coefs, m$coefficients[2])
ses = c(ses, robust_se(m)[2])
}
ests = data.frame(bw = bws, coef = coefs, se = ses)
ggplot(ests, aes(bw, coef, group = 1)) + geom_point() +
geom_line(linetype = "dotted") +
geom_errorbar(aes(ymax = coef + 1.96 * se, ymin = coef - 1.96 * se),
colour = 'blue', alpha = 0.6) +
geom_hline(yintercept = 0) +
scale_x_reverse() +
labs(title = 'Incumbency advantage as function of cutoffs',
y = "Effect on Vote Share",
x = "Bandwidth")
lee %<>% group_by(distid) %>% mutate(
plac_inc_1 = 1 * (last_margin >= -0.15),
plac_inc_2 = 1 * (last_margin >= -0.075),
plac_inc_3 = 1 * (last_margin >= 0.075),
plac_inc_4 = 1 * (last_margin >= 0.15)
) %>% ungroup()
# estimate negative placebo effects on losers
m2 = lm(vote_share ~ plac_inc_1 * last_margin,
lee %>% filter(incumbent == 0)
)
m3 = lm(vote_share ~ plac_inc_2 * last_margin,
lee %>% filter(incumbent == 0)
)
# estimate positive placebo effects on winners
m4 = lm(vote_share ~ plac_inc_3 * last_margin,
lee %>% filter(incumbent == 1)
)
m5 = lm(vote_share ~ plac_inc_4 * last_margin,
lee %>% filter(incumbent == 1)
)
mod_slicer <- function(m) {
coef = m$coefficients[2]
se = robust_se(m)[2]
t = coef/se
c(coef, se, t)
}
# stack estimates
plac_res = data.frame(rbind(
mod_slicer(m2),
mod_slicer(m3),
mod_slicer(m4),
mod_slicer(m5)))
plac_res = cbind(
c(-.15, -0.075, 0.075, 0.15),
plac_res
)
colnames(plac_res) = c('cutoff', 'coef', 'se', 't-stat')
plac_res
ggplot(plac_res, aes(x = as.factor(cutoff), y = coef)) +
geom_point() +
geom_hline(yintercept = 0) +
geom_errorbar(aes(ymax = coef + 1.96 * se, ymin = coef - 1.96 * se),
colour = 'blue', alpha = 0.6) +
labs(
x = 'cutoff', y = 'estimate', title = 'Placebo Estimates'
)
rdrobust_senate=read.csv(paste0(data, "rdrobust_senate.csv"))
attach(rdrobust_senate)
### rdplot with 95% confidence intervals
rdplot(y=vote, x=margin, scale= 5, binselect="es", # scale = 2, ci=95,
title="RD Plot: U.S. Senate Election Data",
y.label="Vote Share in Election at time t+1",
x.label="Vote Share in Election at time t")
rdplot(y=vote,x=margin,h=17.7080, subset=margin<=17.7080&margin>=-17.7080,
binselect="esmv", kernel="triangular", p=1,
title="RD Plot: U.S. Senate Election Data",
y.label="Vote Share in Election at time t+1",
x.label="Vote Share in Election at time t")
summary(rdbwselect(y=vote,x=margin, all = T))
### rdrobust default
summary(rdrobust(y=vote,x=margin, all = T))
### rdrobust with covariates
summary(rdrobust(y=vote,x=margin,covs=cbind(class,termshouse,termssenate)))
### rdrobust with cluster-robust variance estimation, and underlying data-driven bandwidth selectors.
summary(rdrobust(y=vote,x=margin,cluster=state))
fowler = import(paste0(data, 'Fowler.dta'))
# subset
fowler %>% filter(rv1 >= -.20 & rv1 <= .20) -> band
band %>% mutate(
bin = cut(rv1, breaks = seq(-.2, .2, 0.0025)) # 0.25 percentage point bins
) %>% group_by(bin) %>%
summarise(voteshare = mean(voteshare, na.rm = T),
rv1 = mean(rv1, na.rm = T),
treat = ifelse(rv1 > 0, 1, 0)) %>% select(bin, voteshare, rv1, treat) %>%
ungroup() -> groupmeans
fml_4 = as.formula('voteshare ~ treat * poly(rv1, 4, raw = T)')
# fit model with polynomials
m = lm(fml_4, groupmeans)
groupmeans$yhat = predict(m, groupmeans)
# manual plot
ggplot(groupmeans, aes(x = bin, y = voteshare, group = 1, colour = as.factor(treat))) +
geom_point(size = 0.3) +
geom_line(data = groupmeans %>% filter(treat == 0), aes(y = yhat)) +
geom_line(data = groupmeans %>% filter(treat == 1), aes(y = yhat)) +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
legend.position = "none") +
ggtitle('Incumbency Advantage RD - Manual')
# identical to canned routine
rdplot(band$voteshare, band$rv1, p = 4,
nbins = c(80,80), title = 'Incumbency Advantage RD',
y.label = bquote(VoteShare[t+1]),
x.label = bquote(margin[t])
)
A discontinuity need not change treatment deterministically; it may only change the probability of treatment. We can then use the discontinuity as an instrument for treatment status:
$$ P[D_i = 1 | x_i] = \begin{cases} g_0 (x_i) \; \text{if} \; x_i < x_0 \\ g_1 (x_i) \; \text{if} \; x_i \geq x_0 \end{cases} $$
where $g_0(x_i) \neq g_1(x_i)$. Assuming $g_1(x_0) > g_0(x_0)$, the probability of treatment relates to $x_i$ via:
$$ E[D_i|x_i] = P[D_i=1|x_i] = g_0(x_i) + [g_1(x_i) - g_0(x_i)] T_i $$
where $T_i = \mathbb{1}(x_i \geq x_0)$ and $x_0$ is the point of discontinuity.
The treatment effect is the ratio of the outcome and treatment discontinuities:
$$ \rho = \lim_{\delta \rightarrow 0} \frac{E[Y_i|x_0 < x_i < x_0 + \delta] - E[Y_i|x_0 - \delta < x_i < x_0]} {E[D_i|x_0 < x_i < x_0 + \delta] - E[D_i|x_0 - \delta < x_i < x_0]} $$
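A minimal fuzzy-RD sketch on simulated data (names illustrative): the threshold indicator instruments for realized treatment, first via `felm`, then via `rdrobust`'s `fuzzy` option:

```r
# Fuzzy RD: crossing the cutoff raises treatment probability from 0.2 to 0.8
set.seed(42)
n <- 5000
x <- runif(n, -1, 1)                       # running variable, cutoff at 0
above <- 1 * (x >= 0)
d <- rbinom(n, 1, 0.2 + 0.6 * above)       # imperfect compliance
y <- 0.5 * d + x + rnorm(n, 0, 0.3)        # true effect = 0.5
df <- data.frame(y, x, above, d)

summary(felm(y ~ x | 0 | (d ~ above) | 0, df))   # 2SLS with threshold IV
summary(rdrobust(y = df$y, x = df$x, c = 0, fuzzy = df$d))
```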
The Angrist and Lavy (1999) class-size study uses Maimonides' rule:
$$ m_{sc} = \frac{e_s}{\left\lfloor \frac{e_s - 1}{40} \right\rfloor + 1} $$
# Load the data
grade4 <- read.dta(paste0(data, "final4.dta"))
grade5 <- read.dta(paste0(data, "final5.dta"))
#%%
# Restrict sample
grade4 <- grade4[which(grade4$classize > 0 & grade4$classize < 45 & grade4$c_size > 5), ]
grade5 <- grade5[which(grade5$classize > 0 & grade5$classize < 45 & grade5$c_size > 5), ]
# Find means class size by grade size
grade4cmeans <- aggregate(grade4$classize,
by = list(grade4$c_size),
FUN = mean,
na.rm = TRUE)
grade5cmeans <- aggregate(grade5$classize,
by = list(grade5$c_size),
FUN = mean,
na.rm = TRUE)
# Rename aggregated columns
colnames(grade4cmeans) <- c("c_size", "classize.mean")
colnames(grade5cmeans) <- c("c_size", "classize.mean")
#%%
# Create function for Maimonides Rule
maimonides.rule <- function(x) {x / (floor((x - 1)/40) + 1)}
#%%
# Plot each grade
g4 <- ggplot(data = grade4cmeans, aes(x = c_size))
p4 <- g4 + geom_line(aes(y = classize.mean)) +
stat_function(fun = maimonides.rule,
linetype = "dashed") +
expand_limits(y = 0) +
scale_x_continuous(breaks = seq(0, 220, 20)) +
ylab("Class size") +
xlab("Enrollment count") +
ggtitle("B. Fourth grade")
g5 <- ggplot(data = grade5cmeans, aes(x = c_size))
p5 <- g5 + geom_line(aes(y = classize.mean)) +
stat_function(fun = maimonides.rule,
linetype = "dashed") +
expand_limits(y = 0) +
scale_x_continuous(breaks = seq(0, 220, 20)) +
ylab("Class size") +
xlab("Enrollment count") +
ggtitle("A. Fifth grade")
grid.arrange(p5, p4, ncol = 1)