# Read the raw data file. It has no header row; empty fields, "." and "NA"
# are all treated as missing values.
njmin <- read.table(
  file = "public.dat",
  header = FALSE,
  na.strings = c("", ".", "NA"),
  stringsAsFactors = FALSE
)

# Attach the column names in file order.
# NOTE(review): names ending in "2" presumably refer to a second survey
# wave -- confirm against the data codebook.
njmin <- setNames(
  njmin,
  c(
    "SHEET", "CHAIN", "CO_OWNED", "STATE", "SOUTHJ", "CENTRALJ",
    "NORTHJ", "PA1", "PA2", "SHORE", "NCALLS", "EMPFT", "EMPPT",
    "NMGRS", "WAGE_ST", "INCTIME", "FIRSTINC", "BONUS", "PCTAFF",
    "MEALS", "OPEN", "HRSOPEN", "PSODA", "PFRY", "PENTREE", "NREGS",
    "NREGS11", "TYPE2", "STATUS2", "DATE2", "NCALLS2", "EMPFT2",
    "EMPPT2", "NMGRS2", "WAGE_ST2", "INCTIME2", "FIRSTIN2", "SPECIAL2",
    "MEALS2", "OPEN2R", "HRSOPEN2", "PSODA2", "PFRY2", "PENTREE2",
    "NREGS2", "NREGS112"
  )
)

# Full-time-equivalent employment: EMPFT plus half of EMPPT plus NMGRS,
# computed once from the base columns and once from the "2" columns.
njmin$FTE <- with(njmin, EMPFT + 0.5 * EMPPT + NMGRS)
njmin$FTE2 <- with(njmin, EMPFT2 + 0.5 * EMPPT2 + NMGRS2)
#' Standard error of the mean
#'
#' @param x A numeric vector.
#' @param na.rm If `TRUE`, missing values are excluded from both the variance
#'   and the observation count; if `FALSE`, all elements are counted and any
#'   `NA` in `x` propagates to the result.
#' @return `sqrt(var(x) / n)`, where `n` is the number of observations used.
semean <- function(x, na.rm = FALSE) {
  # `na.rm` is a single flag, so branch with if/else rather than the
  # vectorised ifelse().
  n <- if (na.rm) sum(!is.na(x)) else length(x)
  sqrt(var(x, na.rm = na.rm) / n)
}
# Mean FTE employment by state for both periods (FTE, FTE2), using every
# non-missing observation in each column.
summary.means <- njmin[ , c("FTE", "FTE2", "STATE")] %>%
  group_by(STATE) %>%
  summarise_all(mean, na.rm = TRUE)

# Drop the STATE column and transpose so that rows are periods and columns
# are states. group_by() orders groups by STATE, so the STATE == 0 column
# (labelled PA) comes before STATE == 1 (labelled NJ).
summary.means <- as.data.frame(t(summary.means[ , -1]))
colnames(summary.means) <- c("PA", "NJ")

# NJ minus PA within each period.
summary.means$dSTATE <- summary.means$NJ - summary.means$PA

# Third row: after minus before, column by column.
summary.means <- rbind(summary.means,
                       summary.means[2, ] - summary.means[1, ])
row.names(summary.means) <- c("FTE employment before, all available observations",
                              "FTE employment after, all available observations",
                              "Change in mean FTE employment")

# Standard errors of the same state-by-period means.
summary.semeans <- njmin[ , c("FTE", "FTE2", "STATE")] %>%
  group_by(STATE) %>%
  summarise_all(semean, na.rm = TRUE)
summary.semeans <- as.data.frame(t(summary.semeans[ , -1]))
colnames(summary.semeans) <- c("PA", "NJ")

# Standard error of the NJ - PA difference. semean() already returns a
# standard error (the square root of var / n), so the two must be squared
# back to variances before they are added.
summary.semeans$dSTATE <- sqrt(summary.semeans$NJ^2 + summary.semeans$PA^2)
# Reshape to long form: keep only the state indicator and the two FTE
# measures, then stack FTE and FTE2 into a single FTE column with a
# `Period` column recording which of the two each value came from.
njmin <- njmin[ , c("FTE", "FTE2", "STATE")]
njmin <- melt(njmin,
              id.vars = c("STATE"),
              variable.name = "Period",
              value.name = "FTE")

# Mean and standard error of FTE for every state x period cell. With a
# single summarised column, the output columns take the list names
# (`mean`, `semean`). ungroup() removes the STATE grouping that summarising
# would otherwise leave on the result.
summary.means <- njmin %>%
  mutate(STATE = dplyr::recode(STATE, `0` = 'PA (control)',
                               `1` = 'NJ (treatment)')) %>%
  group_by(STATE, Period) %>%
  summarise_all(
    list(
      mean = ~ mean(., na.rm = TRUE),
      semean = ~ semean(., na.rm = TRUE)
    )
  ) %>%
  ungroup()

# One row per state, with mean_* and semean_* columns per period. A
# data.table copy is used so that `summary.means` itself is not converted by
# reference; it is reused further down as a regular data frame.
difftable <- data.table::dcast(data.table::as.data.table(summary.means),
                               STATE ~ Period, value.var = c("mean", "semean"))

# Within-state change in mean FTE employment (FTE2 minus FTE).
difftable[, diff := mean_FTE2 - mean_FTE]
knitr::kable(difftable)
Main Diff-in-diff estimate
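$$\left(E[Y_{ist} \mid s = NJ,\, t = Nov] - E[Y_{ist} \mid s = NJ,\, t = Feb]\right) - \left(E[Y_{ist} \mid s = PA,\, t = Nov] - E[Y_{ist} \mid s = PA,\, t = Feb]\right) = \delta$$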
# Difference-in-differences estimate: the change in mean FTE employment in
# NJ minus the change in PA. Rows are selected by their STATE label rather
# than by position, so the result does not depend on how the table happens
# to be sorted.
difftable[STATE == "NJ (treatment)", "diff"] -
  difftable[STATE == "PA (control)", "diff"]
## diff
## 1: 2.8
# Plotting data: one row per state x period with the mean FTE employment,
# using lower-case column names for the plot.
emps <- dplyr::select(
  summary.means,
  state = STATE,
  period = Period,
  employment = mean
)

# Mean employment by period, one coloured line (with points) per state.
ggplot(
  data = emps,
  mapping = aes(
    x = period,
    y = employment,
    group = state,
    colour = state
  )
) +
  geom_point() +
  geom_line()
