In [1]:
rm(list = ls())
library(LalRUtils)
libreq(data.table, magrittr, tidyverse, janitor, huxtable, knitr)
# theme_set(lal_plot_theme_d())
set.seed(42)
options(repr.plot.width = 15, repr.plot.height=12)
     wants        loaded
[1,] "data.table" TRUE  
[2,] "magrittr"   TRUE  
[3,] "tidyverse"  TRUE  
[4,] "janitor"    TRUE  
[5,] "huxtable"   TRUE  
[6,] "knitr"      TRUE  

FECT (TWFE, Matrix Completion, IFE)

In [3]:
libreq(gsynth, devtools)
     wants      loaded
[1,] "gsynth"   TRUE  
[2,] "devtools" TRUE  
In [4]:
# devtools::install_github('xuyiqing/fastplm')
# devtools::install_github('xuyiqing/fect')
In [5]:
## for processing C++ code
libreq(Rcpp, ggplot2, GGally, grid, gridExtra)
## for parallel computing
libreq(foreach, doParallel, abind)
libreq(panelView)
     wants       loaded
[1,] "Rcpp"      TRUE  
[2,] "ggplot2"   TRUE  
[3,] "GGally"    TRUE  
[4,] "grid"      TRUE  
[5,] "gridExtra" TRUE  
     wants        loaded
[1,] "foreach"    TRUE  
[2,] "doParallel" TRUE  
[3,] "abind"      TRUE  
     wants       loaded
[1,] "panelView" TRUE  
In [6]:
set.seed(1234)
library(fect)
data(fect)
ls()
Registered S3 method overwritten by 'fect':
  method        from  
  print.interFE gsynth


Attaching package: ‘fect’


The following object is masked from ‘package:gsynth’:

    interFE


  1. 'simdata1'
  2. 'simdata2'
  3. 'turnout'
In [7]:
panelView(Y ~ D, data = simdata1, index = c("id","time"), by.timing = TRUE,
  axis.lab = "time", xlab = "Time", ylab = "Unit", show.id = c(1:100),
  background = "white", main = "Simulated Data: D")
In [8]:
panelView(Y ~ D, data = simdata1, index = c("id","time"),
    axis.lab = "time", xlab = "Time", ylab = "Unit", show.id = c(1:100),
    theme.bw = TRUE, type = "outcome", main = "Simulated Data: Y")
In [13]:
simdata1 %>% tabyl(time, D)
A tabyl: 35 × 3
 time    0    1
    1  200    0
    2  200    0
    3  200    0
    4  200    0
    5  200    0
    6  200    0
    7  200    0
    8  200    0
    9  200    0
   10  200    0
   11  200    0
   12  200    0
   13  200    0
   14  200    0
   15  200    0
   16  200    0
   17  200    0
   18  200    0
   19  200    0
   20  200    0
   21  180   20
   22  180   20
   23  180   20
   24  160   40
   25  160   40
   26  160   40
   27  140   60
   28  140   60
   29  140   60
   30  120   80
   31  120   80
   32  120   80
   33  100  100
   34  100  100
   35  100  100

TWFE

In [9]:
out.fe <- fect(Y ~ D + X1 + X2, data = simdata1, index = c("id","time"), 
  force = "two-way", CV = TRUE, r = c(0, 5), se = TRUE, wald = TRUE, 
  nboots = 200, parallel = TRUE)
Parallel computing ...
Bootstrapping for uncertainties ... 200 runs
Bootstrapping for Wald test ... 200 runs
In [10]:
plot(out.fe, main = "Estimated ATT (FEct)", ylab = "Effect of D on Y", 
  cex.main = 0.8, cex.lab = 0.8, cex.axis = 0.8, cex.text = 0.7)

Interactive Fixed Effects

In [11]:
out.ife <- fect(Y ~ D + X1 + X2, data = simdata1, index = c("id","time"), 
          force = "two-way", method = "ife", CV = TRUE, r = c(0, 5), wald = TRUE,
          se = TRUE, nboots = 200, parallel = TRUE)
Parallel computing ...
Cross-validating ... 

 r = 0; sigma2 = 7.69493; IC = 2.37777; PC = 7.39722; MSPE = 8.31143; MSPTATT = 0.40242; MSE = 7.12120
 r = 1; sigma2 = 4.78864; IC = 2.23495; PC = 5.49463; MSPE = 5.63066; MSPTATT = 0.16623; MSE = 4.46102
 r = 2; sigma2 = 3.99714; IC = 2.38292; PC = 5.33170; MSPE = 5.08755; MSPTATT = 0.02471; MSE = 3.46498*
 r = 3; sigma2 = 3.89607; IC = 2.68309; PC = 5.92457; MSPE = 5.83034; MSPTATT = 0.02491; MSE = 3.17250
 r = 4; sigma2 = 3.79549; IC = 2.97986; PC = 6.48177; MSPE = 6.71675; MSPTATT = 0.02898; MSE = 2.90440
 r = 5; sigma2 = 3.70476; IC = 3.27573; PC = 7.02121; MSPE = 8.11939; MSPTATT = 0.02853; MSE = 2.65748

 r* = 2

Bootstrapping for uncertainties ... 200 runs
Bootstrapping for Wald test ... 200 runs
In [14]:
plot(out.ife, main = "Estimated ATT (IFEct)")
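
The fitted object also stores the numbers behind the plot; a minimal sketch, assuming the fect object exposes the att.avg and est.att fields described in its documentation:

In [ ]:
# average ATT over the post-treatment period
out.ife$att.avg
# period-by-period ATT with bootstrapped confidence intervals
head(out.ife$est.att)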

Matrix Completion

In [15]:
out.mc <- fect(Y ~ D + X1 + X2, data = simdata1, index = c("id","time"), 
          force = "two-way", method = "mc", CV = TRUE, wald = TRUE,
          se = TRUE, nboots = 200, parallel = TRUE)
Parallel computing ...
Cross-validating ... 
Matrix completion method...

 lambda.norm = 1.00000; MSPE = 8.32727; MSPTATT = 0.40242; MSE = 7.12120
 lambda.norm = 0.42170; MSPE = 6.55425; MSPTATT = 0.20403; MSE = 4.80148
 lambda.norm = 0.17783; MSPE = 5.62731; MSPTATT = 0.05787; MSE = 2.44248*
 lambda.norm = 0.07499; MSPE = 5.78173; MSPTATT = 0.01159; MSE = 0.46413
 lambda.norm = 0.03162; MSPE = 6.14631; MSPTATT = 0.00232; MSE = 0.08324
 lambda.norm = 0.01334; MSPE = 7.15717; MSPTATT = 0.00047; MSE = 0.01503
 lambda.norm = 0.00562; MSPE = 8.22151; MSPTATT = 0.00009; MSE = 0.00268
 lambda.norm = 0.00237; MSPE = 8.28216; MSPTATT = 0.00002; MSE = 0.00048
 lambda.norm = 0.00100; MSPE = 8.30816; MSPTATT = 0.00000; MSE = 0.00008
 lambda.norm = 0.00000; MSPE = 8.32727; MSPTATT = 0.00000; MSE = 0.00000

 lambda.norm* = 0.1778

Bootstrapping for uncertainties ... 200 runs
Bootstrapping for Wald test ... 200 runs
In [17]:
plot(out.mc)

Both

In [16]:
out.both <- fect(Y ~ D + X1 + X2, data = simdata1, index = c("id","time"), 
          force = "two-way", method = "both", CV = TRUE, wald = TRUE,
          se = TRUE, nboots = 200, parallel = TRUE)
Parallel computing ...
Cross-validating ... 

 r = 0; sigma2 = 7.69493; IC = 2.37777; PC = 7.39722; MSPE = 8.19475; MSPTATT = 0.40242; MSE = 7.12120
 r = 1; sigma2 = 4.78864; IC = 2.23495; PC = 5.49463; MSPE = 5.64495; MSPTATT = 0.16623; MSE = 4.46102
 r = 2; sigma2 = 3.99714; IC = 2.38292; PC = 5.33170; MSPE = 4.91807; MSPTATT = 0.02471; MSE = 3.46498*
 r = 3; sigma2 = 3.89607; IC = 2.68309; PC = 5.92457; MSPE = 5.58878; MSPTATT = 0.02491; MSE = 3.17250
 r = 4; sigma2 = 3.79549; IC = 2.97986; PC = 6.48177; MSPE = 6.58202; MSPTATT = 0.02898; MSE = 2.90440
 r = 5; sigma2 = 3.70476; IC = 3.27573; PC = 7.02121; MSPE = 8.19848; MSPTATT = 0.02853; MSE = 2.65748

 r* = 2

Matrix completion method...

 lambda.norm = 1.00000; MSPE = 8.19475; MSPTATT = 0.40242; MSE = 7.12120
 lambda.norm = 0.42170; MSPE = 6.45275; MSPTATT = 0.20403; MSE = 4.80148
 lambda.norm = 0.17783; MSPE = 5.51554; MSPTATT = 0.05787; MSE = 2.44248*
 lambda.norm = 0.07499; MSPE = 5.65972; MSPTATT = 0.01159; MSE = 0.46413
 lambda.norm = 0.03162; MSPE = 6.02706; MSPTATT = 0.00232; MSE = 0.08324
 lambda.norm = 0.01334; MSPE = 7.05848; MSPTATT = 0.00047; MSE = 0.01503
 lambda.norm = 0.00562; MSPE = 8.09048; MSPTATT = 0.00009; MSE = 0.00268
 lambda.norm = 0.00237; MSPE = 8.15029; MSPTATT = 0.00002; MSE = 0.00048
 lambda.norm = 0.00100; MSPE = 8.17592; MSPTATT = 0.00000; MSE = 0.00008
 lambda.norm = 0.00000; MSPE = 8.19475; MSPTATT = 0.00000; MSE = 0.00000

 lambda.norm* = 0.1778



 Recommended method through cross-validation: ife

Bootstrapping for uncertainties ... 200 runs
Bootstrapping for Wald test ... 200 runs
In [18]:
plot(out.both)

Gsynth

In [19]:
data(gsynth)
ls()
  1. 'out.both'
  2. 'out.fe'
  3. 'out.ife'
  4. 'out.mc'
  5. 'simdata'
  6. 'simdata1'
  7. 'simdata2'
  8. 'turnout'
In [21]:
head(simdata)
panelView(Y ~ D, data = simdata,  index = c("id","time"), pre.post = TRUE) 
panelView(Y ~ D, data = simdata,  index = c("id","time"), type = "outcome")
A data.frame: 6 × 15
[head(simdata): unit id 101 at time 1-6; columns: id, time, Y, D, X1, X2, eff, error, mu, alpha, xi, F1, L1, F2, L2]
In [22]:
system.time(
    out <- gsynth(Y ~ D + X1 + X2, data = simdata, index = c("id","time"), 
    force = "two-way", CV = TRUE, r = c(0, 5), se = TRUE, 
    inference = "parametric", nboots = 200, parallel = TRUE)
)
Parallel computing ...
Cross-validating ... 
 r = 0; sigma2 = 1.84865; IC = 1.02023; PC = 1.74458; MSPE = 2.37280
 r = 1; sigma2 = 1.51541; IC = 1.20588; PC = 1.99818; MSPE = 1.71743
 r = 2; sigma2 = 0.99737; IC = 1.16130; PC = 1.69046; MSPE = 1.14540*
 r = 3; sigma2 = 0.94664; IC = 1.47216; PC = 1.96215; MSPE = 1.15032
 r = 4; sigma2 = 0.89411; IC = 1.76745; PC = 2.19241; MSPE = 1.21397
 r = 5; sigma2 = 0.85060; IC = 2.05928; PC = 2.40964; MSPE = 1.23876

 r* = 2

Bootstrapping ... ...

   user  system elapsed 
  0.837   0.185  33.401 
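
The returned object stores the estimates directly; per the gsynth documentation, att.avg holds the average ATT and est.att the period-wise estimates with bootstrap confidence intervals:

In [ ]:
out$att.avg       # average treatment effect on the treated
head(out$est.att) # ATT by period, with parametric-bootstrap CIs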
In [27]:
plot(out, theme.bw = T) # by default
plot(out, type = "counterfactual", raw = "all", main="", theme.bw = T)

EDR (Election-Day Registration) Example

In [28]:
panelView(turnout ~ policy_edr, data = turnout,  index = c("abb","year"), 
          pre.post = TRUE, by.timing = TRUE)
In [29]:
out0 <- gsynth(turnout ~ policy_edr + policy_mail_in + policy_motor, 
               data = turnout, index = c("abb","year"), se = TRUE, inference = "parametric", 
               r = 0, 
               CV = FALSE, force = "two-way", nboots = 200, seed = 02139)
Parallel computing ...
Bootstrapping ... ...

In [31]:
plot(out0, main = "DiD estimate", theme.bw = T)
In [32]:
out <- gsynth(turnout ~ policy_edr + policy_mail_in + policy_motor, 
              data = turnout,  index = c("abb","year"), se = TRUE, inference = "parametric", 
              r = c(0, 5), CV = TRUE, force = "two-way", nboots = 200, seed = 02139)
Parallel computing ...
Cross-validating ... 
 r = 0; sigma2 = 77.99366; IC = 4.82744; PC = 72.60594; MSPE = 22.13889
 r = 1; sigma2 = 13.65239; IC = 3.52566; PC = 21.67581; MSPE = 12.03686
 r = 2; sigma2 = 8.56312; IC = 3.48518; PC = 19.23841; MSPE = 10.31254*
 r = 3; sigma2 = 6.40268; IC = 3.60547; PC = 18.61783; MSPE = 11.48390
 r = 4; sigma2 = 4.74273; IC = 3.70145; PC = 16.93707; MSPE = 16.28613
 r = 5; sigma2 = 4.02488; IC = 3.91847; PC = 17.05226; MSPE = 15.78683

 r* = 2

Bootstrapping ... ...

In [34]:
plot(out, type = "gap", xlim = c(-10, 5), ylim=c(-3,10), theme.bw = T)
In [35]:
plot(out, type = "factors", xlab="Year")

Bayesian Time Series

Structural Time Series - CausalImpact

In [36]:
libreq(CausalImpact)
     wants          loaded
[1,] "CausalImpact" TRUE  
In [38]:
set.seed(1)
x1 <- 100 + arima.sim(model = list(ar = 0.999), n = 100)
y <- 1.2 * x1 + rnorm(100)
y[71:100] <- y[71:100] + 10
data <- cbind(y, x1)
head(data)
A matrix: 6 × 2 of type dbl
     y    x1
 105.3 88.22
 105.9 88.48
 106.6 87.88
 106.2 86.78
 101.3 84.62
 101.4 84.61
In [39]:
matplot(data, type = "l")
In [40]:
pre.period <- c(1, 70)
post.period <- c(71, 100)
impact <- CausalImpact(data, pre.period, post.period)
In [41]:
plot(impact)
In [42]:
time.points <- seq.Date(as.Date("2014-01-01"), by = 1, length.out = 100)
data <- zoo(cbind(y, x1), time.points)
pre.period <- as.Date(c("2014-01-01", "2014-03-11"))
post.period <- as.Date(c("2014-03-12", "2014-04-10"))
head(data)
               y    x1
2014-01-01 105.3 88.22
2014-01-02 105.9 88.48
2014-01-03 106.6 87.88
2014-01-04 106.2 86.78
2014-01-05 101.3 84.62
2014-01-06 101.4 84.61
In [43]:
impact <- CausalImpact(data, pre.period, post.period)
plot(impact)
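
Beyond the plots, CausalImpact produces a numeric summary and an automatically generated verbal report:

In [ ]:
summary(impact)            # point estimates, CIs, posterior tail probability
summary(impact, "report")  # prose description of the estimated effect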

PanelMatch

In [3]:
libreq(PanelMatch)
Installing package into ‘/home/alal/R/x86_64-pc-linux-gnu-library/3.6’
(as ‘lib’ is unspecified)

     wants        loaded
[1,] "PanelMatch" TRUE  
In [5]:
dem %>% head
A data.frame: 6 × 5
  wbcode2 year dem  y tradewb
1       2 1960   0 NA      NA
2       2 1961   0 NA   11.48
3       2 1962   0 NA   12.98
4       2 1963   0 NA   18.52
5       2 1964   0 NA   25.75
6       2 1965   0 NA   29.31
In [4]:
DisplayTreatment(unit.id = "wbcode2",
                 time.id = "year", legend.position = "none",
                 xlab = "year", ylab = "Country Code",
                 treatment = "dem", data = dem)
In [19]:
PM.results <- PanelMatch(lag = 4, time.id = "year", unit.id = "wbcode2", 
                         treatment = "dem", refinement.method = "mahalanobis", # use Mahalanobis distance 
                         data = dem, match.missing = TRUE, 
                         covs.formula = ~ y, 
                         size.match = 5, qoi = "att" , outcome.var = "y",
                         lead = 0:4, forbid.treatment.reversal = FALSE, 
                         use.diagonal.variance.matrix = TRUE)
In [20]:
# extract the first matched set
mset <- PM.results$att[1]

DisplayTreatment(unit.id = "wbcode2",
                 time.id = "year", legend.position = "none",
                 xlab = "year", ylab = "Country Code",
                 treatment = "dem", data = dem,
                 matched.set = mset, # this way we highlight the particular set
                 show.set.only = TRUE)
Warning message:
“Vectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.”
In [21]:
# compare with sets that have had various refinements applied

get_covariate_balance(PM.results$att,
                      data = dem,
                      covariates = c("tradewb", "y"),
                      plot = FALSE)

get_covariate_balance(PM.results$att,
                      data = dem,
                      covariates = c("tradewb", "y"), 
                      plot = TRUE, # visualize by setting plot to TRUE
                      ylim = c(-.2, .2))
A matrix: 5 × 2 of type dbl
      tradewb          y
t_4  -0.03685  0.0494259
t_3  -0.17490  0.0582114
t_2  -0.23555  0.0350823
t_1  -0.11573  0.0071610
t_0  -0.08564 -0.0008894
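
To make the "various refinements" comparison concrete, one can also build an unrefined matched set and check its balance against the Mahalanobis-refined sets above; a sketch, assuming refinement.method = "none" skips the refinement step as in the PanelMatch vignette:

In [ ]:
PM.none <- PanelMatch(lag = 4, time.id = "year", unit.id = "wbcode2",
                      treatment = "dem", refinement.method = "none",
                      data = dem, match.missing = TRUE,
                      qoi = "att", outcome.var = "y",
                      lead = 0:4, forbid.treatment.reversal = FALSE)
get_covariate_balance(PM.none$att, data = dem,
                      covariates = c("tradewb", "y"), plot = FALSE)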

Estimate after matching

In [17]:
PE.results <- PanelEstimate(sets = PM.results, data = dem)

summary(PE.results)
plot(PE.results)
Weighted Difference-in-Differences with Mahalanobis Distance
Matches created with 4 lags

Standard errors computed with 1000 Weighted bootstrap samples

Estimate of Average Treatment Effect on the Treated (ATT) by Period:
$summary
A matrix: 5 × 4 of type dbl
     estimate  std.error    2.5%  97.5%
t+0   -0.9586     0.6452  -2.287  0.284
t+1   -0.6830     1.1137  -2.691  1.680
t+2    0.5665     1.4699  -2.207  3.735
t+3    1.5585     1.7259  -1.774  4.899
t+4    1.8818     1.9735  -1.905  5.692

$lag
4

$iterations
1000

$qoi
'att'

DiD and double-robust DiD

In [2]:
library(devtools)
Loading required package: usethis

In [8]:
# devtools::install_github("bcallaway11/did")
libreq(ggpubr)
     wants    loaded
[1,] "ggpubr" TRUE  
In [4]:
library(did)
data(mpdta)

The dataset is a balanced county-level panel of teen employment rates for 500 counties from 2003-2007. Some states are first treated in 2004, some in 2006, and some in 2007 (see the paper for more details). The important variables in the dataset are:

lemp: the log of county-level teen employment; the outcome variable.

first.treat: the period when a state first increases its minimum wage (2004, 2006, or 2007); this is the variable that defines the group in this application.

countyreal: an id number for each county; the individual identifier in this panel data context.

year: the year; the time variable.
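
A quick cross-tab confirms the cohort structure (a sketch; tidyverse and janitor are loaded in the setup cell, and first.treat = 0 marks never-treated counties):

In [ ]:
mpdta %>%
  distinct(countyreal, first.treat) %>%
  tabyl(first.treat)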

In [5]:
out <- att_gt(yname="lemp",
              first.treat.name="first.treat",
              idname="countyreal",
              tname="year",
              xformla=~1,
              data=mpdta,
              estMethod="reg",
              printdetails=FALSE,
              )
In [6]:
summary(out)
Reference: Callaway, Brantly and Sant'Anna, Pedro.  "Difference-in-Differences with Multiple Time Periods." Working Paper <https://ssrn.com/abstract=3148250>, 2019. 



| group| time|     att|     se|
|-----:|----:|-------:|------:|
|  2004| 2004| -0.0105| 0.0231|
|  2004| 2005| -0.0704| 0.0316|
|  2004| 2006| -0.1373| 0.0373|
|  2004| 2007| -0.1008| 0.0353|
|  2006| 2004|  0.0065| 0.0241|
|  2006| 2005| -0.0028| 0.0202|
|  2006| 2006| -0.0046| 0.0182|
|  2006| 2007| -0.0412| 0.0206|
|  2007| 2004|  0.0305| 0.0161|
|  2007| 2005| -0.0027| 0.0162|
|  2007| 2006| -0.0311| 0.0177|
|  2007| 2007| -0.0261| 0.0167|


P-value for pre-test of parallel trends assumption:  0.18991

In [9]:
ggdid(out, ylim=c(-.25,.1))
In [10]:
es <- aggte(out, type="dynamic")
In [12]:
summary(es)
ggdid(es)
Reference: Callaway, Brantly and Sant'Anna, Pedro.  "Difference-in-Differences with Multiple Time Periods." Working Paper <https://ssrn.com/abstract=3148250>, 2019. 

Overall ATT:  


|     att|    se|
|-------:|-----:|
| -0.0772| 0.022|


Dynamic Effects:


| event time|     att|     se|
|----------:|-------:|------:|
|         -3|  0.0305| 0.0151|
|         -2| -0.0006| 0.0132|
|         -1| -0.0245| 0.0138|
|          0| -0.0199| 0.0127|
|          1| -0.0510| 0.0174|
|          2| -0.1373| 0.0372|
|          3| -0.1008| 0.0376|
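
aggte supports other aggregation schemes besides "dynamic"; for example, averages by treatment cohort or by calendar year:

In [ ]:
summary(aggte(out, type = "group"))     # average ATT by treatment cohort
summary(aggte(out, type = "calendar"))  # average ATT by calendar year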

DRDID

In [13]:
devtools::install_github("pedrohcgs/DRDID")
Skipping install of 'DRDID' from a github remote, the SHA1 (30b67823) has not changed since last install.
  Use `force = TRUE` to force installation

DrDID with inverse-probability-tilting

In [14]:
library(DRDID)
# Load data in long format that comes in the DRDID package
data(nsw_long)
# Form the Lalonde sample with CPS comparison group
eval_lalonde_cps <- subset(nsw_long, nsw_long$treated == 0 | nsw_long$sample == 2)
In [15]:
# Implement improved locally efficient DR DID:
out <- drdid(yname = "re", tname = "year", idname = "id", dname = "experimental",
             xformla= ~ age + educ + black + married + nodegree + hisp + re74,
             data = eval_lalonde_cps, panel = TRUE)
summary(out)
 Call:
drdid(yname = "re", tname = "year", idname = "id", dname = "experimental", 
    xformla = ~age + educ + black + married + nodegree + hisp + 
        re74, data = eval_lalonde_cps, panel = TRUE)
------------------------------------------------------------------
 Further improved locally efficient DR DID estimator for the ATT:
 
   ATT     Std. Error  t value    Pr(>|t|)  [95% Conf. Interval] 
-901.2703   393.6247   -2.2897     0.022    -1672.7747  -129.766 
------------------------------------------------------------------
 Estimator based on panel data.
 Outcome regression est. method: weighted least squares.
 Propensity score est. method: inverse prob. tilting.
 Analytical standard error.
------------------------------------------------------------------
 See Sant'Anna and Zhao (2020) for details.
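
For comparison, the DRDID package also exposes the two ingredients the doubly robust estimator combines; a sketch using the same arguments as the drdid call above:

In [ ]:
# inverse-probability-weighted DiD
ipwdid(yname = "re", tname = "year", idname = "id", dname = "experimental",
       xformla = ~ age + educ + black + married + nodegree + hisp + re74,
       data = eval_lalonde_cps, panel = TRUE)
# outcome-regression DiD
ordid(yname = "re", tname = "year", idname = "id", dname = "experimental",
      xformla = ~ age + educ + black + married + nodegree + hisp + re74,
      data = eval_lalonde_cps, panel = TRUE)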

TWFE

In [16]:
# Form the Lalonde sample with CPS comparison group
eval_lalonde_cps <- subset(nsw, nsw$treated == 0 | nsw$sample == 2)
# Select some covariates
covX = as.matrix(cbind(eval_lalonde_cps$age, eval_lalonde_cps$educ,
                       eval_lalonde_cps$black, eval_lalonde_cps$married,
                       eval_lalonde_cps$nodegree, eval_lalonde_cps$hisp,
                       eval_lalonde_cps$re74))
# Implement TWFE DID with panel data
twfe_did_panel(y1 = eval_lalonde_cps$re78, y0 = eval_lalonde_cps$re75,
               D = eval_lalonde_cps$experimental,
               covariates = covX)
$ATT
dd:post: 867.508998190577
$se
352.970327719348
$uci
dd:post: 1559.3308405205
$lci
dd:post: 175.687155860655
$boots
NULL
$att.inf.func
NULL
In [22]:
libreq(scul, tidyverse, glmnet, pBrackets, knitr, kableExtra, cowplot, formattable, Synth)
      wants         loaded
 [1,] "scul"        TRUE  
 [2,] "tidyverse"   TRUE  
 [3,] "glmnet"      TRUE  
 [4,] "pBrackets"   TRUE  
 [5,] "knitr"       TRUE  
 [6,] "kableExtra"  TRUE  
 [7,] "cowplot"     TRUE  
 [8,] "formattable" TRUE  
 [9,] "Synth"       TRUE  
In [23]:
data(cigarette_sales)
## -----------------------------------------------------------------------------
dim(cigarette_sales)

head(cigarette_sales[,1:6])
  1. 28
  2. 79
A data.frame: 6 × 6
  year cigsale_6 cigsale_1 retprice_1 cigsale_5 retprice_5
1 1970     123.0      89.8       39.6     100.3       36.7
2 1971     121.0      95.4       42.7     104.1       38.8
3 1972     123.5     101.1       42.3     103.9       44.1
4 1973     124.4     102.9       42.1     108.0       45.1
5 1974     126.7     108.2       43.1     109.7       45.5
6 1975     127.1     111.7       46.6     114.8       48.6
In [24]:
AllYData <- cigarette_sales[, 1:2]
AllXData <- cigarette_sales %>%
  select(-c("year", "cigsale_6", "retprice_6"))
In [25]:
## -----------------------------------------------------------------------------
processed.AllYData <- Preprocess(AllYData)

TreatmentBeginsAt <- 19 # Here the 19th row is 1988
PostPeriodLength <- nrow(processed.AllYData) - TreatmentBeginsAt + 1
PrePeriodLength <- TreatmentBeginsAt-1
NumberInitialTimePeriods <- 5
processed.AllYData <- PreprocessSubset(processed.AllYData,
                                       TreatmentBeginsAt ,
                                       NumberInitialTimePeriods,
                                       PostPeriodLength,
                                       PrePeriodLength)
In [26]:
## -----------------------------------------------------------------------------
SCUL.input <- OrganizeDataAndSetup (
    time =  data.frame(AllYData[, 1]),
    y = data.frame(AllYData[, 2]),
    TreatmentBeginsAt = TreatmentBeginsAt,
    x.DonorPool = AllXData[, -1],
    CohensDThreshold = 0.25,
    NumberInitialTimePeriods = NumberInitialTimePeriods,
    TrainingPostPeriodLength = 7,
    x.PlaceboPool = AllXData[, -1],
    OutputFilePath="vignette_output/"
)
In [27]:
GraphTargetData()
Warning message:
“Use of `temp.DataToPlot$time` is discouraged. Use `time` instead.”
Warning message:
“Use of `temp.DataToPlot$y.actual` is discouraged. Use `y.actual` instead.”
In [28]:
## ---- echo = FALSE------------------------------------------------------------
first_guess_lasso = glmnet(as.matrix(SCUL.input$x.DonorPool[1:(TreatmentBeginsAt-1),]), as.matrix(SCUL.input$y[1:(TreatmentBeginsAt-1),]))
In [29]:
## ---- fig.height=5.33, fig.width=8, warning = FALSE, fig.align = "center", echo = FALSE, out.width = '100%'----
plot(first_guess_lasso, "lambda", label = TRUE )
In [30]:
# Create a dataframe of the treated data
data_to_plot_scul_vary_lambda <- data.frame(SCUL.input$time, SCUL.input$y)

# Label the columns
colnames(data_to_plot_scul_vary_lambda) <- c("time", "actual_y")

# Create four naive predictions that are based on random lambdas
data_to_plot_scul_vary_lambda$naive_prediction_1  <-
  predict(
    x = as.matrix(SCUL.input$x.DonorPool[1:(TreatmentBeginsAt-1),]),
    y = as.matrix(SCUL.input$y[1:(TreatmentBeginsAt-1),]),
    newx = as.matrix(SCUL.input$x.DonorPool),
    first_guess_lasso,
    s = first_guess_lasso$lambda[1],
    exact = TRUE
  )

data_to_plot_scul_vary_lambda$naive_prediction_2  <-
  predict(
    x = as.matrix(SCUL.input$x.DonorPool[1:(TreatmentBeginsAt-1),]),
    y = as.matrix(SCUL.input$y[1:(TreatmentBeginsAt-1),]),
    newx = as.matrix(SCUL.input$x.DonorPool),
    first_guess_lasso,
    s = first_guess_lasso$lambda[round(length(first_guess_lasso$lambda)/10)],
    exact = TRUE
  )


data_to_plot_scul_vary_lambda$naive_prediction_3  <-
  predict(
    x = as.matrix(SCUL.input$x.DonorPool[1:(TreatmentBeginsAt-1),]),
    y = as.matrix(SCUL.input$y[1:(TreatmentBeginsAt-1),]),
    newx = as.matrix(SCUL.input$x.DonorPool),
    first_guess_lasso,
    s = first_guess_lasso$lambda[round(length(first_guess_lasso$lambda)/5)],
    exact = TRUE
  )

data_to_plot_scul_vary_lambda$naive_prediction_4  <-
  predict(
    x = as.matrix(SCUL.input$x.DonorPool[1:(TreatmentBeginsAt-1),]),
    y = as.matrix(SCUL.input$y[1:(TreatmentBeginsAt-1),]),
    newx = as.matrix(SCUL.input$x.DonorPool),
    first_guess_lasso,
    s = 0,
    exact = TRUE
  )

# Plot these variables
lasso_plot <-
  ggplot() +
  geom_line(data = data_to_plot_scul_vary_lambda,
            aes(x = time, y = actual_y, color="Real Data"), size=1, linetype="solid")  +
  geom_line(data = data_to_plot_scul_vary_lambda,
            aes(x = time, y = naive_prediction_1, color = "#1; Removes all donors"), size=1, linetype="dashed") +
  geom_line(data = data_to_plot_scul_vary_lambda,
            aes(x = time, y = naive_prediction_2, color="#2"), size = 1, linetype="twodash")  +
  geom_line(data = data_to_plot_scul_vary_lambda, aes(x = time, y = naive_prediction_3,color = "#3"), size = 1, linetype = "longdash")  +
  geom_line(data = data_to_plot_scul_vary_lambda, aes(x = time, y = naive_prediction_4,color = "#4; Removes no donors"), size = 1, linetype="dotdash")  +
  scale_color_manual(name = "",
                     values = c("#1; Removes all donors" = "blue", "Real Data" = "#F21A00", "#2" = "#00A08A", "#3" = "#EBCC2A", "#4; Removes no donors" = "#0F0D0E")) +  
  theme_bw(base_size = 15)  +
  theme(
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(colour = "black"))  +  
  geom_vline(xintercept = data_to_plot_scul_vary_lambda$time[TreatmentBeginsAt-1], colour="grey", linetype = "dashed")  + theme(legend.position="bottom") +
  ylab("") +
  xlab("Time") +
  ggtitle(expression("Actual data v.SCUL predictions from different"~lambda~"penalties")) +
  annotate("text", x = (data_to_plot_scul_vary_lambda$time[TreatmentBeginsAt-1]-data_to_plot_scul_vary_lambda$time[1])/2+data_to_plot_scul_vary_lambda$time[1], y = max(data_to_plot_scul_vary_lambda[,-1])*1.01, label = "Pre-treatment",cex=6) +
  annotate("text", x = (data_to_plot_scul_vary_lambda$time[nrow(SCUL.input$y)] - data_to_plot_scul_vary_lambda$time[TreatmentBeginsAt-1])/2+data_to_plot_scul_vary_lambda$time[TreatmentBeginsAt-1], y = max(data_to_plot_scul_vary_lambda[,-1])*1.01, label = "Post-treatment",cex=6) +
  guides(color=guide_legend(ncol=3))

 lasso_plot
In [31]:
TrainingPostPeriodLength <- 7
  # Calculate the maximum number of possible runs given the previous three choices
  MaxCrossValidations <- PrePeriodLength-NumberInitialTimePeriods-TrainingPostPeriodLength+1

  # Set up the limits and aesthetics of the plot
  plot(0,0,xlim=c(-3.75,PrePeriodLength+2.5),ylim=c(-.75,1.5),
       xaxt="n",yaxt="n",bty="n",xlab="",ylab="",type="n")

  # Set colors (train, test, left-out)
  # From Darjeeling Limited color palette, https://github.com/karthik/wesanderson
  #custom_colors <- c("#F98400", "#00A08A", "#5BBCD6")
  CustomColors <- c("black", "white", "red")
  # Loop through all the possible cross validations
  for(j in 1:MaxCrossValidations)
  {
    # Identify the possible set of test data: From one after the training data ends until the end of the pre-treatment period.
    RangeOfFullSetOfTestData <- (NumberInitialTimePeriods+j):PrePeriodLength #7:20

    # Identify the actual set of test data: From one after the training data ends until the end of test data
    RangeOfjthTestData <- (NumberInitialTimePeriods+j):(NumberInitialTimePeriods+j+TrainingPostPeriodLength-1)

    # Identify the actual set of data left out: From one after the test data ends until the end of pre-treatment data
    RangeOfLeftoutData <- (NumberInitialTimePeriods+j+TrainingPostPeriodLength):PrePeriodLength

    #  Identify the training data. From the first one until adding (J-1).
    RangeOfTrainingData <- 1:(NumberInitialTimePeriods-1+j)

    # Put arrows through the data points to represent time
    arrows(0,1-j/MaxCrossValidations,PrePeriodLength+1,1-j/MaxCrossValidations,0.05)

    # Add squares to all of the training data
    points(RangeOfTrainingData,rep(1-j/MaxCrossValidations,length(RangeOfTrainingData)),pch=15,col=CustomColors[1])

    # Add X's to the unused data
    if(length(RangeOfFullSetOfTestData) > TrainingPostPeriodLength)
      points(RangeOfLeftoutData, rep(1-j/MaxCrossValidations,length(RangeOfLeftoutData)), pch=4, col=CustomColors[3])

    # Add circles to the test data
    if(length(RangeOfFullSetOfTestData) >= TrainingPostPeriodLength)
      points(RangeOfjthTestData,rep(1-j/MaxCrossValidations,length(RangeOfjthTestData)), pch=21, col="black",bg=CustomColors[2])
  }
  # Add informative text and bracket

  ## label what is training data
  brackets(1, .9 , NumberInitialTimePeriods, .9, h=.05)
  text((NumberInitialTimePeriods+1)/2,1.15,"Training data\n for 1st CV")

  ## label what is test data
  brackets((NumberInitialTimePeriods+1), .9 , (NumberInitialTimePeriods+TrainingPostPeriodLength), .9, h=.05)
  text((NumberInitialTimePeriods+TrainingPostPeriodLength)-((TrainingPostPeriodLength-1)/2),1.15,"Test data\n for 1st CV")

  ## label what is left-out data
  brackets(NumberInitialTimePeriods+TrainingPostPeriodLength+1, .9 , PrePeriodLength, .9, h=.05)
  text((PrePeriodLength-(PrePeriodLength-NumberInitialTimePeriods-TrainingPostPeriodLength)/2),1.15,"Left-out data\n for 1st CV")


  ## Add a legend so it will be clear in black and white

  legend(
    x="bottom",
    legend=c("Training", "Testing","Left-out"),
    bg=CustomColors,
    col=c(CustomColors[1], "black", CustomColors[3]),
    lwd=1,
    lty=c(0,0),
    pch=c(15,21,4),
    bty="n",
    horiz = TRUE,
    x.intersp=.5,
    y.intersp= .25 ,
    text.width=c(2.5,2.5,2.5)
  )

  # Add custom x-axis labels to indicate time until treatment
  text(PrePeriodLength/2,-.35,"Time period relative to treatment", cex=1)
  CustomXLabels <- seq((-PrePeriodLength), 0, by=1)
  for (z in seq(from = 1, to = PrePeriodLength + 1, by=5)) {
    text(z, -.15, CustomXLabels[z], cex=1)
  }

  # Add custom y-axis labels to indicate CV run number
  text(-3.5,1,bquote(underline("CV Run")), cex=1)
  for (z in seq(from = 0, to = 1-1/MaxCrossValidations, by = (1/MaxCrossValidations))) {
    TempLabel = MaxCrossValidations - z*MaxCrossValidations
    text(-3.5,z,TempLabel, cex=1)
  }

# Add title
#title(main = "Example of rolling origin cross-validation procedure",line = -.8)
text(-3.75, 1.5, "Example of rolling-origin k-fold cross-validation",cex=1.5,adj=0)

dev.off()
null device: 1
In [33]:
SCUL.output <- SCUL()


## ---- fig.height=8, fig.width=12, warning = FALSE, fig.align = "center", out.width = '100%'----
PlotActualvSCUL()
[1] "The training data is running from 1 to"
[1] 5
[1] "The testing data is running from"
[1] 6
[1] "to"
[1] 12
[1] "The minimum lambda is"
[1] 1.809
[1] "The training data is running from 1 to"
[1] 6
[1] "The testing data is running from"
[1] 7
[1] "to"
[1] 13
[1] "The minimum lambda is"
[1] 2.077
[1] "The training data is running from 1 to"
[1] 7
[1] "The testing data is running from"
[1] 8
[1] "to"
[1] 14
[1] "The minimum lambda is"
[1] 2.296
[1] "The training data is running from 1 to"
[1] 8
[1] "The testing data is running from"
[1] 9
[1] "to"
[1] 15
[1] "The minimum lambda is"
[1] 0.02121
[1] "The training data is running from 1 to"
[1] 9
[1] "The testing data is running from"
[1] 10
[1] "to"
[1] 16
[1] "The minimum lambda is"
[1] 0.02029
[1] "The training data is running from 1 to"
[1] 10
[1] "The testing data is running from"
[1] 11
[1] "to"
[1] 17
[1] "The minimum lambda is"
[1] 0.01838
[1] "The training data is running from 1 to"
[1] 11
[1] "The testing data is running from"
[1] 12
[1] "to"
[1] 18
[1] "The minimum lambda is"
[1] 0.01999
In [34]:
## ---- echo = FALSE------------------------------------------------------------
# Calculate pre-treatment sd
PreTreatmentSD <- sd(unlist(SCUL.output$y.actual[1:(SCUL.input$TreatmentBeginsAt-1),]))

# Store Cohen's D in each period for cross-validated lambda
StandardizedDiff <- abs(SCUL.output$y.actual-SCUL.output$y.scul)/PreTreatmentSD
names(StandardizedDiff) <- c("scul.cv")

# Store Cohen's D in each period for max lambda
StandardizedDiff$naive_prediction_1 <- abs(data_to_plot_scul_vary_lambda$naive_prediction_1 - data_to_plot_scul_vary_lambda$actual_y)/PreTreatmentSD
StandardizedDiff$naive_prediction_2 <- abs(data_to_plot_scul_vary_lambda$naive_prediction_2 - data_to_plot_scul_vary_lambda$actual_y)/PreTreatmentSD
StandardizedDiff$naive_prediction_3 <- abs(data_to_plot_scul_vary_lambda$naive_prediction_3 - data_to_plot_scul_vary_lambda$actual_y)/PreTreatmentSD
StandardizedDiff$naive_prediction_4 <- abs(data_to_plot_scul_vary_lambda$naive_prediction_4 - data_to_plot_scul_vary_lambda$actual_y)/PreTreatmentSD

# Show Cohen's D for each of these
CohensD <- data.frame(colMeans(StandardizedDiff[1:(TreatmentBeginsAt-1),]))


# Calculate treatment effect for each of these

TreatmentEffect <- SCUL.output$y.actual-SCUL.output$y.scul
names(TreatmentEffect) <- c("scul.cv")

TreatmentEffect$naive_prediction_1 <-
  data_to_plot_scul_vary_lambda$actual_y -   
  data_to_plot_scul_vary_lambda$naive_prediction_1
TreatmentEffect$naive_prediction_2 <-
  data_to_plot_scul_vary_lambda$actual_y -
  data_to_plot_scul_vary_lambda$naive_prediction_2
TreatmentEffect$naive_prediction_3 <-
  data_to_plot_scul_vary_lambda$actual_y-
  data_to_plot_scul_vary_lambda$naive_prediction_3
TreatmentEffect$naive_prediction_4 <-  
  data_to_plot_scul_vary_lambda$actual_y -
  data_to_plot_scul_vary_lambda$naive_prediction_4


AvgTreatmentEffect <- data.frame(colMeans(TreatmentEffect[TreatmentBeginsAt:nrow(StandardizedDiff),]))

# For target variable
Results.y.CohensD <- SCUL.output$CohensD
Results.y.StandardizedDiff <- (SCUL.output$y.actual-SCUL.output$y.scul)/sd(unlist(SCUL.output$y.actual[1:(SCUL.input$TreatmentBeginsAt-1),]))
Results.y <- SCUL.output$y.scul


## ---- echo = FALSE, warning = FALSE, message=FALSE----------------------------
table_for_hux <- cbind(
  (CohensD),
  (AvgTreatmentEffect)
)
names(table_for_hux) <- c("CohensD", "ATE")

table_for_hux$name <- c("Cross-validation for determining penalty", "Naive guess 1:\n Max penalty (remove all donors)", "Naive guess 2:\n Random penalty", "Naive guess 3:\n Random penalty", "Naive guess 4:\n No penalty (include all donors)")
table_for_hux$value <- c(SCUL.output$CrossValidatedLambda, first_guess_lasso$lambda[1], first_guess_lasso$lambda[round(length(first_guess_lasso$lambda)/10)], first_guess_lasso$lambda[round(length(first_guess_lasso$lambda)/5)], 0)

table_for_hux <- table_for_hux[c(3, 4, 1,2)]
In [36]:
libreq(IRdisplay)
     wants       loaded
[1,] "IRdisplay" TRUE  
In [37]:
kable(table_for_hux, col.names = c("Method","Penalty parameter", "Cohens D (pre-period fit)", "ATE estimate"), digits = 2, row.names = FALSE) %>%
  kable_styling(bootstrap_options = "striped", full_width = F) %>%
column_spec(1, width = "5cm") %>%
column_spec(2:4, width = "2cm")  %>%
  pack_rows("SCUL procedure using:", 1, 1) %>%
  pack_rows("SCUL using naive guesses for penalty", 2, 5) %>% 
    as.character() %>% 
    display_html()
Method                                           Penalty parameter  Cohens D (pre-period fit)  ATE estimate
SCUL procedure using:
  Cross-validation for determining penalty                    0.02                       0.01        -13.52
SCUL using naive guesses for penalty
  Naive guess 1: Max penalty (remove all donors)              9.50                       0.82        -50.34
  Naive guess 2: Random penalty                               6.25                       0.55        -35.70
  Naive guess 3: Random penalty                               3.92                       0.36        -26.97
  Naive guess 4: No penalty (include all donors)              0.00                       0.00        -13.38
In [38]:
PlotShareTable()
A formattable: 17 × 3
             Share for First Prediction  Share for Most Recent Prediction  Coefficient
Intercept                          0.28                              0.28        44.62
cigsale_17                         0.14                              0.17         0.21
cigsale_32                         0.11                              0.15         0.13
cigsale_54                         0.10                              0.11         0.15
retprice_54                        0.08                              0.02        -0.11
cigsale_23                         0.08                              0.08         0.10
cigsale_47                         0.08                              0.07        -0.10
cigsale_49                         0.04                              0.05         0.11
cigsale_20                         0.04                              0.04         0.06
retprice_46                        0.02                              0.01        -0.02
retprice_9                         0.01                              0.00        -0.02
cigsale_18                         0.01                              0.01        -0.01
cigsale_31                         0.01                              0.01         0.01
retprice_50                        0.00                              0.00         0.00
retprice_40                        0.00                              0.00         0.00
retprice_29                        0.00                              0.00         0.00
retprice_51                        0.00                              0.00         0.00
In [40]:
# Donor-pool restriction, passed as a dplyr string: for each placebo run it
# drops every donor series whose name ends with the same FIPS suffix (the
# last two characters), i.e. all series from the placebo unit's own state
drop_vars_from_same_FIPS <-
 "select(-ends_with(substring(names(x.PlaceboPool)[h],nchar(names(x.PlaceboPool)[h]) - 2 + 1, nchar(names(x.PlaceboPool)[h]))))"

## ---- results='hide'----------------------------------------------------------
SCUL.inference <- CreatePlaceboDistribution(
  DonorPoolRestrictionForEachPlacebo = drop_vars_from_same_FIPS
)
In [41]:
smoke_plot <- SmokePlot()
smoke_plot
In [42]:
# Plot null distribution with no restriction on pre-period fit
NullDist.Full <- PlotNullDistribution(
    CohensD = 999,
    StartTime = TreatmentBeginsAt,
    EndTime = nrow(SCUL.output$y.actual),
    height = 2,
    AdjustmentParm = 1,
    BandwidthParm = .25,
    title_label = "Placebo distribution compared\n to ATE estimate in\n pre-period standard deviations",
    y_label  = " ",
    x_label  =  "",
    subtitle_label  =  "No Cohen's D restriction",
    rejection_label  =  ""
) +
  geom_vline(
        xintercept = mean(Results.y.StandardizedDiff[TreatmentBeginsAt:nrow(Results.y.StandardizedDiff),]),
        linetype = "dashed",
        size = 1,
        color = "red")

# Plot null distribution 0.25 cohen's D restriction on pre-period fit
NullDist.25 <- PlotNullDistribution(
    CohensD = 0.25,
    StartTime = TreatmentBeginsAt,
    EndTime = nrow(SCUL.output$y.actual),
    height = 2,
    AdjustmentParm = 1,
    BandwidthParm = .25,
    y_label  = "",
    x_label  =  "Distribution of standardized difference\n for placebo donor pool",
    subtitle_label  =  "0.25 Cohen's D restriction",
    rejection_label  =  "",
    title_label = " ",

) +
  geom_vline(
        xintercept = mean(Results.y.StandardizedDiff[TreatmentBeginsAt:nrow(Results.y.StandardizedDiff),]),
        linetype = "dashed",
        size = 1,
        color = "red")

# Combine the two plots
combined_plot <- plot_grid(
    NullDist.Full,NullDist.25,
    ncol = 1)

# Display the plot
combined_plot
Warning message:
“Removed 1 rows containing non-finite values (stat_density).”
In [43]:
## -----------------------------------------------------------------------------
#########################################################################################################
# Calculate an average post-treatment p-value
PValue(
    CohensD = 999,
    StartTime = SCUL.input$TreatmentBeginsAt,
    EndTime = nrow(Results.y.StandardizedDiff)
)

PValue(
    CohensD = .25,
    StartTime = SCUL.input$TreatmentBeginsAt,
    EndTime = nrow(Results.y.StandardizedDiff)
)
A data.frame: 1 × 1
p.value.high
<chr>
0.13
A data.frame: 1 × 1
p.value.high
<chr>
0.07
In [44]:
data_for_traditional_scm <- pivot_longer(data = cigarette_sales,
                                  cols = starts_with(c("cigsale_", "retprice_")),
   names_to = c("variable", "fips"),
   names_sep = "_",
   names_prefix = "X",
   values_to = "value",
   values_drop_na = TRUE
 )

data_for_traditional_scm <- pivot_wider(data = data_for_traditional_scm,
                                  names_from = variable,
                                  values_from = value)


 data_for_traditional_scm$fips <- as.numeric(data_for_traditional_scm$fips)
In [45]:
data(synth.data)

# create matrices from panel data that provide inputs for synth()
data_for_traditional_scm$idno = as.numeric(as.factor(data_for_traditional_scm$fips))  # create numeric unit id required for synth()

data_for_traditional_scm <- as.data.frame(data_for_traditional_scm)

dataprep.out<-
  dataprep(
    foo = data_for_traditional_scm,
    predictors = c("retprice"),
    predictors.op = "mean",
    dependent = "cigsale",
    unit.variable = "idno",
    time.variable = "year",
    special.predictors = list(
      list("cigsale", 1970, "mean"),
      list("cigsale", 1980, "mean"),
      list("cigsale", 1985, "mean")
    ),
    treatment.identifier = unique(data_for_traditional_scm$idno[data_for_traditional_scm$fips==6]),
    controls.identifier = unique(data_for_traditional_scm$idno[data_for_traditional_scm$fips!=6]),
    time.predictors.prior = c(1970:1987),
    time.optimize.ssr = c(1970:1987),
    time.plot = 1970:1997
  )

## run the synth command to identify the weights
## that create the best possible synthetic
## control unit for the treated.
synth.out <- synth(dataprep.out)
X1, X0, Z1, Z0 all come directly from dataprep object.


**************** 
 searching for synthetic control unit  
 

**************** 
**************** 
**************** 

MSPE (LOSS V): 5.025 

solution.v:
 0.2051 0.00329 0.5995 0.1921 

solution.w:
 0.001381 0.001215 0.002267 0.08392 0.001192 0.001283 0.00214 0.001792 0.00143 0.002045 0.001752 0.0007357 0.001434 0.001558 0.001604 0.001442 0.001441 0.002648 0.002142 0.3229 0.0001724 0.1925 0.001379 0.00103 0.001443 0.001352 0.001601 0.001276 0.001408 0.001729 0.001187 0.001526 0.3486 0.001027 0.001421 0.002404 0.002146 0.001464 

In [46]:
gaps<- dataprep.out$Y1plot-(
  dataprep.out$Y0plot%*%synth.out$solution.w
)

StandardizedDiff$traditional_scm <- abs(dataprep.out$Y1plot-(
  dataprep.out$Y0plot%*%synth.out$solution.w
))/PreTreatmentSD

# Show Cohen's D for each of these
CohensD <- data.frame(colMeans(StandardizedDiff[1:(TreatmentBeginsAt-1),]))


# Calculate treatment effect for each of these
TreatmentEffect <- data.frame(colMeans(StandardizedDiff[TreatmentBeginsAt:nrow(StandardizedDiff),])*PreTreatmentSD)
In [47]:
StandardizedDiff<-cbind(StandardizedDiff,SCUL.input$time)
colnames(StandardizedDiff)[7] <- "time"
# create smoke plot
difference_plot <- ggplot() +
  theme_classic() +
    geom_line(data = StandardizedDiff, aes(x = time, y = -scul.cv), alpha = 1, size = 2., color = "black") +
    geom_line(data = StandardizedDiff, aes(x = time, y = -scul.cv), alpha = 1, size = 1.75, color = "#4dac26") +
     geom_line(data = StandardizedDiff, aes(x = time, y = -traditional_scm), alpha = 1, size = 2., color = "black") +
    geom_line(data = StandardizedDiff, aes(x = time, y = -traditional_scm), alpha = 1, size = 1.75, color = "orange", linetype = "dashed") +
    geom_vline(
        xintercept = SCUL.input$time[TreatmentBeginsAt,1],
        linetype = "dashed",
        size = 1,
        color = "grey37"
    ) +
    labs(
        title = "Difference between actual cigarette sales and synthetic predictions\n from SCUL (green-solid)\n and a traditional synthetic control method (orange-dashed)",
        x = "Time",
        y = "Difference between actual data and predictions\n in pre-treatment standard deviations for each product"
    ) +
    theme(
        axis.text = element_text(size = 18),
        axis.title.y = element_text(size = 12),
        axis.title.x = element_text(size = 18),
        title = element_text(size = 12)
    )

# Display graph
difference_plot
In [48]:
table_for_hux_scm <- cbind(
  (CohensD),
  (TreatmentEffect)
)
table_for_hux_scm <- table_for_hux_scm[-(2:5),]

names(table_for_hux_scm) <- c("CohensD", "ATE")

table_for_hux_scm$name <- c("Cross-validation for determining penalty", " ")
table_for_hux_scm$value <- c(round(SCUL.output$CrossValidatedLambda, digits = 2), "NA")

table_for_hux_scm <- table_for_hux_scm[c(3, 4, 1,2)]


kable(table_for_hux_scm, col.names = c("Method","Penalty parameter", "Cohens D (pre-period fit)", "ATE estimate"), digits = 3, row.names = FALSE) %>%
  kable_styling(bootstrap_options = "striped", full_width = F) %>%
column_spec(1, width = "5cm") %>%
column_spec(2:4, width = "2cm")  %>%
  pack_rows("SCUL procedure using:", 1, 1) %>%
  pack_rows("Traditional SCM method", 2, 2) %>% as.character %>% display_html
Method                                     Penalty parameter  Cohens D (pre-period fit)  ATE estimate
SCUL procedure using:
  Cross-validation for determining penalty              0.02                      0.015         13.52
Traditional SCM method
                                                          NA                      0.155         14.93
In [ ]: