Doubly Robust Proximal Synthetic Control: Notes on Qiu et al (2022)

Author

Apoorva Lal

Published

January 17, 2023

Paper

Code is sliced and subset from the paper’s replication archive — authors, please publish replication code like this so that people can understand your methods.

# Attach all packages used below (libreq is presumably a loader that wraps
# library()/require() and reports the "wants / loaded" matrix printed below).
# NOTE(review): setDT(), := and dcast() later in the script need data.table,
# which is not listed here — presumably attached as a dependency of one of
# these packages; consider listing it explicitly.
libreq(
  magrittr, tidyverse, gmm,
  # Synth,
  CVXSynth,
  splines, scales, ggplot2, ggpubr,
  augsynth
)
      wants       loaded
 [1,] "magrittr"  TRUE  
 [2,] "tidyverse" TRUE  
 [3,] "gmm"       TRUE  
 [4,] "CVXSynth"  TRUE  
 [5,] "splines"   TRUE  
 [6,] "scales"    TRUE  
 [7,] "ggplot2"   TRUE  
 [8,] "ggpubr"    TRUE  
 [9,] "augsynth"  TRUE  
# Kansas tax-experiment panel shipped with {augsynth}; convert to a
# data.table in place so the `[, := ]` syntax below works.
data(kansas)
setDT(kansas)

# Quarterly time index: 1990Q1 -> 1, 1990Q2 -> 2, ...
kansas[, t := qtr + 4 * (year - 1990)]

############################################################
# residualise on time trend
# Fit a quadratic time trend on donor states only (Kansas is excluded so the
# treatment cannot contaminate the trend), then subtract the fitted trend
# from every state's outcome, Kansas included.
d1 = kansas[, .(fips, state, t, lngdpcapita)]

model = lm(lngdpcapita ~ poly(t, 2),
  data = d1[state != "Kansas"])

# predict(model, d1): d1 is matched to `newdata` positionally;
# residual = raw outcome - fitted trend
d1[, lngdpcapita.resid  := lngdpcapita - predict(model, d1)]

# Treated series: detrended log GDP per capita for Kansas
Y = d1[state == "Kansas", lngdpcapita.resid]

# NOTE(review): T and t shadow base::T (TRUE) and base::t() for the rest of
# the script — kept as in the replication code, but a known R footgun.
T = nunique(d1$t)
t = 1:T
# t = 89 corresponds to 2012 Q1; treatment turns on strictly after T0,
# i.e. from 2012 Q2 onward.
T0 = (2012 - 1990) * 4 + 1
treatment = as.numeric(t > T0)

# Donor outcome matrix: one row per quarter, one column per donor state
# (Kansas excluded); values are detrended log GDP per capita.
X = dcast(
  d1[state != "Kansas", .(lngdpcapita.resid, state, t)],
  t ~ state, value.var = "lngdpcapita.resid") %>%
  .[,t := NULL] %>% as.matrix()

Synthetic Control

Simplex regression of \(Y_{it} \sim \mathbf{W}_{it}\) where coefficients are constrained to be nonnegative and sum to 1. This generates some regularisation.

# Fit simplex-constrained weights on pre-treatment quarters only, then
# print the weight attached to each donor state.
synth_weights = sc_solve(matrix(Y[t <= T0], ncol = 1), X[t <= T0, ])
data.frame(colnames(X), synth_weights)
      colnames.X.    synth_weights
1         Alabama 0.00000000022958
2          Alaska 0.06523987755379
3         Arizona 0.00000000017686
4        Arkansas 0.00000000022375
5      California 0.00000000018324
6        Colorado 0.00000000020248
7     Connecticut 0.00000000017940
8        Delaware 0.00000000012015
9         Florida 0.00000000010253
10        Georgia 0.00000000010126
11         Hawaii 0.00000000139884
12          Idaho 0.00000000038179
13       Illinois 0.00000000037299
14        Indiana 0.00000000055559
15           Iowa 0.00000000018940
16       Kentucky 0.05321333170715
17      Louisiana 0.00000000002448
18          Maine 0.00000000018884
19       Maryland 0.00000000011601
20  Massachusetts 0.00000000017215
21       Michigan 0.00000000020559
22      Minnesota 0.00000000013564
23    Mississippi 0.00000019021232
24       Missouri 0.00000000057584
25        Montana 0.00000000140186
26       Nebraska 0.00000000066160
27         Nevada 0.00000000006070
28  New Hampshire 0.00000000014335
29     New Jersey 0.00000000057124
30     New Mexico 0.00000000022286
31       New York 0.00000000017532
32 North Carolina 0.00000000050946
33   North Dakota 0.12935644228572
34           Ohio 0.00000000027906
35       Oklahoma 0.00000000175837
36         Oregon 0.00000000028490
37   Pennsylvania 0.00000000098126
38   Rhode Island 0.00000000008648
39 South Carolina 0.30088860776782
40   South Dakota 0.00000000021106
41      Tennessee 0.00000000031173
42          Texas 0.14595838215984
43           Utah 0.00000000029744
44        Vermont 0.00000000029213
45       Virginia 0.00000000013695
46     Washington 0.22031879199804
47  West Virginia 0.08502436138293
48      Wisconsin 0.00000000023740
49        Wyoming 0.00000000043322
# Synthetic Kansas over the full sample; ATT estimate = mean post-period gap
Abadie.SC = as.numeric(X %*% synth_weights)
(phi.hat = mean(Y[t > T0] - Abadie.SC[t > T0]))
[1] -0.02943

Select donors with >0.1 weights

# Donor states that received non-negligible (> 0.1) synthetic-control weight
donors = c("North Dakota", "South Carolina", "Texas", "Washington")
# outcome series for donors
W = X[, donors]
# outcome series for everyone else
# ("Kansas" is never a column of X, so listing it here is a harmless safeguard)
Z = X[, setdiff(colnames(X), c("Kansas", donors))]

# Bundle everything the estimators below need.
# NOTE(review): `data` shadows utils::data() from here on.
data = list(t = t, W = W,
    Z = Z, Y = Y, T = T,
    T0 = T0, treatment = treatment)

OLS

Regress pre-treatment outcomes for Kansas on all other units.

# OLS benchmark: regress Y on the donor outcomes W plus the treatment dummy.
# phi.hat is the treatment coefficient; SC is the implied counterfactual,
# i.e. the fitted values with the treatment term zeroed out
# (intercept + W %*% slopes).
# `vcovfun` is accepted for signature parity with PI.h()/DR() but unused.
OLS = function(data, vcovfun = c("vcovHAC", "NeweyWest")) {
  fit = with(data, lm(Y ~ W + treatment))
  beta = coef(fit)
  k = ncol(data$W) + 1  # intercept + one slope per donor column
  counterfactual = as.numeric(cbind(1, data$W) %*% beta[seq_len(k)])
  list(phi.hat = beta["treatment"], SC = counterfactual)
}

# Fit the OLS benchmark
OLS.result = OLS(data)

Outcome Bridge

# Proximal estimator using the OUTCOME confounding bridge h only.
# h(W; alpha) = alpha0 + alpha'W is calibrated so that, pre-treatment,
# Y - h(W) is orthogonal to the proxies (1, Z); phi is then the mean
# post-period gap Y - h(W). alpha and phi are estimated jointly by
# just-identified GMM with an analytic gradient; inference on phi via
# HAC / Newey-West vcov from {sandwich}.
#
# Args:
#   data    -- list with t, W, Z, Y, T, T0, treatment (see `data` above)
#   vcovfun -- which sandwich vcov to use for the SE of phi
# Returns: list(phi.hat, SE, CI, convergence, SC) where SC = fitted h(W_t).
PI.h = function(data, vcovfun = c("vcovHAC", "NeweyWest")) {
  vcovfun = match.arg(vcovfun)
  T = data$T
  T0 = data$T0
  t = data$t
  nW = ncol(data$W)
  # outcome bridge h(W; alpha), alpha = theta[1:(nW+1)]
  evalh = \(theta, data) as.numeric(cbind(1, data$W) %*% theta[1:(nW + 1)])
  # instruments for the h-moments: intercept + all non-donor proxies Z
  gh = cbind(1, data$Z)
  ngh = ncol(gh)
  ###################################################################
  # moment function and gradient
  ###################################################################
  g = \(theta, data) {
    h = evalh(theta, data)
    # g1: pre-period orthogonality of (Y - h) with gh; the T/T0 factor
    #     rescales the pre-period sum to a full-sample average
    g1 = T / T0 * (t <= T0) * (data$Y - h) * gh
    # g2: post-period, phi (= theta[nW + 2]) centers Y - h
    g2 = (t > T0) * (theta[nW + 2] - (data$Y - h))
    cbind(g1, g2)
  }
  # Analytic Jacobian of the averaged moments, (ngh + 1) x (nW + 2).
  # (The bridge is linear, so the Jacobian does not depend on h itself;
  # the unused `h = evalh(...)` line of the original was dropped.)
  gradv = \(theta, data) {
    out = matrix(0, ngh + 1, nW + 2)
    # g1: d/d alpha
    out[1:ngh, 1:(nW + 1)] = -t(gh) %*% (cbind(1, data$W) * (t <= T0)) / T0
    # g2: d/d phi and d/d alpha
    out[ngh + 1, nW + 2] = (T - T0) / T
    out[ngh + 1, 1:(nW + 1)] = colMeans((t > T0) * cbind(1, data$W))
    out
  }
  ###################################################################
  # warm start with lm: alpha from pre-period OLS of Y on W,
  # phi from the mean post-period prediction gap
  init.model.h = lm(data$Y ~ ., data = as.data.frame(data$W), subset = t <= T0)
  init.alpha = coef(init.model.h)
  init.phi = mean(data$Y[t > T0] -
                  predict(init.model.h,
                          newdata = as.data.frame(data$W[t > T0, , drop = FALSE])))
  theta0 = c(init.alpha, init.phi)
  names(theta0) = c(paste0("alpha", 0:nW), "phi")
  ###################################################################
  # gmm (identity weight matrix: the system is just-identified)
  model = gmm(
    g = g,
    gradv = gradv,
    x = data,
    t0 = theta0,
    wmatrix = "ident",
    control = list(maxit = 1e5),
    method = "BFGS", vcov = "iid")
  ###################################################################
  cat("Parameter estimate:\n")
  print(coef(model))
  phi.hat = coef(model)["phi"]
  # vcov HAC
  var = if (vcovfun == "vcovHAC") {
    sandwich::vcovHAC(model)
  } else {
    sandwich::NeweyWest(model, prewhite = FALSE)
  }
  rownames(var) = colnames(var) = names(theta0)
  SE = sqrt(var["phi", "phi"])
  CI = phi.hat + qnorm(c(.025, .975)) * SE
  SC = evalh(coef(model), data)
  list(phi.hat = phi.hat, SE = SE, CI = CI,
    convergence = model$algoInfo$convergence, SC = SC)
}

# Outcome-bridge-only proximal estimate
h.result = PI.h(data)
Parameter estimate:
  alpha0   alpha1   alpha2   alpha3   alpha4      phi 
 0.16133  0.46984  1.00704 -0.04408  0.16969 -0.10587 

Both Outcome Bridge and Treatment Bridge

Involves solving the following moment condition problem [probably won’t make sense without reading the paper].

# Doubly robust (DR) proximal synthetic control estimator. Combines:
#   * an outcome bridge  h(W; alpha) = alpha0 + alpha'W, and
#   * a treatment bridge q(Z; beta)  = exp(beta0 + beta'Z[, Z.states]),
# so phi is consistent if either bridge is correctly specified. Estimated
# by just-identified GMM with an analytic Jacobian; HAC / Newey-West SEs
# for phi via {sandwich}.
#
# Args:
#   data     -- list with t, W, Z, Y, T, T0, treatment (see `data` above)
#   Z.states -- names of the columns of data$Z used to calibrate q
#   vcovfun  -- which sandwich vcov to use for the SE of phi
# Returns: list(phi.hat, SE, CI, convergence, SC), SC = fitted h(W_t).
#
# Parameter vector layout (length nW + nZ + ngq + 4):
#   alpha: 1 .. nW+1                (outcome bridge)
#   beta:  nW+2 .. nW+nZ+2          (treatment bridge, on Z.states)
#   psi:   nW+nZ+3 .. nW+nZ+ngq+2   (centering means of the q-instruments)
#   phi:   nW+nZ+ngq+3              (the target parameter / ATT)
#   psi-:  nW+nZ+ngq+4              (pre-period DR correction term)
DR = function(data,
    Z.states,
    vcovfun = c("vcovHAC", "NeweyWest")) {
  vcovfun = match.arg(vcovfun)
  T = data$T
  T0 = data$T0
  t = data$t
  nW = ncol(data$W)
  nZ = ncol(data$Z[, Z.states, drop = FALSE])
  # confounding bridge functions
  evalh = \(theta, data) as.numeric(cbind(1, data$W) %*% theta[1:(nW + 1)])
  evalq = \(theta, data) {
    as.numeric(
        exp(
            cbind(1, data$Z[, Z.states, drop = FALSE]) %*% theta[(nW + 2):(nW + nZ + 2)]
          )
      )
  }
  gh = cbind(1, data$Z)  # instruments for the h-moments
  ngh = ncol(gh)
  gq = cbind(1, data$W)  # instruments for the q-moments
  ngq = ncol(gq)
  ######################################################################
  # moment function
  g = function(theta, data) {
    h = evalh(theta, data)
    q = evalq(theta, data)
    # ASCII names for the paper's phi, psi, psi^- (the original used Greek
    # identifiers, which the archive text mangled)
    phi = theta[nW + nZ + ngq + 3]
    # centering parameters, recycled row-wise so psi is T x ngq
    psi = matrix(theta[(nW + nZ + 3):(nW + nZ + ngq + 2)], nrow = T, ncol = ngq, byrow = TRUE)
    psi.minus = theta[nW + nZ + ngq + 4]
    # g1: h regresses y on W (pre-period orthogonality of Y - h with gh)
    g1 = (t <= T0) * (data$Y - h) * gh
    # g2/g3: q calibration — psi equals the post-period mean of gq and the
    # pre-period mean of q * gq, so q reweights the pre-period W-moments to
    # match the post-period ones.
    # FIX(review): g2 and g4 were truncated in the archive
    # ("*- gq)" / "*- (data$Y - h) + ...)"); restored so that the analytic
    # Jacobian in gradv below is exactly the derivative of these moments.
    g2 = (t > T0) * (psi - gq)
    g3 = (t <= T0) * (q * gq - psi)
    # g4/g5: phi = mean post-period (Y - h) minus the DR correction
    # psi- = mean pre-period q * (Y - h)
    g4 = (t > T0) * (phi - (data$Y - h) + psi.minus)
    g5 = (t <= T0) * (psi.minus - q * (data$Y - h))
    cbind(g1, g2, g3, g4, g5)
  }
  # analytic Jacobian of the averaged moments - try autodiff?
  gradv = function(theta, data) {
    h = evalh(theta, data)
    q = evalq(theta, data)
    out = matrix(0, ngh + 2 * ngq + 2, nW + nZ + ngq + 4)
    # g1: d/d alpha
    out[1:ngh, 1:(nW + 1)] = -t(gh) %*% (cbind(1, data$W) * (t <= T0)) / T
    # g2: d/d psi
    out[(ngh + 1):(ngh + ngq), (nW + nZ + 3):(nW + nZ + ngq + 2)] = diag((T - T0) / T, ngq)
    # g3: d/d psi, and d/d beta via dq/dbeta = q * (1, Z)
    out[(ngh + ngq + 1):(ngh + 2 * ngq), (nW + nZ + 3):(nW + nZ + ngq + 2)] = diag(-T0 / T, ngq)
    M1 = t(gq)
    M2 = q * (t <= T0) * cbind(1, data$Z[, Z.states, drop = FALSE]) / T
    out[(ngh + ngq + 1):(ngh + 2 * ngq), (nW + 2):(nW + nZ + 2)] = M1 %*% M2
    # g4: d/d phi, d/d psi-, d/d alpha
    out[ngh + 2 * ngq + 1, nW + nZ + ngq + 3] = out[ngh + 2 * ngq + 1, nW + nZ + ngq + 4] = (T - T0) / T
    out[ngh + 2 * ngq + 1, 1:(nW + 1)] = colMeans(cbind(1, data$W) * (t > T0))
    # g5: d/d psi-, d/d alpha, d/d beta
    out[ngh + 2 * ngq + 2, nW + nZ + ngq + 4] = T0 / T
    out[ngh + 2 * ngq + 2, 1:(nW + 1)] = colMeans(q * cbind(1, data$W) * (t <= T0))
    out[ngh + 2 * ngq + 2, (nW + 2):(nW + nZ + 2)] = colMeans(-q * (data$Y - h) * cbind(1, data$Z[, Z.states, drop = FALSE]) * (t <= T0))
    out
  }
  ######################################################################
  # warm start: alpha from pre-period OLS of Y on W; beta from a logit of
  # post vs pre on Z.states (intercept shifted from odds to density ratio)
  init.model.h = lm(data$Y ~ ., data = as.data.frame(data$W), subset = t <= T0)
  init.alpha = coef(init.model.h)
  init.model.q = glm(I(t > T0) ~ .,
    data = as.data.frame(data$Z[, Z.states, drop = FALSE]), family = binomial())
  init.beta = coef(init.model.q)
  init.beta[1] = init.beta[1] + log((T - T0) / T0)
  init.psi = colMeans(gq[t > T0, , drop = FALSE])
  init.psi.minus = mean(exp(cbind(1, data$Z[t <= T0, Z.states, drop = FALSE]) %*% init.beta) *
    (data$Y[t <= T0] - predict(init.model.h, newdata = as.data.frame(data$W[t <= T0, , drop = FALSE]))))
  init.phi = mean(data$Y[t > T0] -
    predict(init.model.h, newdata = as.data.frame(data$W[t > T0, , drop = FALSE]))) - init.psi.minus
  theta0 = c(init.alpha, init.beta, init.psi, init.phi, init.psi.minus)
  names(theta0) = c(paste0("alpha", 0:nW),
                    paste0("beta", 0:nZ),
                    paste0("psi", 1:ngq),
                    "phi", "psi-")
  ######################################################################
  # gmm (identity weight matrix: the system is just-identified)
  model = gmm(
      g = g,
      x = data,
      t0 = theta0,
      gradv = gradv,
      wmatrix = "ident",
      control = list(maxit = 1e5), method = "BFGS", vcov = "iid")
  cat("Parameter estimate:\n")
  print(coef(model))
  cat("Summary of estimated q(Zt):\n")
  print(summary(evalq(coef(model), data)[t <= T0]))
  phi.hat = coef(model)["phi"]
  # vcov HAC
  var = if (vcovfun == "vcovHAC") {
    sandwich::vcovHAC(model)
  } else {
    sandwich::NeweyWest(model, prewhite = FALSE)
  }
  rownames(var) = colnames(var) = names(theta0)
  SE = sqrt(var["phi", "phi"])
  CI = phi.hat + qnorm(c(.025, .975)) * SE
  SC = evalh(coef(model), data)
  list(phi.hat = phi.hat, SE = SE, CI = CI, convergence = model$algoInfo$convergence, SC = SC)
}

Three candidates for \(Z\) - sets that are outside the donor set from synth and can therefore be used to calibrate \(q\).

# q calibrated on Iowa only
DR.Iowa.result = DR(data, "Iowa")
Parameter estimate:
   alpha0    alpha1    alpha2    alpha3    alpha4     beta0     beta1      psi1 
 0.076781  0.324989  0.675834  0.075280  0.233647 -1.387058 65.353677  1.014234 
     psi2      psi3      psi4      psi5       phi      psi- 
 0.043414 -0.198026  0.124127  0.199551 -0.077099 -0.002567 
Summary of estimated q(Zt):
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.001   0.008   0.029   1.015   0.180  20.861 
# q calibrated on South Dakota + Iowa
DR.Iowa.SouthDakota.result = DR(data, c("South Dakota", "Iowa"))
Parameter estimate:
   alpha0    alpha1    alpha2    alpha3    alpha4     beta0     beta1     beta2 
  0.16010   0.48477   1.02081  -0.12734   0.23723  -3.04139 103.73474 -45.31258 
     psi1      psi2      psi3      psi4      psi5       phi      psi- 
  1.03547   0.23378  -0.27774   0.09873   0.13941  -0.09216  -0.01520 
Summary of estimated q(Zt):
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   0.00    0.00    0.00    1.04    0.01   36.45 
# q calibrated on South Dakota + Iowa + Oklahoma
DR.Iowa.SouthDakota.Oklahoma.result =
  DR(data, c("South Dakota", "Iowa", "Oklahoma"))
Parameter estimate:
   alpha0    alpha1    alpha2    alpha3    alpha4     beta0     beta1     beta2 
  0.15451   0.48807   1.05165  -0.06518   0.30431  -2.76211  82.68537 -22.74820 
    beta3      psi1      psi2      psi3      psi4      psi5       phi      psi- 
 -6.10781   1.03719   0.21539  -0.27161   0.10050   0.14454  -0.09646  -0.01482 
Summary of estimated q(Zt):
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.000   0.000   0.001   1.038   0.063  29.640 

summary figure

# Assemble a long data frame: the observed (detrended) Kansas series Y next
# to each method's synthetic counterfactual, one facet per method.
wide = data.frame(
  t = data$t,
  `Abadies SC` = Abadie.SC,
  DR = DR.Iowa.result$SC,
  DR2 = DR.Iowa.SouthDakota.result$SC,
  DR3 = DR.Iowa.SouthDakota.Oklahoma.result$SC,
  `outcome bridge` = h.result$SC,
  OLS = OLS.result$SC,
  check.names = FALSE
)
plot.d = wide %>%
  pivot_longer(!t, names_to = "method", values_to = "outcome") %>%
  arrange(method, t) %>%
  mutate(Y = rep(data$Y, times = 6))  # observed series, repeated per method
colors = hue_pal()(2)
# Solid line: observed Kansas; dashed line: each method's counterfactual;
# dotted vertical line: last pre-treatment quarter.
fig = ggplot(plot.d, aes(x = 1990 + t * .25, y = Y)) +
  geom_line(color = colors[1], linewidth = 1, alpha = .5) +
  geom_line(aes(y = outcome), color = colors[2], linetype = "longdash", linewidth = 1) +
  geom_vline(xintercept = 1990 + (data$T0) * .25, linetype = "dotted") +
  facet_wrap(~method, labeller = "label_both") +
  ylab("Detrended log GDP per capita") +
  xlab("Year") +
  scale_x_continuous(breaks = seq(1990, 2010, 10)) +
  theme_bw()
fig