library(LalRUtils)
libreq(data.table,
  boot, car, # classic boot
  sandwich,
  bayesboot, MCMCpack, # bayes boot
  RRI,  # residual randomisation
  ggplot2)
##      wants        loaded
## [1,] "data.table" TRUE  
## [2,] "boot"       TRUE  
## [3,] "car"        TRUE  
## [4,] "sandwich"   TRUE  
## [5,] "bayesboot"  TRUE  
## [6,] "MCMCpack"   TRUE  
## [7,] "RRI"        TRUE  
## [8,] "ggplot2"    TRUE
theme_set(lal_plot_theme())

Snippets implementing a variety of resampling inference methods.

Simulate some data

# %% simulate data
n = 1000; p = 2; k = 4
X = cbind(1, matrix(rnorm(n * p), nrow = n))
ind = c(rep(0, 0.8*n), rep(1, .2*n)) # cluster indicator
β = c(1, 0.2, -.4)
y = X  %*% β + rnorm(n, sd= (1-ind) * 0.1 + ind * 5) # fn of group indicator
df = cbind(y, X, as.factor(ind)) |> as.data.frame()
colnames(df) = c("y", "X1", "X2", "X3", "id")

Estimation

OLS

# %% normal approx
ols = lm(y ~ X + 0)
(olsest = cbind(ols$coefficients, confint(ols)))
##               2.5 %  97.5 %
## X1  0.9759  0.83192  1.1199
## X2  0.2064  0.06255  0.3503
## X3 -0.4252 -0.56457 -0.2859

Residual Bootstrap

# %% residual bootstrap
yhat = fitted(ols)
e = residuals(ols)
boot.fn <- function(data, ind){
  yboot = yhat + e[ind]
  coef(lm(yboot ~ X + 0))
}
out = boot(df, boot.fn, R = 999)
(b1 = cbind(out$t0, confint(out)))
##               2.5 %  97.5 %
## X1  0.9759  0.83976  1.1144
## X2  0.2064  0.06802  0.3545
## X3 -0.4252 -0.55949 -0.2875

Resampling bootstrap - the version everyone uses

# %% bootstrap - resample rows
boot.fn <- function(data, ind){
  d = data[ind, ]
  coef(lm(y ~ 0 + X1 + X2 + X3, d))
}
out = boot(df, boot.fn, R = 999)
(b2 = cbind(out$t0, confint(out)))
##              2.5 %  97.5 %
## X1  0.9759  0.8301  1.1182
## X2  0.2064  0.0768  0.3303
## X3 -0.4252 -0.5509 -0.3050

Cluster bootstrap

Sandwich

lmf <- lm(y ~ 0 + X1 + X2 + X3, df)
SEs = vcovBS(lmf, cluster = ~id) |> diag() |> sqrt()
(clusterbs = cbind(lmf$coefficients,
  lmf$coefficients - 1.96 * SEs, lmf$coefficients + 1.96 * SEs))
##       [,1]    [,2]     [,3]
## X1  1.0180  0.9467  1.08933
## X2  0.2112  0.1490  0.27346
## X3 -0.3344 -0.6053 -0.06361

Bayesian bootstrap

Rubin 1981

manual

# %% bayesian bootstrap
rudirichlet <- function(n, d) {
  rexp.mat <- matrix( rexp(d * n, 1) , nrow = n, ncol = d)
  rexp.mat / rowSums(rexp.mat)
}
modfit = function(w) lm(y ~ 0 + X, weights = w)$coefficients
bb_manual <- function(reps) {
  n = length(y)
  wtmat = rudirichlet(reps, n) # matrix where each row is a n-vector of weights
  apply(wtmat, 1, modfit)
}

(bb_m = apply(bb_manual(999), 1, function(x) c(mean(x), quantile(x, c(0.025, .975)) ) ) |>
  t())
##                2.5%   97.5%
## X1  1.0187  0.87892  1.1635
## X2  0.2094  0.07342  0.3602
## X3 -0.3314 -0.47596 -0.1741

Bayesboot package

# %% bayesboot package
boot_fn = function(df, w) lm(y ~ X + 0, df, weights = w)$coefficients
bb_lm <- bayesboot(df, boot_fn, use.weights = TRUE, R = 999)
(bb_pkg = apply(bb_lm, 2, function(x) c(mean(x), quantile(x, c(0.025, .975)) ) ) |>
  t())
##                2.5%   97.5%
## X1  1.0115  0.86687  1.1519
## X2  0.2116  0.07454  0.3713
## X3 -0.3351 -0.46990 -0.1842

Residual Randomization: RRI

See Toulis’ “Life after Bootstrap”

# %%
g_invar = function(e) sample(e)
rri1 = rrinf(y, X, g_invar, num_R = 999)
# %% RRI 1 - exchangeability
rri2 = rrinf_clust(y, X, type="perm", num_R = 999)
# %% sign
rri3 = rrinf_clust(y, X, type="sign", num_R=999)
# %%
ind = as.factor(ind)
cl = sapply(levels(ind), function(t) which(ind==t))
rri4 = rrinf_clust(y, X, type = "perm", cl, num_R=999)
# %%
prep_df = function(df, infm){
  rownames(df) = c(" Intercept", "X1", "X2")
  colnames(df) = c("Pt", "ConfInt_25", "ConfInt_975")
  df = df |> as.data.table(keep.rownames = T)
  df[, inf_method :=  infm]
  return(df)
}
d9 = prep_df(olsest,    "1. Normal OLS")
d8 = prep_df(b1,        "2. BS: Residuals")
d7 = prep_df(b2,        "3. BS: Obs")
d10= prep_df(clusterbs, "4. BS: Cluster")
d6 = prep_df(bb_m,      "5. Bayes BS: Manual")
d5 = prep_df(bb_pkg,    "6. Bayes BS: package")
d4 = prep_df(rri1,      "7. RRI: basic")
d1 = prep_df(rri2,      "8. RRI: perm")
d2 = prep_df(rri3,      "9. RRI: sign")
d3 = prep_df(rri4,      "10. RRI: cluster")
df_all = rbind(d1, d2, d3, d10, d4, d5, d6, d7, d8, d9)

Final comparison

ggplot(df_all, aes(x = rn, y = Pt, colour = inf_method)) +
  geom_point(position = position_dodge(width = .5)) +
  geom_pointrange(aes(ymin = ConfInt_25, ymax = ConfInt_975),
             position = position_dodge(width = .5)) +
  geom_hline(aes(yintercept = 0), linetype = "dotted")