library(LalRUtils)
libreq(data.table,
boot, car, # classic boot
sandwich,
bayesboot, MCMCpack, # bayes boot
RRI, # residual randomisation
ggplot2)## wants loaded
## [1,] "data.table" TRUE
## [2,] "boot" TRUE
## [3,] "car" TRUE
## [4,] "sandwich" TRUE
## [5,] "bayesboot" TRUE
## [6,] "MCMCpack" TRUE
## [7,] "RRI" TRUE
## [8,] "ggplot2" TRUE
theme_set(lal_plot_theme())Snippets implementing a variety of resampling inference methods.
# %% simulate data
n = 1000; p = 2; k = 4
X = cbind(1, matrix(rnorm(n * p), nrow = n))
ind = c(rep(0, 0.8*n), rep(1, .2*n)) # cluster indicator
β = c(1, 0.2, -.4)
y = X %*% β + rnorm(n, sd= (1-ind) * 0.1 + ind * 5) # fn of group indicator
df = cbind(y, X, as.factor(ind)) |> as.data.frame()
colnames(df) = c("y", "X1", "X2", "X3", "id")# %% normal approx
ols = lm(y ~ X + 0)
(olsest = cbind(ols$coefficients, confint(ols)))## 2.5 % 97.5 %
## X1 0.9759 0.83192 1.1199
## X2 0.2064 0.06255 0.3503
## X3 -0.4252 -0.56457 -0.2859
# %% residual bootstrap
yhat = fitted(ols)
e = residuals(ols)
boot.fn <- function(data, ind){
yboot = yhat + e[ind]
coef(lm(yboot ~ X + 0))
}
out = boot(df, boot.fn, R = 999)
(b1 = cbind(out$t0, confint(out)))## 2.5 % 97.5 %
## X1 0.9759 0.83976 1.1144
## X2 0.2064 0.06802 0.3545
## X3 -0.4252 -0.55949 -0.2875
# %% bootstrap - resample rows
boot.fn <- function(data, ind){
d = data[ind, ]
coef(lm(y ~ 0 + X1 + X2 + X3, d))
}
out = boot(df, boot.fn, R = 999)
(b2 = cbind(out$t0, confint(out)))## 2.5 % 97.5 %
## X1 0.9759 0.8301 1.1182
## X2 0.2064 0.0768 0.3303
## X3 -0.4252 -0.5509 -0.3050
lmf <- lm(y ~ 0 + X1 + X2 + X3, df)
SEs = vcovBS(lmf, cluster = ~id) |> diag() |> sqrt()
(clusterbs = cbind(lmf$coefficients,
lmf$coefficients - 1.96 * SEs, lmf$coefficients + 1.96 * SEs))## [,1] [,2] [,3]
## X1 1.0180 0.9467 1.08933
## X2 0.2112 0.1490 0.27346
## X3 -0.3344 -0.6053 -0.06361
# %% bayesian bootstrap
rudirichlet <- function(n, d) {
rexp.mat <- matrix( rexp(d * n, 1) , nrow = n, ncol = d)
rexp.mat / rowSums(rexp.mat)
}
modfit = function(w) lm(y ~ 0 + X, weights = w)$coefficients
bb_manual <- function(reps) {
n = length(y)
wtmat = rudirichlet(reps, n) # matrix where each row is a n-vector of weights
apply(wtmat, 1, modfit)
}
(bb_m = apply(bb_manual(999), 1, function(x) c(mean(x), quantile(x, c(0.025, .975)) ) ) |>
t())## 2.5% 97.5%
## X1 1.0187 0.87892 1.1635
## X2 0.2094 0.07342 0.3602
## X3 -0.3314 -0.47596 -0.1741
# %% bayesboot package
boot_fn = function(df, w) lm(y ~ X + 0, df, weights = w)$coefficients
bb_lm <- bayesboot(df, boot_fn, use.weights = TRUE, R = 999)
(bb_pkg = apply(bb_lm, 2, function(x) c(mean(x), quantile(x, c(0.025, .975)) ) ) |>
t())## 2.5% 97.5%
## X1 1.0115 0.86687 1.1519
## X2 0.2116 0.07454 0.3713
## X3 -0.3351 -0.46990 -0.1842
See Toulis’ “Life after Bootstrap”
# %%
g_invar = function(e) sample(e)
rri1 = rrinf(y, X, g_invar, num_R = 999)
# %% RRI 1 - exchangeability
rri2 = rrinf_clust(y, X, type="perm", num_R = 999)
# %% sign
rri3 = rrinf_clust(y, X, type="sign", num_R=999)
# %%
ind = as.factor(ind)
cl = sapply(levels(ind), function(t) which(ind==t))
rri4 = rrinf_clust(y, X, type = "perm", cl, num_R=999)# %%
prep_df = function(df, infm){
rownames(df) = c(" Intercept", "X1", "X2")
colnames(df) = c("Pt", "ConfInt_25", "ConfInt_975")
df = df |> as.data.table(keep.rownames = T)
df[, inf_method := infm]
return(df)
}
d9 = prep_df(olsest, "1. Normal OLS")
d8 = prep_df(b1, "2. BS: Residuals")
d7 = prep_df(b2, "3. BS: Obs")
d10= prep_df(clusterbs, "4. BS: Cluster")
d6 = prep_df(bb_m, "5. Bayes BS: Manual")
d5 = prep_df(bb_pkg, "6. Bayes BS: package")
d4 = prep_df(rri1, "7. RRI: basic")
d1 = prep_df(rri2, "8. RRI: perm")
d2 = prep_df(rri3, "9. RRI: sign")
d3 = prep_df(rri4, "10. RRI: cluster")
df_all = rbind(d1, d2, d3, d10, d4, d5, d6, d7, d8, d9)ggplot(df_all, aes(x = rn, y = Pt, colour = inf_method)) +
geom_point(position = position_dodge(width = .5)) +
geom_pointrange(aes(ymin = ConfInt_25, ymax = ConfInt_975),
position = position_dodge(width = .5)) +
geom_hline(aes(yintercept = 0), linetype = "dotted")