library(LalRUtils)
libreq(data.table,
  # classic boot
  boot, car, sandwich,
  # bayes boot
  bayesboot, MCMCpack,
  # residual randomisation
  RRI, ggplot2)
## wants loaded
## [1,] "data.table" TRUE
## [2,] "boot" TRUE
## [3,] "car" TRUE
## [4,] "sandwich" TRUE
## [5,] "bayesboot" TRUE
## [6,] "MCMCpack" TRUE
## [7,] "RRI" TRUE
## [8,] "ggplot2" TRUE
theme_set(lal_plot_theme())
Snippets implementing a variety of resampling inference methods.
# %% simulate data
n = 1000; p = 2; k = 4
X = cbind(1, matrix(rnorm(n * p), nrow = n))
ind = c(rep(0, 0.8*n), rep(1, .2*n)) # cluster indicator
β = c(1, 0.2, -.4)
y = X %*% β + rnorm(n, sd = (1 - ind) * 0.1 + ind * 5) # fn of group indicator
df = cbind(y, X, as.factor(ind)) |> as.data.frame()
colnames(df) = c("y", "X1", "X2", "X3", "id")
# %% normal approx
ols = lm(y ~ X + 0)
(olsest = cbind(ols$coefficients, confint(ols)))
## 2.5 % 97.5 %
## X1 0.9759 0.83192 1.1199
## X2 0.2064 0.06255 0.3503
## X3 -0.4252 -0.56457 -0.2859
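For reference, the same normal-approximation interval can be assembled by hand as coefficient ± t-quantile × SE, which is what confint() computes for an lm fit:
# normal-approximation CI by hand: estimate ± t_{df, 0.975} * SE
se = sqrt(diag(vcov(ols)))
cbind(coef(ols) - qt(.975, ols$df.residual) * se,
      coef(ols) + qt(.975, ols$df.residual) * se)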
# %% residual bootstrap
yhat = fitted(ols)
e = residuals(ols)
boot.fn <- function(data, ind){
  yboot = yhat + e[ind]
  coef(lm(yboot ~ X + 0))
}
out = boot(df, boot.fn, R = 999)
(b1 = cbind(out$t0, confint(out)))
## 2.5 % 97.5 %
## X1 0.9759 0.83976 1.1144
## X2 0.2064 0.06802 0.3545
## X3 -0.4252 -0.55949 -0.2875
# %% bootstrap - resample rows
boot.fn <- function(data, ind){
  d = data[ind, ]
  coef(lm(y ~ 0 + X1 + X2 + X3, d))
}
out = boot(df, boot.fn, R = 999)
(b2 = cbind(out$t0, confint(out)))
## 2.5 % 97.5 %
## X1 0.9759 0.8301 1.1182
## X2 0.2064 0.0768 0.3303
## X3 -0.4252 -0.5509 -0.3050
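The boot package can also report interval types directly from the same boot object; for example, percentile and BCa intervals for the first coefficient:
# percentile and bias-corrected-accelerated intervals for coefficient 1
boot.ci(out, type = c("perc", "bca"), index = 1)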
# %% cluster bootstrap
lmf <- lm(y ~ 0 + X1 + X2 + X3, df)
SEs = vcovBS(lmf, cluster = ~id) |> diag() |> sqrt()
(clusterbs = cbind(lmf$coefficients,
                   lmf$coefficients - 1.96 * SEs, lmf$coefficients + 1.96 * SEs))
## [,1] [,2] [,3]
## X1 1.0180 0.9467 1.08933
## X2 0.2112 0.1490 0.27346
## X3 -0.3344 -0.6053 -0.06361
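As a rough check on vcovBS, a pairs-cluster bootstrap can be coded directly by resampling whole id clusters with replacement and refitting; cluster_boot below is an illustrative sketch (not part of sandwich), and with only two clusters here it is very coarse:
# resample entire clusters (id groups) with replacement, refit, collect coefficients
cluster_boot <- function(data, R = 999) {
  ids = unique(data$id)
  replicate(R, {
    draw = sample(ids, length(ids), replace = TRUE)
    d = do.call(rbind, lapply(draw, function(i) data[data$id == i, ]))
    coef(lm(y ~ 0 + X1 + X2 + X3, d))
  })
}
# percentile CIs from the cluster-bootstrap draws:
# apply(cluster_boot(df), 1, quantile, c(0.025, 0.975)) |> t()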
# %% bayesian bootstrap
rudirichlet <- function(n, d) {
  rexp.mat <- matrix(rexp(d * n, 1), nrow = n, ncol = d)
  rexp.mat / rowSums(rexp.mat)
}
modfit = function(w) lm(y ~ 0 + X, weights = w)$coefficients
bb_manual <- function(reps) {
  n = length(y)
  wtmat = rudirichlet(reps, n) # matrix where each row is an n-vector of weights
  apply(wtmat, 1, modfit)
}
(bb_m = apply(bb_manual(999), 1, function(x) c(mean(x), quantile(x, c(0.025, .975)))) |>
  t())
## 2.5% 97.5%
## X1 1.0187 0.87892 1.1635
## X2 0.2094 0.07342 0.3602
## X3 -0.3314 -0.47596 -0.1741
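MCMCpack (loaded above) provides an equivalent flat-Dirichlet sampler, so the rudirichlet() helper could be swapped for a one-line wrapper:
# drop-in replacement: n draws from a d-dimensional Dirichlet(1, ..., 1)
rudirichlet <- function(n, d) MCMCpack::rdirichlet(n, rep(1, d))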
# %% bayesboot package
boot_fn = function(df, w) lm(y ~ X + 0, df, weights = w)$coefficients
bb_lm <- bayesboot(df, boot_fn, use.weights = TRUE, R = 999)
(bb_pkg = apply(bb_lm, 2, function(x) c(mean(x), quantile(x, c(0.025, .975)))) |>
  t())
## 2.5% 97.5%
## X1 1.0115 0.86687 1.1519
## X2 0.2116 0.07454 0.3713
## X3 -0.3351 -0.46990 -0.1842
See Toulis’ “Life after Bootstrap” for the residual randomisation inference (RRI) approach used below; a manual sketch of the basic test follows the package calls.
# %%
g_invar = function(e) sample(e)
rri1 = rrinf(y, X, g_invar, num_R = 999)
# %% RRI 1 - exchangeability
rri2 = rrinf_clust(y, X, type = "perm", num_R = 999)
# %% sign
rri3 = rrinf_clust(y, X, type = "sign", num_R = 999)
# %% cluster
ind = as.factor(ind)
cl = sapply(levels(ind), function(t) which(ind == t))
rri4 = rrinf_clust(y, X, type = "perm", cl, num_R = 999)
# %%
prep_df = function(df, infm){
  rownames(df) = c("Intercept", "X1", "X2")
  colnames(df) = c("Pt", "ConfInt_25", "ConfInt_975")
  df = df |> as.data.table(keep.rownames = T)
  df[, inf_method := infm]
  return(df)
}
d9  = prep_df(olsest,    "1. Normal OLS")
d8  = prep_df(b1,        "2. BS: Residuals")
d7  = prep_df(b2,        "3. BS: Obs")
d10 = prep_df(clusterbs, "4. BS: Cluster")
d6  = prep_df(bb_m,      "5. Bayes BS: Manual")
d5  = prep_df(bb_pkg,    "6. Bayes BS: package")
d4  = prep_df(rri1,      "7. RRI: basic")
d1  = prep_df(rri2,      "8. RRI: perm")
d2  = prep_df(rri3,      "9. RRI: sign")
d3  = prep_df(rri4,      "10. RRI: cluster")
df_all = rbind(d1, d2, d3, d10, d4, d5, d6, d7, d8, d9)
ggplot(df_all, aes(x = rn, y = Pt, colour = inf_method)) +
  geom_point(position = position_dodge(width = .5)) +
  geom_pointrange(aes(ymin = ConfInt_25, ymax = ConfInt_975),
                  position = position_dodge(width = .5)) +
  geom_hline(aes(yintercept = 0), linetype = "dotted")