# Model tidiers
# Wrap long strings at word boundaries (used to avoid ultra-wide gt titles)
wrap_50 <- function(x, width = 50) {
if (is.null(x) || is.na(x) || !nzchar(x)) return(x)
paste(strwrap(x, width = width), collapse = "\n")
}
# Stats-table styling (grey) to visually distinguish inferential outputs
style_stats_gt <- function(gt_tbl) {
gt_tbl %>%
tab_options(
heading.background.color = "#F2F2F2",
column_labels.background.color = "#D9D9D9",
row_group.background.color = "#F7F7F7",
table.font.size = gt::px(13),
data_row.padding = gt::px(4)
)
}
render_binom_model <- function(model, title, note_text = NULL) {
tidy_res <- if (inherits(model, "glmerMod")) {
broom.mixed::tidy(model, effects = "fixed", conf.int = TRUE, conf.level = 0.95, exponentiate = TRUE)
} else {
broom::tidy(model, conf.int = TRUE, conf.level = 0.95, exponentiate = TRUE)
}
tidy_res <- tidy_res %>%
mutate(term = dplyr::recode(term, `(Intercept)` = "Intercept")) %>%
select(term, estimate, conf.low, conf.high, std.error, p.value)
tidy_res <- tidy_res %>%
mutate(across(c(estimate, conf.low, conf.high, std.error), ~ scales::number(.x, accuracy = 0.001)),
p.value = scales::pvalue(p.value, accuracy = 0.001))
gt_tbl <- tidy_res %>%
gt() %>%
tab_header(title = wrap_50(title)) %>%
cols_label(term = "Term", estimate = "OR", conf.low = "CI low", conf.high = "CI high", std.error = "SE", p.value = "p-value") %>%
opt_stylize(3) %>%
style_stats_gt()
gt_tbl %>% gt::as_raw_html() %>% IRdisplay::display_html()
if (!is.null(note_text)) {
IRdisplay::display_markdown(paste0("**Table note:** ", note_text))
}
}
# GLM contrasts (e.g., within-platform test vs control)
# Computes an OR/CI/p for the log-odds difference implied by two single-row newdata frames.
tidy_glm_contrast_or <- function(model, newdata_control, newdata_test, label = "Treatment vs control") {
if (is.null(model) || !inherits(model, "glm")) stop("tidy_glm_contrast_or: model must be a glm")
if (is.null(newdata_control) || is.null(newdata_test)) stop("tidy_glm_contrast_or: newdata must be provided")
# Model matrix rows (use predictors-only terms; newdata won't include the response column)
tt <- stats::delete.response(stats::terms(model))
Xc <- stats::model.matrix(tt, newdata_control)
Xt <- stats::model.matrix(tt, newdata_test)
if (nrow(Xc) != 1 || nrow(Xt) != 1) {
stop("tidy_glm_contrast_or: newdata_control/test must each be 1 row")
}
L <- drop(Xt - Xc)
beta <- stats::coef(model)
V <- stats::vcov(model)
est <- sum(L * beta)
se <- sqrt(drop(t(L) %*% V %*% L))
if (is.na(se) || se <= 0) {
return(tibble::tibble(term = label, estimate = NA_real_, conf.low = NA_real_, conf.high = NA_real_, std.error = NA_real_, p.value = NA_real_))
}
z <- est / se
p <- 2 * stats::pnorm(abs(z), lower.tail = FALSE)
or <- exp(est)
ci <- exp(est + c(-1, 1) * stats::qnorm(0.975) * se)
tibble::tibble(
term = label,
estimate = or,
conf.low = ci[[1]],
conf.high = ci[[2]],
std.error = se,
p.value = p
)
}
render_or_contrast_table <- function(df, title, note_text = NULL) {
if (is.null(df) || nrow(df) == 0) return(invisible(NULL))
out <- df %>%
mutate(across(c(estimate, conf.low, conf.high, std.error), ~ scales::number(.x, accuracy = 0.001)),
p.value = scales::pvalue(p.value, accuracy = 0.001))
gt_tbl <- out %>%
gt() %>%
tab_header(title = wrap_50(title)) %>%
cols_label(term = "Contrast", estimate = "OR", conf.low = "CI low", conf.high = "CI high", std.error = "SE", p.value = "p-value") %>%
opt_stylize(3) %>%
style_stats_gt()
gt_tbl %>% gt::as_raw_html() %>% IRdisplay::display_html()
if (!is.null(note_text)) {
IRdisplay::display_markdown(paste0("**Table note:** ", note_text))
}
invisible(df)
}
# brms helpers
pick_brms_backend <- function() {
# Prefer cmdstanr when available (avoids many rstan toolchain issues)
if (requireNamespace("cmdstanr", quietly = TRUE)) {
ok <- tryCatch({
v <- cmdstanr::cmdstan_version()
!is.null(v)
}, error = function(e) FALSE)
if (isTRUE(ok)) return("cmdstanr")
}
"rstan"
}
safe_brm <- function(formula, data, prior, seed = 5, chains = 4, cores = 4, refresh = 0) {
# Preflight: if brms or rstan can't be loaded, do NOT attempt compilation/sampling.
can_load_ns <- function(pkg) {
requireNamespace(pkg, quietly = TRUE) &&
isTRUE(tryCatch({
loadNamespace(pkg)
TRUE
}, error = function(e) FALSE))
}
if (!can_load_ns("brms")) {
message("brms fit skipped: brms namespace could not be loaded in this kernel/environment")
return(NULL)
}
backend <- pick_brms_backend()
# Only require the backend we actually intend to use.
if (backend == "rstan") {
if (!can_load_ns("rstan")) {
message(
"brms fit skipped (backend=rstan): rstan could not be loaded (common cause: binary/toolchain mismatch like TBB linkage).\n",
"Continuing with glm + relax outputs.\n",
"Recommended fix (outside this notebook): use a clean env where rstan loads, or rebuild/reinstall rstan+StanHeaders+tbb from a consistent toolchain."
)
return(NULL)
}
} else if (backend == "cmdstanr") {
if (!can_load_ns("cmdstanr")) {
message("brms fit skipped (backend=cmdstanr): cmdstanr namespace could not be loaded")
return(NULL)
}
ok <- tryCatch({
v <- cmdstanr::cmdstan_version()
!is.null(v)
}, error = function(e) FALSE)
if (!isTRUE(ok)) {
message(
"brms fit skipped (backend=cmdstanr): CmdStan is not installed/configured in this environment.\n",
"Continuing with glm + relax outputs.\n",
"To run brms here, we install CmdStan via cmdstanr::install_cmdstan() (plus a working C++ toolchain)."
)
return(NULL)
}
}
tryCatch({
brms::brm(
formula = formula,
family = brms::bernoulli(link = "logit"),
data = data,
prior = prior,
seed = seed,
chains = chains,
cores = cores,
refresh = refresh,
backend = backend
)
}, error = function(e) {
message(
"brms fit skipped (backend=", backend, "): ", e$message,
"\nContinuing with glm + relax outputs.\n",
"To run brms reliably, prefer cmdstanr with CmdStan installed and a stable R toolchain."
)
NULL
})
}
# brms confirmation table: OR + (optional) lift in probability space
# - OR summary uses exp(b_test)
# - Lift summary uses posterior_epred() to compute Pr(test) - Pr(control) on the same covariates
render_brms_confirm_table <- function(
fit,
title,
coef_name = "b_test_grouptest",
newdata_control = NULL,
newdata_test = NULL,
note_text = NULL
) {
draws <- posterior::as_draws_df(fit)
if (!(coef_name %in% names(draws))) {
message("brms: coefficient not found in draws: ", coef_name)
return(invisible(NULL))
}
# OR (odds ratio)
or_draw <- exp(draws[[coef_name]])
or_tbl <- tibble::tibble(
quantity = "Effect",
metric = "Multiplicative effect on odds (OR)",
point = stats::median(or_draw, na.rm = TRUE),
lower = stats::quantile(or_draw, 0.025, na.rm = TRUE),
upper = stats::quantile(or_draw, 0.975, na.rm = TRUE),
chance_positive = mean(or_draw > 1, na.rm = TRUE)
)
out <- or_tbl
# Average lift in probability space (multi-check style): E[p(test) - p(control)]
if (!is.null(newdata_control) && !is.null(newdata_test)) {
ep_ctrl <- brms::posterior_epred(fit, newdata = newdata_control, re_formula = NA)
ep_test <- brms::posterior_epred(fit, newdata = newdata_test, re_formula = NA)
# each is draws x N; compute per-draw average lift
lift_draw <- rowMeans(ep_test - ep_ctrl, na.rm = TRUE)
lift_tbl <- tibble::tibble(
quantity = "Function of parameter(s)",
metric = "Average lift (Pr[test] - Pr[control])",
point = stats::median(lift_draw, na.rm = TRUE),
lower = stats::quantile(lift_draw, 0.025, na.rm = TRUE),
upper = stats::quantile(lift_draw, 0.975, na.rm = TRUE),
chance_positive = mean(lift_draw > 0, na.rm = TRUE)
)
out <- dplyr::bind_rows(or_tbl, lift_tbl)
}
gt_tbl <- out %>%
mutate(
quantity = factor(quantity, levels = c("Effect", "Function of parameter(s)"))
) %>%
gt(groupname_col = "quantity") %>%
tab_header(
title = gt::md(paste0("**", wrap_50(title), "**")),
subtitle = gt::md("Hierarchical Bayesian logistic regression (brms)")
) %>%
cols_label(
metric = "Quantity",
point = "Point estimate",
lower = "95% CrI low",
upper = "95% CrI high",
chance_positive = "Chance to win"
) %>%
fmt_number(columns = c(point, lower, upper), decimals = 3) %>%
fmt_number(columns = chance_positive, decimals = 3) %>%
# Format the lift row as percent (probability points)
fmt_percent(
columns = c(point, lower, upper),
rows = metric == "Average lift (Pr[test] - Pr[control])",
decimals = 2
) %>%
opt_stylize(3) %>%
style_stats_gt()
gt_tbl %>% gt::as_raw_html() %>% IRdisplay::display_html()
if (!is.null(note_text)) {
IRdisplay::display_markdown(paste0("**Table note:** ", note_text))
}
invisible(out)
}
# Back-compat: OR-only table (kept for older cells)
render_brms_or_table <- function(fit, title, coef_name = "b_test_grouptest", note_text = NULL) {
render_brms_confirm_table(
fit = fit,
title = title,
coef_name = coef_name,
newdata_control = NULL,
newdata_test = NULL,
note_text = note_text
)
}