stan-dev · vinniott · Oct 17, 2025 · Oct 17, 2025 · Oct 17, 2025 · Oct 17, 2025
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -24,3 +24,4 @@ vignettes/loo2-non-factorized_cache/*
 ^release-prep\.R$
 ^_pkgdown\.yml$
 ^pkgdown$
+^touchstone$
diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
@@ -40,7 +40,7 @@ jobs:
           covr::to_cobertura(cov)
         shell: Rscript {0}
 
-      - uses: codecov/codecov-action@v5
+      - uses: codecov/codecov-action@v6
         with:
           # Fail if error if not on PR, or if on PR and token is given--dependabot is treated like fork
           fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }}

diff --git a/.github/workflows/touchstone-comment.yaml b/.github/workflows/touchstone-comment.yaml
@@ -0,0 +1,46 @@
+name: Continuous Benchmarks (Comment)
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.run_id }}  # NOTE(review): run_id is unique per workflow run, so distinct runs never share a group — confirm cancel-in-progress is only meant to matter for re-runs
+  cancel-in-progress: true
+
+on:
+  workflow_run:
+    workflows: ["Continuous Benchmarks (Receive)"]  # triggering workflow, referenced by its `name:`
+    types: [completed]
+
+jobs:
+  comment:
+    runs-on: ubuntu-latest
+    permissions:
+      actions: read
+      pull-requests: write
+    if: ${{ github.event.workflow_run.event == 'pull_request' }}  # skip runs that were not started by a pull_request event
+    steps:
+      - name: Download Touchstone artifact
+        id: download
+        uses: actions/download-artifact@v8
+        with:
+          name: pr  # NOTE(review): presumably uploaded by the receive workflow's touchstone action — confirm
+          github-token: ${{ github.token }}
+          repository: ${{ github.repository }}
+          run-id: ${{ github.event.workflow_run.id }}  # download from the run that triggered this workflow
+
+      # Defensive: keep only the digits found in ./NR and fail if none remain, so a malformed artifact cannot redirect the comment to an arbitrary issue/PR
+      - name: Read PR number
+        id: pr
+        shell: bash
+        run: |
+          number="$(tr -cd '0-9' < ./NR)"
+          test -n "$number"
+          echo "number=$number" >> "$GITHUB_OUTPUT"
+
+      - name: Create or update sticky PR comment
+        id: comment
+        uses: marocchino/sticky-pull-request-comment@v3
+        with:
+          GITHUB_TOKEN: ${{ github.token }}
+          number_force: ${{ steps.pr.outputs.number }}  # sanitized PR number produced by the "Read PR number" step
+          header: touchstone
+          path: ./info.txt  # NOTE(review): presumably shipped inside the downloaded "pr" artifact — confirm
+          skip_unchanged: true
diff --git a/.github/workflows/touchstone-receive.yaml b/.github/workflows/touchstone-receive.yaml
@@ -0,0 +1,43 @@
+name: Continuous Benchmarks (Receive)
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref }}  # one group per workflow + PR head ref
+  cancel-in-progress: true  # a newer run in the same group cancels the one in progress
+
+on:
+  pull_request:
+
+jobs:
+  prepare:
+    runs-on: ubuntu-latest
+    outputs:
+      config: ${{ steps.read_touchstone_config.outputs.config }}  # compact JSON written by the read_touchstone_config step
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+
+      - id: read_touchstone_config
+        run: |
+          echo "config=$(jq -c . ./touchstone/config.json)" >> "$GITHUB_OUTPUT"
+
+  build:
+    needs: prepare
+    runs-on: ${{ matrix.config.os }}  # `os` key of touchstone/config.json
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - ${{ fromJson(needs.prepare.outputs.config) }}  # single matrix entry parsed from the prepare job's output
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+      - uses: lorenzwalthert/touchstone/actions/receive@main  # NOTE(review): tracks a moving branch; consider pinning to a tag or commit SHA
+        with:
+          r-version: ${{ matrix.config.r }}  # `r` key of touchstone/config.json
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -37,7 +37,7 @@ Imports:
     checkmate,
     matrixStats (>= 0.52),
     parallel,
-    posterior (>= 1.5.0),
+    posterior (>= 1.7.0),
     stats
 Suggests:
     bayesplot (>= 1.7.0),

diff --git a/NAMESPACE b/NAMESPACE
@@ -56,6 +56,7 @@ S3method(print,compare.loo)
 S3method(print,compare.loo_ss)
 S3method(print,importance_sampling)
 S3method(print,importance_sampling_loo)
+S3method(print,kfold)
 S3method(print,loo)
 S3method(print,pareto_k_table)
 S3method(print,pseudobma_bb_weights)
@@ -104,6 +105,7 @@ export(crps)
 export(elpd)
 export(example_loglik_array)
 export(example_loglik_matrix)
+export(example_wine_loglik_matrix)
 export(extract_log_lik)
 export(find_model_names)
 export(gpdfit)
@@ -153,6 +155,7 @@ export(psislw)
 export(relative_eff)
 export(scrps)
 export(sis)
+export(srs_diff_est)
 export(stacking_weights)
 export(tis)
 export(waic)

diff --git a/NEWS.md b/NEWS.md
@@ -17,6 +17,7 @@
 * Added contribution section. by @VisruthSK in #286
 * Update LOO uncertainty paper to use BA doi by @avehtari in #311
 * Update documentation for `E_loo()` function by @avehtari in #312
+* Export `srs_diff_est()` function by @vinniott and @avehtari in #340
 
 
 # loo 2.8.0

diff --git a/R/example_log_lik_array.R → R/example_log_lik_objects.R b/R/example_log_lik_array.R → R/example_log_lik_objects.R
@@ -36,3 +36,7 @@ example_loglik_matrix <- function() {
   return(llarray_to_matrix(ll))
 }
 
+#' @export
+example_wine_loglik_matrix <- function() {
+  .example_wine_loglik_matrix # internal object; its definition is not part of this hunk
+}
diff --git a/R/gpdfit.R b/R/gpdfit.R
@@ -29,71 +29,10 @@
 #' for the generalized Pareto distribution. *Technometrics* **51**, 316-325.
 #'
 gpdfit <- function(x, wip = TRUE, min_grid_pts = 30, sort_x = TRUE) {
-  # See section 4 of Zhang and Stephens (2009)
-  if (sort_x) {
-    x <- sort.int(x)
-  }
-  N <- length(x)
-  prior <- 3
-  M <- min_grid_pts + floor(sqrt(N))
-  jj <- seq_len(M)
-  xstar <- x[floor(N / 4 + 0.5)] # first quartile of sample
-  theta <- 1 / x[N] + (1 - sqrt(M / (jj - 0.5))) / prior / xstar
-  l_theta <- N * lx(theta, x) # profile log-lik
-  w_theta <- exp(l_theta - matrixStats::logSumExp(l_theta)) # normalize
-  theta_hat <- sum(theta * w_theta)
-  k <- mean.default(log1p(-theta_hat * x))
-  sigma <- -k / theta_hat
-
-  if (wip) {
-    k <- adjust_k_wip(k, n = N)
-  }
-
-  if (is.na(k)) {
-    k <- Inf
-  }
-
-  nlist(k, sigma)
-}
-
-
-# internal ----------------------------------------------------------------
-
-lx <- function(a,x) {
-  a <- -a
-  k <- vapply(a, FUN = function(a_i) mean(log1p(a_i * x)), FUN.VALUE = numeric(1))
-  log(a / k) - k - 1
-}
-
-#' Adjust k based on weakly informative prior, Gaussian centered on 0.5. This
-#' will stabilize estimates for very small Monte Carlo sample sizes and low neff
-#' cases.
-#'
-#' @noRd
-#' @param k Scalar khat estimate.
-#' @param n Integer number of tail samples used to fit GPD.
-#' @return Scalar adjusted khat estimate.
-#'
-adjust_k_wip <- function(k, n) {
-  a <- 10
-  n_plus_a <- n + a
-  k * n / n_plus_a + a * 0.5 / n_plus_a
-}
-
-
-#' Inverse CDF of generalized Pareto distribution
-#' (assuming location parameter is 0)
-#'
-#' @noRd
-#' @param p Vector of probabilities.
-#' @param k Scalar shape parameter.
-#' @param sigma Scalar scale parameter.
-#' @return Vector of quantiles.
-#'
-qgpd <- function(p, k, sigma) {
-  if (is.nan(sigma) || sigma <= 0) {
-    return(rep(NaN, length(p)))
-  }
-
-  sigma * expm1(-k * log1p(-p)) / k
+  posterior::gpdfit(
+    x = x,
+    wip = wip,
+    min_grid_pts = min_grid_pts,
+    sort_x = sort_x
+  )
 }
diff --git a/R/loo-package.R b/R/loo-package.R
@@ -1,8 +1,5 @@
 #' Efficient LOO-CV and WAIC for Bayesian models
 #'
-#' @docType package
-#' @name loo-package
-#'
 #' @importFrom stats sd var quantile setNames weights rnorm qnorm
 #' @importFrom matrixStats logSumExp colLogSumExps colSums2 colVars colMaxs
 #'
@@ -89,4 +86,4 @@
 #' for the generalized Pareto distribution. *Technometrics* **51**,
 #' 316-325.
 #'
-NULL
+"_PACKAGE"
diff --git a/R/loo_subsample.R b/R/loo_subsample.R
@@ -24,7 +24,7 @@
 #'   same length containing the posterior density and the approximation density
 #'   for the individual draws.
 #'
-#' @seealso [loo()], [psis()], [loo_compare()]
+#' @seealso [loo()], [psis()], [loo_compare()], [srs_diff_est()]
 #' @template loo-large-data-references
 #'
 #' @export loo_subsample loo_subsample.function
@@ -1166,12 +1166,88 @@ loo_subsample_estimation_diff_srs <- function(x) {
   update_psis_loo_ss_estimates(x)
 }
 
-#' Difference estimation using SRS-WOR sampling (Magnusson et al., 2020)
-#' @noRd
-#' @param y_approx Approximated values of all observations.
-#' @param y The values observed.
-#' @param y_idx The index of `y` in `y_approx`.
-#' @return A list with estimates.
+#' Difference estimator with simple random sampling without replacement.
+#'
+#' The difference estimator `srs_diff_est()` estimates
+#' the expectation \eqn{n E[y]} when we have \eqn{n} approximate values \eqn{\tilde{y}_i},
+#' \eqn{i = 1, \ldots, n} and \eqn{m < n} accurate values \eqn{y_j}, \eqn{j \in \mathcal{S}},
+#' where \eqn{m} is the subsample size and \eqn{\mathcal{S}} is
+#' a simple random subsample without replacement.  The original
+#' approach is by Cochran (1977) and we follow the equations 7--9 by
+#' Magnusson et al. (2020).
+#'
+#' @details In Magnusson et al. (2020) Eq (9) first row, the second `+` should
+#'   be a `-`; Supplementary Material Eq (6) has this correct.
+#'   As `srs_diff_est()` in the `loo` package is used for \eqn{n E[y]}, there is
+#'   a proportional difference of \eqn{1/n} compared to the paper.
+#'
+#' @param y_approx (numeric) `n` approximated values.
+#' @param y (numeric) `m<n` subsampled values.
+#' @param y_idx (integerish) The index of `y` in `y_approx`.
+#'
+#' @return A named list containing numeric values:
+#' * `y_hat`: estimated mean of \eqn{y} (Eq 7),
+#' * `v_y_hat`: variance of the mean estimate (Eq 8), and
+#' * `hat_v_y`: estimated variance of \eqn{y} (Eq 9).
+#'
+#' @references
+#' Magnusson, M., Riis Andersen, M., Jonasson, J. and Vehtari, A. (2020).
+#' Leave-One-Out Cross-Validation for Model Comparison in Large Data.
+#' In _Proceedings of the 23rd International Conference on Artificial
+#' Intelligence and Statistics (AISTATS)_, PMLR 108:341-351.
+#'
+#' Cochran, W. G. (1977). _Sampling Techniques, 3rd Edition_. John Wiley.
+#'
+#' Cortez, P., Cerdeira, A.L., Almeida, F., Matos, T., & Reis, J. (2009).
+#' Modeling wine preferences by data mining from physicochemical properties.
+#' _Decis. Support Syst._, _47_, 547-553.
+#'
+#' @seealso [loo_subsample()]
+#'
+#' @examples
+#' ### This example predicts wine quality (data from Cortez et al., 2009).
+#' ## First, the commented-out code shows how to generate a loglik_matrix.
+#' ## Second, running code illustrates how to use srs_diff_est().
+#' # library(dplyr)
+#' # library(brms)
+#' # options(brms.backend = "cmdstanr")
+#' # options(mc.cores = 4)
+#' # library(loo)
+#' #
+#' # wine <- read.delim(root("winequality-red", "winequality-red.csv"), sep = ";") |>
+#' #   distinct()
+#' #
+#' # wine_scaled <- as.data.frame(scale(wine))
+#' #
+#' # fitos <- brm(ordered(quality) ~ .,
+#' #              family = cumulative("logit"),
+#' #              prior = prior(R2D2(mean_R2 = 1/3, prec_R2 = 3)),
+#' #              data = wine_scaled,
+#' #              seed = 1,
+#' #              silent = 2,
+#' #              refresh = 0)
+#' #
+#' # wine_loglik_matrix <- log_lik(fitos)
+#' wine_loglik_matrix <- example_wine_loglik_matrix()  # Installed with loo to save time of fitting model shown above
+#' N <- 1359 # nrow(wine_scaled), see above
+#' Nsub <- 100
+#' # posterior log-score
+#' lpd <- elpd(wine_loglik_matrix)
+#' sum(lpd$pointwise[,"elpd"])
+#'
+#' # Use PSIS-LOO for subsample of Nsub randomly selected observations
+#' set.seed(1)
+#' idx <- sample(1:N, Nsub)
+#' elpd_loo_sub <- loo(wine_loglik_matrix[,idx])
+#' sum(elpd_loo_sub$pointwise[,"elpd_loo"]) / Nsub * N
+#'
+#' # Use difference estimator to combine fast result and subsampled accurate result
+#' srs_diff_est(lpd$pointwise[,"elpd"], elpd_loo_sub$pointwise[,"elpd_loo"], idx)
+#'
+#' # Comparison to using PSIS-LOO for all observations
+#' loo(wine_loglik_matrix)
+#'
+#' @export
 srs_diff_est <- function(y_approx, y, y_idx) {
   checkmate::assert_numeric(y_approx)
   checkmate::assert_numeric(y, max.len = length(y_approx))

diff --git a/R/print.R b/R/print.R
@@ -105,6 +105,28 @@ print.importance_sampling <- function(x, digits = 1, plot_k = FALSE, ...) {
   invisible(x)
 }
 
+#' @export
+#' @rdname print.loo
+print.kfold <- function(x, digits = 1, plot_k = FALSE, ...) {
+  # Common output is delegated to print.loo(); the Pareto-k section below
+  # is printed only when `x` has a "diagnostics" element.
+  print.loo(x, digits = digits, ...)
+  if (!("diagnostics" %in% names(x))) {
+    return(invisible(x))
+  }
+  cat("------\n")
+  k_cutoff <- ps_khat_threshold(dim(x)[1])
+  if (length(pareto_k_ids(x, threshold = k_cutoff))) {
+    cat("\n")
+  }
+  print(pareto_k_table(x), digits = digits)
+  cat(.k_help())
+  if (plot_k) {
+    graphics::plot(x, ...)
+  }
+  invisible(x)
+}
+
 # internal ----------------------------------------------------------------
 
 #' Print dimensions of log-likelihood or log-weights matrix

diff --git a/R/psis.R b/R/psis.R
@@ -254,12 +254,12 @@ psis_smooth_tail <- function(x, cutoff) {
   exp_cutoff <- exp(cutoff)
 
   # save time not sorting since x already sorted
-  fit <- gpdfit(exp(x) - exp_cutoff, sort_x = FALSE)
+  fit <- posterior::gpdfit(exp(x) - exp_cutoff, sort_x = FALSE)
   k <- fit$k
   sigma <- fit$sigma
   if (is.finite(k)) {
     p <- (seq_len(len) - 0.5) / len
-    qq <- qgpd(p, k, sigma) + exp_cutoff
+    qq <- posterior::qgeneralized_pareto(p, 0, sigma, k) + exp_cutoff
     tail <- log(qq)
   } else {
     tail <- x