diff --git a/.Rbuildignore b/.Rbuildignore
index 1aa03d51..f3832be8 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -15,6 +15,7 @@ cran-comments.md
 man-roxygen
 data-raw
 docs
+^.*vtune.*$
 revdep
 \.covrignore
 ^\.git$
diff --git a/.claude/settings.local.json b/.claude/settings.local.json
new file mode 100644
index 00000000..f80c4d56
--- /dev/null
+++ b/.claude/settings.local.json
@@ -0,0 +1,7 @@
+{
+  "permissions": {
+    "allow": [
+      "Bash(Rscript *)"
+    ]
+  }
+}
diff --git a/.github/workflows/R-CMD-check.yml b/.github/workflows/R-CMD-check.yml
index df89093f..4255992b 100644
--- a/.github/workflows/R-CMD-check.yml
+++ b/.github/workflows/R-CMD-check.yml
@@ -265,8 +265,6 @@ jobs:
         uses: r-lib/actions/setup-r-dependencies@v2
         with:
           dependencies: '"hard"'
-          extra-packages: |
-            github::ms609/TreeTools
           needs: benchmark
 
       - name: Benchmark PR
diff --git a/DESCRIPTION b/DESCRIPTION
index 504ffdb4..2472c834 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -87,10 +87,11 @@ VignetteBuilder: knitr
 Config/Needs/app/optional: uwot
 Config/Needs/check: rcmdcheck
 Config/Needs/coverage: covr
-Config/Needs/memcheck: devtools
+Config/Needs/memcheck: pkgdown, testthat
 Config/Needs/metadata: codemetar
 Config/Needs/revdeps: revdepcheck
 Config/Needs/website: openssl, pkgdown, remotes, shinylive
+Config/roxygen2/version: 8.0.0
 Config/testthat/parallel: false
 Config/testthat/edition: 3
 SystemRequirements: C++17, pandoc-citeproc
@@ -98,5 +99,4 @@ ByteCompile: true
 Encoding: UTF-8
 Language: en-GB
 X-schema.org-keywords: phylogenetics, tree-distance
-RoxygenNote: 7.3.3
 Roxygen: list(markdown = TRUE)
diff --git a/NEWS.md b/NEWS.md
index a051898a..fa770284 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -23,6 +23,17 @@
   removing a hard dependency on the compile-time `SL_MAX_SPLITS` constant.
   TreeDist now supports trees of any size permitted by TreeTools.
 
+- **Large-tree support (requires TreeTools ≥ 2.3.0):** all distance
+  functions now accept trees with up to 32 767 tips (previously limited
+  to `SL_MAX_TIPS`, 2048 with TreeTools ≤ 2.2.0).  The R-level tip-count
+  guard (`.CheckMaxTips()`) detects the TreeTools version at load time and
+  unlocks the higher ceiling automatically; no code changes are needed.
+  All integer counters in the C++ hot paths have been widened from `int16`
+  to `split_int` (`int32`) to handle split counts above 32 767 without
+  overflow.  Direct `lg2[]` table accesses have been replaced with
+  `lg2_lookup()` fallback helpers so that trees with more tips than
+  `SL_MAX_TIPS` are computed correctly via `std::log2` / `std::lgamma`.
+
 ## Performance
 
 - `RobinsonFoulds()` now uses a fast C++ batch path for cross-distance
diff --git a/R/RcppExports.R b/R/RcppExports.R
index 8dd5b1a2..592b4772 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -303,7 +303,7 @@ cpp_mutual_clustering <- function(x, y, nTip) {
     .Call(`_TreeDist_cpp_mutual_clustering`, x, y, nTip)
 }
 
-cpp_shared_phylo <- function(x, y, nTip) {
-    .Call(`_TreeDist_cpp_shared_phylo`, x, y, nTip)
+cpp_shared_phylo <- function(x, y, nTip, force_slow = FALSE) {
+    .Call(`_TreeDist_cpp_shared_phylo`, x, y, nTip, force_slow)
 }
 
diff --git a/R/tree_distance.R b/R/tree_distance.R
index 5ea3a9cb..12caa469 100644
--- a/R/tree_distance.R
+++ b/R/tree_distance.R
@@ -230,6 +230,23 @@ GeneralizedRF <- function(splits1, splits2, nTip, PairScorer,
   g[lower.tri(g)]
 }
 
+# Floor sub-noise distances to zero before normalization.
+# Two sources of numerical noise scale with treesIndependentInfo:
+#  (1) LAP int64 cost-matrix quantization in *Splits scoring; per-cell
+#      truncation of up to (max_possible / BIG) bits, summed over n_splits.
+#  (2) Float-accumulation drift between independently-built tables (e.g.
+#      InfoRobinsonFoulds vs cpp_splitwise_info_batch sum the same per-split
+#      info contributions, but using different lookup-table constructions).
+# Both grow with the magnitude of the answer, so an absolute sqrt(eps)
+# tolerance becomes too tight beyond a few thousand tips. Scaling by
+# treesIndependentInfo self-adjusts; pmax(1, ·) preserves the original
+# tolerance for tiny trees where these errors are negligible anyway.
+.FloorNumericalNoise <- function(ret, treesIndependentInfo) {
+  tol <- pmax(1, treesIndependentInfo) * .Machine[["double.eps"]] ^ 0.5
+  ret[ret < tol] <- 0
+  ret
+}
+
 .AllTipsSame <- function(x, y) {
   if (is.list(x)) {
     xPrime <- x[[1]]
diff --git a/R/tree_distance_info.R b/R/tree_distance_info.R
index dc7441e2..07693302 100644
--- a/R/tree_distance_info.R
+++ b/R/tree_distance_info.R
@@ -249,16 +249,15 @@ DifferentPhylogeneticInfo <- function(tree1, tree2 = NULL, normalize = FALSE,
   if (!is.null(fast)) {
     spi <- fast[["info"]]
     treesIndependentInfo <- .PairwiseSums(fast[["entropies"]])
-    
-    ret <- treesIndependentInfo - spi - spi
+
+    ret <- .FloorNumericalNoise(treesIndependentInfo - spi - spi, treesIndependentInfo)
     ret <- NormalizeInfo(ret, tree1, tree2, how = normalize,
                          infoInBoth = treesIndependentInfo,
                          InfoInTree = SplitwiseInfo, Combine = "+")
-    ret[ret < .Machine[["double.eps"]] ^ 0.5] <- 0
     attributes(ret) <- attributes(spi)
     return(ret)
   }
-  
+
   # Fast path (cross-pairs): same tips, no matching — avoids duplicate as.Splits()
   fast_many <- .FastManyManyPath(tree1, tree2, reportMatching,
                                  cpp_shared_phylo_cross_pairs,
@@ -268,25 +267,22 @@ DifferentPhylogeneticInfo <- function(tree1, tree2 = NULL, normalize = FALSE,
     info1 <- fast_many[["info1"]]
     info2 <- fast_many[["info2"]]
     treesIndependentInfo <- outer(info1, info2, "+")
-    
-    ret <- treesIndependentInfo - spi - spi
+
+    ret <- .FloorNumericalNoise(treesIndependentInfo - spi - spi, treesIndependentInfo)
     ret <- NormalizeInfo(ret, tree1, tree2, how = normalize,
                          infoInBoth = treesIndependentInfo,
                          InfoInTree = SplitwiseInfo, Combine = "+")
-    ret[ret < .Machine[["double.eps"]] ^ 0.5] <- 0
     return(ret)
   }
-  
+
   spi <- SharedPhylogeneticInfo(tree1, tree2, normalize = FALSE, diag = FALSE,
                                 reportMatching = reportMatching)
   treesIndependentInfo <- .MaxValue(tree1, tree2, SplitwiseInfo)
-  
-  ret <- treesIndependentInfo - spi - spi
-  ret <- NormalizeInfo(ret, tree1, tree2, how = normalize, 
+
+  ret <- .FloorNumericalNoise(treesIndependentInfo - spi - spi, treesIndependentInfo)
+  ret <- NormalizeInfo(ret, tree1, tree2, how = normalize,
                        infoInBoth = treesIndependentInfo,
                        InfoInTree = SplitwiseInfo, Combine = "+")
-  
-  ret[ret < .Machine[["double.eps"]] ^ 0.5] <- 0 # Catch floating point inaccuracy
   attributes(ret) <- attributes(spi)
   
   # Return:
@@ -310,16 +306,15 @@ ClusteringInfoDistance <- function(tree1, tree2 = NULL, normalize = FALSE,
   if (!is.null(fast)) {
     mci <- fast[["info"]]
     treesIndependentInfo <- .PairwiseSums(fast[["entropies"]])
-    
-    ret <- treesIndependentInfo - mci - mci
+
+    ret <- .FloorNumericalNoise(treesIndependentInfo - mci - mci, treesIndependentInfo)
     ret <- NormalizeInfo(ret, tree1, tree2, how = normalize,
                          infoInBoth = treesIndependentInfo,
                          InfoInTree = ClusteringEntropy, Combine = "+")
-    ret[ret < .Machine[["double.eps"]] ^ 0.5] <- 0
     attributes(ret) <- attributes(mci)
     return(ret)
   }
-  
+
   # Fast path (cross-pairs): same tips, no matching — avoids duplicate as.Splits()
   fast_many <- .FastManyManyPath(tree1, tree2, reportMatching,
                                  cpp_mutual_clustering_cross_pairs,
@@ -329,25 +324,22 @@ ClusteringInfoDistance <- function(tree1, tree2 = NULL, normalize = FALSE,
     info1 <- fast_many[["info1"]]
     info2 <- fast_many[["info2"]]
     treesIndependentInfo <- outer(info1, info2, "+")
-    
-    ret <- treesIndependentInfo - mci - mci
+
+    ret <- .FloorNumericalNoise(treesIndependentInfo - mci - mci, treesIndependentInfo)
     ret <- NormalizeInfo(ret, tree1, tree2, how = normalize,
                          infoInBoth = treesIndependentInfo,
                          InfoInTree = ClusteringEntropy, Combine = "+")
-    ret[ret < .Machine[["double.eps"]] ^ 0.5] <- 0
     return(ret)
   }
-  
+
   mci <- MutualClusteringInfo(tree1, tree2, normalize = FALSE, diag = FALSE,
                               reportMatching = reportMatching)
   treesIndependentInfo <- .MaxValue(tree1, tree2, ClusteringEntropy)
-  
-  ret <- treesIndependentInfo - mci - mci
+
+  ret <- .FloorNumericalNoise(treesIndependentInfo - mci - mci, treesIndependentInfo)
   ret <- NormalizeInfo(ret, tree1, tree2, how = normalize,
                        infoInBoth = treesIndependentInfo,
                        InfoInTree = ClusteringEntropy, Combine = "+")
-  
-  ret[ret < .Machine[["double.eps"]] ^ 0.5] <- 0 # Handle floating point inaccuracy
   attributes(ret) <- attributes(mci)
   
   # Return:
diff --git a/R/tree_distance_msi.R b/R/tree_distance_msi.R
index aadfc2f6..ff89cb18 100644
--- a/R/tree_distance_msi.R
+++ b/R/tree_distance_msi.R
@@ -29,15 +29,14 @@ MatchingSplitInfoDistance <- function(tree1, tree2 = NULL,
     msi <- fast[["info"]]
     treesIndependentInfo <- .PairwiseSums(fast[["entropies"]])
     
-    ret <- treesIndependentInfo - msi - msi
+    ret <- .FloorNumericalNoise(treesIndependentInfo - msi - msi, treesIndependentInfo)
     ret <- NormalizeInfo(ret, tree1, tree2, how = normalize,
                          infoInBoth = treesIndependentInfo,
                          InfoInTree = SplitwiseInfo, Combine = "+")
-    ret[ret < .Machine[["double.eps"]] ^ 0.5] <- 0
     attributes(ret) <- attributes(msi)
     return(ret)
   }
-  
+
   # Fast path (cross-pairs): same tips, no matching — avoids duplicate as.Splits()
   fast_many <- .FastManyManyPath(tree1, tree2, reportMatching,
                                  cpp_msi_cross_pairs,
@@ -47,25 +46,22 @@ MatchingSplitInfoDistance <- function(tree1, tree2 = NULL,
     info1 <- fast_many[["info1"]]
     info2 <- fast_many[["info2"]]
     treesIndependentInfo <- outer(info1, info2, "+")
-    
-    ret <- treesIndependentInfo - msi - msi
+
+    ret <- .FloorNumericalNoise(treesIndependentInfo - msi - msi, treesIndependentInfo)
     ret <- NormalizeInfo(ret, tree1, tree2, how = normalize,
                          infoInBoth = treesIndependentInfo,
                          InfoInTree = SplitwiseInfo, Combine = "+")
-    ret[ret < .Machine[["double.eps"]] ^ 0.5] <- 0
     return(ret)
   }
-  
+
   msi <- MatchingSplitInfo(tree1, tree2, normalize = FALSE, diag = FALSE,
                            reportMatching = reportMatching)
-  
+
   treesIndependentInfo <- .MaxValue(tree1, tree2, SplitwiseInfo)
-  ret <- treesIndependentInfo - msi - msi
+  ret <- .FloorNumericalNoise(treesIndependentInfo - msi - msi, treesIndependentInfo)
   ret <- NormalizeInfo(ret, tree1, tree2, how = normalize,
                        infoInBoth = treesIndependentInfo,
                        InfoInTree = SplitwiseInfo, Combine = "+")
-  
-  ret[ret < .Machine[["double.eps"]]^0.5] <- 0 # In case of floating point inaccuracy
   attributes(ret) <- attributes(msi)
   # Return:
   ret
diff --git a/R/tree_distance_nni.R b/R/tree_distance_nni.R
index 45854764..d47d4ac6 100644
--- a/R/tree_distance_nni.R
+++ b/R/tree_distance_nni.R
@@ -73,9 +73,7 @@ NNIDist <- function(tree1, tree2 = tree1) {
 #' @importFrom TreeTools Postorder RenumberTips
 #' @importFrom ape Nnode.phylo
 .NNIDistSingle <- function(tree1, tree2, nTip, ...) {
-  if (nTip > 32768L) {
-    stop("Cannot calculate NNI distance for trees with so many tips.")
-  }
+  .CheckMaxTips(nTip)
   if (nrow(tree1[["edge"]]) != nrow(tree2[["edge"]])) {
     stop("Both trees must have the same number of edges. ",
          "Is one rooted and the other unrooted?")
diff --git a/R/tree_distance_rf.R b/R/tree_distance_rf.R
index 9f9454cc..f5622b81 100644
--- a/R/tree_distance_rf.R
+++ b/R/tree_distance_rf.R
@@ -75,14 +75,15 @@ InfoRobinsonFoulds <- function(tree1, tree2 = NULL, similarity = FALSE,
                           cpp_splitwise_info_batch)
     if (!is.null(fast)) {
       treesIndependentInfo <- .PairwiseSums(fast[["entropies"]])
-      unnormalized <- treesIndependentInfo - fast[["info"]] - fast[["info"]]
-      unnormalized[unnormalized < .Machine[["double.eps"]] ^ 0.5] <- 0
+      unnormalized <- .FloorNumericalNoise(
+        treesIndependentInfo - fast[["info"]] - fast[["info"]],
+        treesIndependentInfo)
       ret <- NormalizeInfo(unnormalized, tree1, tree2, how = normalize,
                            InfoInTree = SplitwiseInfo, Combine = "+")
       attributes(ret) <- attributes(fast[["info"]])
       return(ret)
     }
-    
+
     # Cross-pairs fast path
     fast_many <- .FastManyManyPath(tree1, tree2, reportMatching,
                                    cpp_rf_info_cross_pairs,
@@ -92,25 +93,24 @@ InfoRobinsonFoulds <- function(tree1, tree2 = NULL, similarity = FALSE,
       info1 <- fast_many[["info1"]]
       info2 <- fast_many[["info2"]]
       treesIndependentInfo <- outer(info1, info2, "+")
-      
-      unnormalized <- treesIndependentInfo - irf - irf
-      unnormalized[unnormalized < .Machine[["double.eps"]] ^ 0.5] <- 0
+
+      unnormalized <- .FloorNumericalNoise(treesIndependentInfo - irf - irf,
+                                            treesIndependentInfo)
       ret <- NormalizeInfo(unnormalized, tree1, tree2, how = normalize,
                            InfoInTree = SplitwiseInfo, Combine = "+")
       return(ret)
     }
   }
-  
-  unnormalized <- CalculateTreeDistance(InfoRobinsonFouldsSplits, tree1, tree2, 
+
+  unnormalized <- CalculateTreeDistance(InfoRobinsonFouldsSplits, tree1, tree2,
                                         reportMatching) * 2
-  
+
   if (!similarity) {
-    unnormalized <- .MaxValue(tree1, tree2, SplitwiseInfo) - unnormalized
+    treesIndependentInfo <- .MaxValue(tree1, tree2, SplitwiseInfo)
+    unnormalized <- .FloorNumericalNoise(treesIndependentInfo - unnormalized,
+                                          treesIndependentInfo)
   }
   
-  # In case of floating point inaccuracy
-  unnormalized[unnormalized < .Machine[["double.eps"]] ^ 0.5] <- 0
-  
   # Return:
   NormalizeInfo(unnormalized, tree1, tree2, how = normalize,
                 InfoInTree = SplitwiseInfo, Combine = "+")
diff --git a/R/tree_distance_utilities.R b/R/tree_distance_utilities.R
index b85fab32..84b19e2d 100644
--- a/R/tree_distance_utilities.R
+++ b/R/tree_distance_utilities.R
@@ -1,16 +1,23 @@
-# Validate that nTip does not exceed the compiled SL_MAX_TIPS limit.
+# Validate that nTip does not exceed the supported tip-count ceiling.
+# cpp_sl_max_tips() > 2048L iff TreeTools >= 2.3.0 raised the stack threshold
+# and provides heap-backed split storage; accept up to 32767 tips in that case.
+# Otherwise cap at the compiled SL_MAX_TIPS.
 # Called from every distance entry point before any C++ work.
 .CheckMaxTips <- function(nTip) {
-  if (!is.na(nTip) && nTip > .SL_MAX_TIPS) {
-    if (.SL_MAX_TIPS < 32704L) {
-      stop(
-        "Trees with ", nTip, " tips exceed the compiled limit of ",
-        .SL_MAX_TIPS, " tips.",
-          "\nUpdate TreeTools and reinstall TreeDist to support more tips."
-      )
+  if (is.na(nTip)) return(invisible(NULL))
+  sl_max <- cpp_sl_max_tips()
+  if (sl_max > 2048L) {
+    if (nTip > 32767L) {
+      stop("Trees with ", nTip,
+           " tips are not yet supported (maximum 32767).")
     }
-    stop("Trees with ", nTip, " tips are not yet supported (maximum ",
-         .SL_MAX_TIPS, ")")
+  } else if (nTip > sl_max) {
+    # else-if fires only when sl_max <= 2048 (TreeTools < 2.3.0)
+    stop(
+      "Trees with ", nTip, " tips exceed the compiled limit of ",
+      sl_max, " tips.",
+      "\nUpdate TreeTools and reinstall TreeDist to support more tips."
+    )
   }
 }
 
diff --git a/R/zzz.R b/R/zzz.R
index 614ebbda..826528ea 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -1,9 +1,3 @@
-.SL_MAX_TIPS <- NULL # populated in .onLoad
-
-.onLoad <- function(libname, pkgname) {
-  .SL_MAX_TIPS <<- cpp_sl_max_tips()
-}
-
 .onUnload <- function(libpath) {
   StopParallel(quietly = TRUE)
   library.dynam.unload("TreeDist", libpath)
diff --git a/cran-comments.md b/cran-comments.md
index 31e312b3..3bae24af 100644
--- a/cran-comments.md
+++ b/cran-comments.md
@@ -1,7 +1,8 @@
 ## Test environments
-* Local machine, Windows 10, R devel (2024-09-02 r87090 ucrt)
+* Local machine, Windows 11, R version 4.6.0 (2026-04-24 ucrt)
 
 * `devtools::check_win_devel()`
+* `devtools::check_mac_devel()`
 
 * [GitHub Actions](https://github.com/ms609/TreeDist/actions):
   - windows-latest, R release
diff --git a/inst/_pkgdown.yml b/inst/_pkgdown.yml
index 2922547e..664ef8e6 100644
--- a/inst/_pkgdown.yml
+++ b/inst/_pkgdown.yml
@@ -85,6 +85,7 @@ navbar:
       href: news/index.html
     github:
       icon: fa-github fa-lg
+      aria-label: View this repo on GitHub
       href: https://github.com/ms609/TreeDist
 
 articles:
diff --git a/inst/include/TreeDist/mutual_clustering.h b/inst/include/TreeDist/mutual_clustering.h
index 437633f0..23134905 100644
--- a/inst/include/TreeDist/mutual_clustering.h
+++ b/inst/include/TreeDist/mutual_clustering.h
@@ -22,7 +22,9 @@ namespace TreeDist {
   // lg2_unrooted[i]       = log2((2i-5)!!) for i >= 3
   // lg2_rooted             = &lg2_unrooted[0] + 1  (so lg2_rooted[i] = lg2_unrooted[i+1])
   //
-  // These are defined in mutual_clustering_impl.h.
+  // These are fast-path caches sized to TreeTools' stack threshold.
+  // For larger trees, fall back to on-the-fly computation via the _lookup helpers.
+  // Definitions are in mutual_clustering_impl.h.
 
   extern double lg2[SL_MAX_TIPS + 1];
   extern double lg2_double_factorial[SL_MAX_TIPS + SL_MAX_TIPS - 2];
@@ -33,15 +35,46 @@ namespace TreeDist {
   // computation.  max_tips should be >= the largest tree size used.
   void init_lg2_tables(int max_tips);
 
+  // Out-of-line slow paths for inputs that exceed the precomputed tables
+  // (i.e. n_tips > SL_MAX_TIPS).  Defined in mutual_clustering_impl.h.
+  // Kept out-of-line so the *_lookup() inline thunks below stay tiny —
+  // table load + predicted branch, no extern function calls — which some
+  // compilers refuse to inline through, suppressing optimization in the
+  // per-cell hot loops of PID/MCI/MSI/RF info.
+  double lg2_slow(split_int x);
+  double lg2_unrooted_slow(split_int n_tips);
+  double lg2_rooted_slow(split_int n_tips);
+
+  // log2(x) — table-fast for x <= SL_MAX_TIPS, runtime otherwise.
+  [[nodiscard]] inline double lg2_lookup(split_int x) noexcept {
+    return (x <= static_cast<split_int>(SL_MAX_TIPS))
+      ? lg2[x]
+      : lg2_slow(x); // #nocov
+  }
+
+  // log2((2n-5)!!) — table-fast for n <= SL_MAX_TIPS+1, lgamma otherwise.
+  [[nodiscard]] inline double lg2_unrooted_lookup(split_int n_tips) noexcept {
+    return (n_tips <= static_cast<split_int>(SL_MAX_TIPS + 1))
+      ? lg2_unrooted[n_tips]
+      : lg2_unrooted_slow(n_tips); // #nocov
+  }
+
+  // log2((2n-3)!!) — table-fast for n <= SL_MAX_TIPS+1, lgamma otherwise.
+  [[nodiscard]] inline double lg2_rooted_lookup(split_int n_tips) noexcept {
+    return (n_tips <= static_cast<split_int>(SL_MAX_TIPS + 1))
+      ? lg2_rooted[n_tips]
+      : lg2_rooted_slow(n_tips); // #nocov
+  }
+
   // ---- Inline helpers ----
 
   // Information content of a perfectly-matching split pair.
   // ic_matching(a, b, n) = (a + b) * lg2[n] - a * lg2[a] - b * lg2[b]
-  [[nodiscard]] inline double ic_matching(int16 a, int16 b,
-                                          int16 n) noexcept {
-    const double lg2a = lg2[a];
-    const double lg2b = lg2[b];
-    const double lg2n = lg2[n];
+  [[nodiscard]] inline double ic_matching(split_int a, split_int b,
+                                          split_int n) noexcept {
+    const double lg2a = lg2_lookup(a);
+    const double lg2b = lg2_lookup(b);
+    const double lg2n = lg2_lookup(n);
     return (a + b) * lg2n - a * lg2a - b * lg2b;
   }
 
@@ -77,9 +110,9 @@ namespace TreeDist {
   // Implementation in mutual_clustering_impl.h.
 
   double mutual_clustering_score(
-      const splitbit* const* a_state, const int16* a_in, int16 a_n_splits,
-      const splitbit* const* b_state, const int16* b_in, int16 b_n_splits,
-      int16 n_bins, int32 n_tips,
+      const splitbit* const* a_state, const split_int* a_in, split_int a_n_splits,
+      const splitbit* const* b_state, const split_int* b_in, split_int b_n_splits,
+      split_int n_bins, int32 n_tips,
       LapScratch& scratch);
 
 } // namespace TreeDist
diff --git a/inst/include/TreeDist/mutual_clustering_impl.h b/inst/include/TreeDist/mutual_clustering_impl.h
index 7f98c8fb..cea725be 100644
--- a/inst/include/TreeDist/mutual_clustering_impl.h
+++ b/inst/include/TreeDist/mutual_clustering_impl.h
@@ -58,6 +58,30 @@ void init_lg2_tables(int max_tips) {
   }
 }
 
+// ---- Out-of-line slow paths (n_tips > SL_MAX_TIPS) ----
+// Defined out-of-line so that the *_lookup() inline thunks in
+// mutual_clustering.h stay trivial enough for any compiler to inline.
+
+// LCOV_EXCL_START
+double lg2_slow(split_int x) {
+  return std::log2(static_cast<double>(x));
+}
+
+double lg2_unrooted_slow(split_int n_tips) {
+  if (n_tips < 3) return 0.0;
+  const double n = static_cast<double>(n_tips);
+  return (std::lgamma(2.0 * n - 3.0) - std::lgamma(n - 1.0)) / std::log(2.0)
+         - (n - 2.0);
+}
+
+double lg2_rooted_slow(split_int n_tips) {
+  if (n_tips < 2) return 0.0;
+  const double n = static_cast<double>(n_tips);
+  return (std::lgamma(2.0 * n - 1.0) - std::lgamma(n)) / std::log(2.0)
+         - (n - 1.0);
+}
+// LCOV_EXCL_STOP
+
 // ---- Sort+merge exact-match detection (internal) ----
 //
 // Canonicalise each split so bit 0 is always set (flip complement if not),
@@ -68,17 +92,17 @@ void init_lg2_tables(int max_tips) {
 
 namespace detail {
 
-static int16 find_exact_matches_raw(
-    const splitbit* const* a_state, const int16* /*a_in*/, int16 a_n,
-    const splitbit* const* b_state, const int16* /*b_in*/, int16 b_n,
-    int16 n_bins, int32 n_tips,
-    int16* a_match, int16* b_match)
+static split_int find_exact_matches_raw(
+    const splitbit* const* a_state, const split_int* /*a_in*/, split_int a_n,
+    const splitbit* const* b_state, const split_int* /*b_in*/, split_int b_n,
+    split_int n_bins, int32 n_tips,
+    split_int* a_match, split_int* b_match)
 {
-  std::fill(a_match, a_match + a_n, int16(0));
-  std::fill(b_match, b_match + b_n, int16(0));
+  std::fill(a_match, a_match + a_n, split_int(0));
+  std::fill(b_match, b_match + b_n, split_int(0));
   if (a_n == 0 || b_n == 0) return 0;
 
-  const int16 last_bin = n_bins - 1;
+  const split_int last_bin = n_bins - 1;
   const splitbit last_mask = (n_tips % SL_BIN_SIZE == 0)
     ? ~splitbit(0)
     : (splitbit(1) << (n_tips % SL_BIN_SIZE)) - 1;
@@ -87,17 +111,17 @@ static int16 find_exact_matches_raw(
   std::vector<splitbit> a_canon(static_cast<std::size_t>(a_n) * n_bins);
   std::vector<splitbit> b_canon(static_cast<std::size_t>(b_n) * n_bins);
 
-  for (int16 i = 0; i < a_n; ++i) {
+  for (split_int i = 0; i < a_n; ++i) {
     const bool flip = !(a_state[i][0] & 1);
-    for (int16 bin = 0; bin < n_bins; ++bin) {
+    for (split_int bin = 0; bin < n_bins; ++bin) {
       splitbit val = flip ? ~a_state[i][bin] : a_state[i][bin];
       if (bin == last_bin) val &= last_mask;
       a_canon[i * n_bins + bin] = val;
     }
   }
-  for (int16 i = 0; i < b_n; ++i) {
+  for (split_int i = 0; i < b_n; ++i) {
     const bool flip = !(b_state[i][0] & 1);
-    for (int16 bin = 0; bin < n_bins; ++bin) {
+    for (split_int bin = 0; bin < n_bins; ++bin) {
       splitbit val = flip ? ~b_state[i][bin] : b_state[i][bin];
       if (bin == last_bin) val &= last_mask;
       b_canon[i * n_bins + bin] = val;
@@ -105,8 +129,8 @@ static int16 find_exact_matches_raw(
   }
 
   // Sort index arrays by canonical form
-  auto canon_less = [&](const splitbit* canon, int16 n_b, int16 i, int16 j) {
-    for (int16 bin = 0; bin < n_b; ++bin) {
+  auto canon_less = [&](const splitbit* canon, split_int n_b, split_int i, split_int j) {
+    for (split_int bin = 0; bin < n_b; ++bin) {
       const splitbit vi = canon[i * n_b + bin];
       const splitbit vj = canon[j * n_b + bin];
       if (vi < vj) return true;
@@ -115,28 +139,28 @@ static int16 find_exact_matches_raw(
     return false;
   };
 
-  std::vector<int16> a_order(a_n), b_order(b_n);
-  std::iota(a_order.begin(), a_order.end(), int16(0));
-  std::iota(b_order.begin(), b_order.end(), int16(0));
+  std::vector<split_int> a_order(a_n), b_order(b_n);
+  std::iota(a_order.begin(), a_order.end(), split_int(0));
+  std::iota(b_order.begin(), b_order.end(), split_int(0));
 
   std::sort(a_order.begin(), a_order.end(),
-            [&](int16 i, int16 j) {
+            [&](split_int i, split_int j) {
               return canon_less(a_canon.data(), n_bins, i, j);
             });
   std::sort(b_order.begin(), b_order.end(),
-            [&](int16 i, int16 j) {
+            [&](split_int i, split_int j) {
               return canon_less(b_canon.data(), n_bins, i, j);
             });
 
   // Merge-scan
-  int16 exact_n = 0;
-  int16 ai_pos = 0, bi_pos = 0;
+  split_int exact_n = 0;
+  split_int ai_pos = 0, bi_pos = 0;
   while (ai_pos < a_n && bi_pos < b_n) {
-    const int16 ai = a_order[ai_pos];
-    const int16 bi = b_order[bi_pos];
+    const split_int ai = a_order[ai_pos];
+    const split_int bi = b_order[bi_pos];
 
     int cmp = 0;
-    for (int16 bin = 0; bin < n_bins; ++bin) {
+    for (split_int bin = 0; bin < n_bins; ++bin) {
       const splitbit va = a_canon[ai * n_bins + bin];
       const splitbit vb = b_canon[bi * n_bins + bin];
       if (va < vb) { cmp = -1; break; }
@@ -165,41 +189,41 @@ static int16 find_exact_matches_raw(
 // ---- MCI score implementation ----
 
 double mutual_clustering_score(
-    const splitbit* const* a_state, const int16* a_in, int16 a_n_splits,
-    const splitbit* const* b_state, const int16* b_in, int16 b_n_splits,
-    int16 n_bins, int32 n_tips,
+    const splitbit* const* a_state, const split_int* a_in, split_int a_n_splits,
+    const splitbit* const* b_state, const split_int* b_in, split_int b_n_splits,
+    split_int n_bins, int32 n_tips,
     LapScratch& scratch)
 {
   if (a_n_splits == 0 || b_n_splits == 0 || n_tips == 0) return 0.0;
 
-  const int16 most_splits = std::max(a_n_splits, b_n_splits);
+  const split_int most_splits = std::max(a_n_splits, b_n_splits);
   const double n_tips_rcp = 1.0 / static_cast<double>(n_tips);
 
   constexpr cost max_score  = BIG;
   constexpr double over_max = 1.0 / static_cast<double>(BIG);
   const double max_over_tips = static_cast<double>(BIG) * n_tips_rcp;
-  const double lg2_n = lg2[n_tips];
+  const double lg2_n = lg2_lookup(n_tips);
 
   // --- Phase 1: O(n log n) exact-match detection ---
-  std::vector<int16> a_match_buf(a_n_splits);
-  std::vector<int16> b_match_buf(b_n_splits);
+  std::vector<split_int> a_match_buf(a_n_splits);
+  std::vector<split_int> b_match_buf(b_n_splits);
 
-  const int16 exact_n = detail::find_exact_matches_raw(
+  const split_int exact_n = detail::find_exact_matches_raw(
     a_state, a_in, a_n_splits,
     b_state, b_in, b_n_splits,
     n_bins, n_tips,
     a_match_buf.data(), b_match_buf.data());
 
-  const int16* a_match = a_match_buf.data();
-  const int16* b_match = b_match_buf.data();
+  const split_int* a_match = a_match_buf.data();
+  const split_int* b_match = b_match_buf.data();
 
   // Accumulate exact-match score
   double exact_score = 0.0;
-  for (int16 ai = 0; ai < a_n_splits; ++ai) {
+  for (split_int ai = 0; ai < a_n_splits; ++ai) {
     if (a_match[ai]) {
-      const int16 na = a_in[ai];
-      const int16 nA = static_cast<int16>(n_tips - na);
-      exact_score += ic_matching(na, nA, static_cast<int16>(n_tips));
+      const split_int na = a_in[ai];
+      const split_int nA = n_tips - na;
+      exact_score += ic_matching(na, nA, n_tips);
     }
   }
 
@@ -209,58 +233,58 @@ double mutual_clustering_score(
   }
 
   // --- Phase 2: fill cost matrix for unmatched splits only (O(k²)) ---
-  const int16 lap_n = most_splits - exact_n;
+  const split_int lap_n = most_splits - exact_n;
 
-  std::vector<int16> a_unmatch, b_unmatch;
+  std::vector<split_int> a_unmatch, b_unmatch;
   a_unmatch.reserve(lap_n);
   b_unmatch.reserve(lap_n);
-  for (int16 ai = 0; ai < a_n_splits; ++ai) {
+  for (split_int ai = 0; ai < a_n_splits; ++ai) {
     if (!a_match[ai]) a_unmatch.push_back(ai);
   }
-  for (int16 bi = 0; bi < b_n_splits; ++bi) {
+  for (split_int bi = 0; bi < b_n_splits; ++bi) {
     if (!b_match[bi]) b_unmatch.push_back(bi);
   }
 
   scratch.score_pool.resize(lap_n);
   CostMatrix& score = scratch.score_pool;
 
-  const int16 a_unmatched_n = static_cast<int16>(a_unmatch.size());
-  const int16 b_unmatched_n = static_cast<int16>(b_unmatch.size());
+  const split_int a_unmatched_n = static_cast<split_int>(a_unmatch.size());
+  const split_int b_unmatched_n = static_cast<split_int>(b_unmatch.size());
 
-  for (int16 a_pos = 0; a_pos < a_unmatched_n; ++a_pos) {
-    const int16 ai = a_unmatch[a_pos];
-    const int16 na = a_in[ai];
-    const int16 nA = static_cast<int16>(n_tips - na);
+  for (split_int a_pos = 0; a_pos < a_unmatched_n; ++a_pos) {
+    const split_int ai = a_unmatch[a_pos];
+    const split_int na = a_in[ai];
+    const split_int nA = n_tips - na;
     const splitbit* a_row = a_state[ai];
 
-    const double offset_a = lg2_n - lg2[na];
-    const double offset_A = lg2_n - lg2[nA];
+    const double offset_a = lg2_n - lg2_lookup(na);
+    const double offset_A = lg2_n - lg2_lookup(nA);
 
-    for (int16 b_pos = 0; b_pos < b_unmatched_n; ++b_pos) {
-      const int16 bi = b_unmatch[b_pos];
+    for (split_int b_pos = 0; b_pos < b_unmatched_n; ++b_pos) {
+      const split_int bi = b_unmatch[b_pos];
       const splitbit* b_row = b_state[bi];
-      int16 a_and_b = 0;
-      for (int16 bin = 0; bin < n_bins; ++bin) {
+      split_int a_and_b = 0;
+      for (split_int bin = 0; bin < n_bins; ++bin) {
         a_and_b += TreeTools::count_bits(a_row[bin] & b_row[bin]);
       }
 
-      const int16 nb = b_in[bi];
-      const int16 nB = static_cast<int16>(n_tips - nb);
-      const int16 a_and_B = na - a_and_b;
-      const int16 A_and_b = nb - a_and_b;
-      const int16 A_and_B = nA - A_and_b;
+      const split_int nb = b_in[bi];
+      const split_int nB = n_tips - nb;
+      const split_int a_and_B = na - a_and_b;
+      const split_int A_and_b = nb - a_and_b;
+      const split_int A_and_B = nA - A_and_b;
 
       if (a_and_b == A_and_b && a_and_b == a_and_B
           && a_and_b == A_and_B) {
         score(a_pos, b_pos) = max_score;
       } else {
-        const double lg2_nb = lg2[nb];
-        const double lg2_nB = lg2[nB];
+        const double lg2_nb = lg2_lookup(nb);
+        const double lg2_nB = lg2_lookup(nB);
         const double ic_sum =
-          a_and_b * (lg2[a_and_b] + offset_a - lg2_nb) +
-          a_and_B * (lg2[a_and_B] + offset_a - lg2_nB) +
-          A_and_b * (lg2[A_and_b] + offset_A - lg2_nb) +
-          A_and_B * (lg2[A_and_B] + offset_A - lg2_nB);
+          a_and_b * (lg2_lookup(a_and_b) + offset_a - lg2_nb) +
+          a_and_B * (lg2_lookup(a_and_B) + offset_a - lg2_nB) +
+          A_and_b * (lg2_lookup(A_and_b) + offset_A - lg2_nb) +
+          A_and_B * (lg2_lookup(A_and_B) + offset_A - lg2_nB);
         score(a_pos, b_pos) =
           max_score - static_cast<cost>(ic_sum * max_over_tips);
       }
diff --git a/inst/include/TreeDist/types.h b/inst/include/TreeDist/types.h
index c953989b..60b49ec0 100644
--- a/inst/include/TreeDist/types.h
+++ b/inst/include/TreeDist/types.h
@@ -16,6 +16,10 @@ namespace TreeDist {
   using int32 = int_fast32_t;
   using cost  = int_fast64_t;
 
+  // Primary type for split/tip/bin counters. int32 benchmarks faster than int16
+  // (avoids sign-extension when mixing with SplitList int32 members).
+  using split_int = int32;
+
   using lap_dim = int;
   using lap_row = lap_dim;
   using lap_col = lap_dim;
diff --git a/man/HierarchicalMutualInfo.Rd b/man/HierarchicalMutualInfo.Rd
index 193b18ba..0da33247 100644
--- a/man/HierarchicalMutualInfo.Rd
+++ b/man/HierarchicalMutualInfo.Rd
@@ -124,17 +124,17 @@ AHMI(tree1, tree2, precision = 0.1)
 \insertAllCited{}
 }
 \seealso{
-Other tree distances: 
-\code{\link{JaccardRobinsonFoulds}()},
-\code{\link{KendallColijn}()},
-\code{\link{MASTSize}()},
-\code{\link{MatchingSplitDistance}()},
-\code{\link{NNIDist}()},
-\code{\link{NyeSimilarity}()},
-\code{\link{PathDist}()},
+Other tree distances:
+\code{\link[=JaccardRobinsonFoulds]{JaccardRobinsonFoulds()}},
+\code{\link[=KendallColijn]{KendallColijn()}},
+\code{\link[=MASTSize]{MASTSize()}},
+\code{\link[=MatchingSplitDistance]{MatchingSplitDistance()}},
+\code{\link[=NNIDist]{NNIDist()}},
+\code{\link[=NyeSimilarity]{NyeSimilarity()}},
+\code{\link[=PathDist]{PathDist()}},
 \code{\link{Robinson-Foulds}},
-\code{\link{SPRDist}()},
-\code{\link{TransferDist}()},
-\code{\link{TreeDistance}()}
+\code{\link[=SPRDist]{SPRDist()}},
+\code{\link[=TransferDist]{TransferDist()}},
+\code{\link[=TreeDistance]{TreeDistance()}}
 }
 \concept{tree distances}
diff --git a/man/Islands.Rd b/man/Islands.Rd
index 41c24d1b..9bfcfb4a 100644
--- a/man/Islands.Rd
+++ b/man/Islands.Rd
@@ -64,13 +64,13 @@ par(oPar)
 \insertAllCited{}
 }
 \seealso{
-Other tree space functions: 
-\code{\link{MSTSegments}()},
-\code{\link{MapTrees}()},
-\code{\link{MappingQuality}()},
-\code{\link{SpectralEigens}()},
+Other tree space functions:
+\code{\link[=MSTSegments]{MSTSegments()}},
+\code{\link[=MapTrees]{MapTrees()}},
+\code{\link[=MappingQuality]{MappingQuality()}},
+\code{\link[=SpectralEigens]{SpectralEigens()}},
 \code{\link{cluster-statistics}},
-\code{\link{median.multiPhylo}()}
+\code{\link[=median.multiPhylo]{median.multiPhylo()}}
 }
 \author{
 \href{https://orcid.org/0000-0001-5660-1727}{Martin R. Smith}
diff --git a/man/JaccardRobinsonFoulds.Rd b/man/JaccardRobinsonFoulds.Rd
index 7e515be7..ad70a60f 100644
--- a/man/JaccardRobinsonFoulds.Rd
+++ b/man/JaccardRobinsonFoulds.Rd
@@ -126,18 +126,18 @@ VisualizeMatching(JRF2, tree1, tree2, matchZeros = FALSE)
 \insertAllCited{}
 }
 \seealso{
-Other tree distances: 
-\code{\link{HierarchicalMutualInfo}()},
-\code{\link{KendallColijn}()},
-\code{\link{MASTSize}()},
-\code{\link{MatchingSplitDistance}()},
-\code{\link{NNIDist}()},
-\code{\link{NyeSimilarity}()},
-\code{\link{PathDist}()},
+Other tree distances:
+\code{\link[=HierarchicalMutualInfo]{HierarchicalMutualInfo()}},
+\code{\link[=KendallColijn]{KendallColijn()}},
+\code{\link[=MASTSize]{MASTSize()}},
+\code{\link[=MatchingSplitDistance]{MatchingSplitDistance()}},
+\code{\link[=NNIDist]{NNIDist()}},
+\code{\link[=NyeSimilarity]{NyeSimilarity()}},
+\code{\link[=PathDist]{PathDist()}},
 \code{\link{Robinson-Foulds}},
-\code{\link{SPRDist}()},
-\code{\link{TransferDist}()},
-\code{\link{TreeDistance}()}
+\code{\link[=SPRDist]{SPRDist()}},
+\code{\link[=TransferDist]{TransferDist()}},
+\code{\link[=TreeDistance]{TreeDistance()}}
 }
 \author{
 \href{https://orcid.org/0000-0001-5660-1727}{Martin R. Smith}
diff --git a/man/KMeansPP.Rd b/man/KMeansPP.Rd
index e279beb3..cab7aebe 100644
--- a/man/KMeansPP.Rd
+++ b/man/KMeansPP.Rd
@@ -45,7 +45,7 @@ plot(x, col = plusters$cluster, pch = rep(15:19, each = 5))
 \seealso{
 \code{\link[stats]{kmeans}}
 
-Other cluster functions: 
+Other cluster functions:
 \code{\link{cluster-statistics}}
 }
 \author{
diff --git a/man/KendallColijn.Rd b/man/KendallColijn.Rd
index 57d01772..8bb666b0 100644
--- a/man/KendallColijn.Rd
+++ b/man/KendallColijn.Rd
@@ -125,18 +125,18 @@ KCDiameter(trees)
 is a more sophisticated, if more cumbersome, implementation that supports
 lambda > 0, i.e. use of edge lengths in tree comparison.
 
-Other tree distances: 
-\code{\link{HierarchicalMutualInfo}()},
-\code{\link{JaccardRobinsonFoulds}()},
-\code{\link{MASTSize}()},
-\code{\link{MatchingSplitDistance}()},
-\code{\link{NNIDist}()},
-\code{\link{NyeSimilarity}()},
-\code{\link{PathDist}()},
+Other tree distances:
+\code{\link[=HierarchicalMutualInfo]{HierarchicalMutualInfo()}},
+\code{\link[=JaccardRobinsonFoulds]{JaccardRobinsonFoulds()}},
+\code{\link[=MASTSize]{MASTSize()}},
+\code{\link[=MatchingSplitDistance]{MatchingSplitDistance()}},
+\code{\link[=NNIDist]{NNIDist()}},
+\code{\link[=NyeSimilarity]{NyeSimilarity()}},
+\code{\link[=PathDist]{PathDist()}},
 \code{\link{Robinson-Foulds}},
-\code{\link{SPRDist}()},
-\code{\link{TransferDist}()},
-\code{\link{TreeDistance}()}
+\code{\link[=SPRDist]{SPRDist()}},
+\code{\link[=TransferDist]{TransferDist()}},
+\code{\link[=TreeDistance]{TreeDistance()}}
 }
 \author{
 \href{https://orcid.org/0000-0001-5660-1727}{Martin R. Smith}
diff --git a/man/MASTSize.Rd b/man/MASTSize.Rd
index db028b0a..a50bf2d1 100644
--- a/man/MASTSize.Rd
+++ b/man/MASTSize.Rd
@@ -62,18 +62,18 @@ CompareAll(as.phylo(0:4, 8), MASTInfo)
 \code{\link[phangorn:mast]{phangorn::mast()}}, a slower implementation that also lists the
 leaves contained within the subtree.
 
-Other tree distances: 
-\code{\link{HierarchicalMutualInfo}()},
-\code{\link{JaccardRobinsonFoulds}()},
-\code{\link{KendallColijn}()},
-\code{\link{MatchingSplitDistance}()},
-\code{\link{NNIDist}()},
-\code{\link{NyeSimilarity}()},
-\code{\link{PathDist}()},
+Other tree distances:
+\code{\link[=HierarchicalMutualInfo]{HierarchicalMutualInfo()}},
+\code{\link[=JaccardRobinsonFoulds]{JaccardRobinsonFoulds()}},
+\code{\link[=KendallColijn]{KendallColijn()}},
+\code{\link[=MatchingSplitDistance]{MatchingSplitDistance()}},
+\code{\link[=NNIDist]{NNIDist()}},
+\code{\link[=NyeSimilarity]{NyeSimilarity()}},
+\code{\link[=PathDist]{PathDist()}},
 \code{\link{Robinson-Foulds}},
-\code{\link{SPRDist}()},
-\code{\link{TransferDist}()},
-\code{\link{TreeDistance}()}
+\code{\link[=SPRDist]{SPRDist()}},
+\code{\link[=TransferDist]{TransferDist()}},
+\code{\link[=TreeDistance]{TreeDistance()}}
 }
 \author{
 \href{https://orcid.org/0000-0001-5660-1727}{Martin R. Smith}
diff --git a/man/MCITree.Rd b/man/MCITree.Rd
index 8a119ba0..18528bab 100644
--- a/man/MCITree.Rd
+++ b/man/MCITree.Rd
@@ -47,8 +47,8 @@ SplitwiseInfo(mcc)
 
 }
 \seealso{
-Other summary trees: 
-\code{\link{TransferConsensus}()}
+Other summary trees:
+\code{\link[=TransferConsensus]{TransferConsensus()}}
 }
 \author{
 \href{https://orcid.org/0000-0001-5660-1727}{Martin R. Smith}
diff --git a/man/MSTSegments.Rd b/man/MSTSegments.Rd
index 1ac77390..0b3c5186 100644
--- a/man/MSTSegments.Rd
+++ b/man/MSTSegments.Rd
@@ -70,13 +70,13 @@ PlotTools::SpectrumLegend(
 \insertAllCited{}
 }
 \seealso{
-Other tree space functions: 
-\code{\link{Islands}()},
-\code{\link{MapTrees}()},
-\code{\link{MappingQuality}()},
-\code{\link{SpectralEigens}()},
+Other tree space functions:
+\code{\link[=Islands]{Islands()}},
+\code{\link[=MapTrees]{MapTrees()}},
+\code{\link[=MappingQuality]{MappingQuality()}},
+\code{\link[=SpectralEigens]{SpectralEigens()}},
 \code{\link{cluster-statistics}},
-\code{\link{median.multiPhylo}()}
+\code{\link[=median.multiPhylo]{median.multiPhylo()}}
 }
 \author{
 \href{https://orcid.org/0000-0001-5660-1727}{Martin R. Smith}
diff --git a/man/MapTrees.Rd b/man/MapTrees.Rd
index 59890840..69f9ed6d 100644
--- a/man/MapTrees.Rd
+++ b/man/MapTrees.Rd
@@ -110,13 +110,13 @@ this application.  The application itself can be cited using
 Full detail of tree space analysis in R is provided in the accompanying
 \href{https://ms609.github.io/TreeDist/articles/treespace.html}{vignette}.
 
-Other tree space functions: 
-\code{\link{Islands}()},
-\code{\link{MSTSegments}()},
-\code{\link{MappingQuality}()},
-\code{\link{SpectralEigens}()},
+Other tree space functions:
+\code{\link[=Islands]{Islands()}},
+\code{\link[=MSTSegments]{MSTSegments()}},
+\code{\link[=MappingQuality]{MappingQuality()}},
+\code{\link[=SpectralEigens]{SpectralEigens()}},
 \code{\link{cluster-statistics}},
-\code{\link{median.multiPhylo}()}
+\code{\link[=median.multiPhylo]{median.multiPhylo()}}
 }
 \author{
 \href{https://orcid.org/0000-0001-5660-1727}{Martin R. Smith}
diff --git a/man/MappingQuality.Rd b/man/MappingQuality.Rd
index 05751dfb..e9c77c05 100644
--- a/man/MappingQuality.Rd
+++ b/man/MappingQuality.Rd
@@ -41,13 +41,13 @@ MappingQuality(distances, dist(mapping), 4)
 \insertAllCited{}
 }
 \seealso{
-Other tree space functions: 
-\code{\link{Islands}()},
-\code{\link{MSTSegments}()},
-\code{\link{MapTrees}()},
-\code{\link{SpectralEigens}()},
+Other tree space functions:
+\code{\link[=Islands]{Islands()}},
+\code{\link[=MSTSegments]{MSTSegments()}},
+\code{\link[=MapTrees]{MapTrees()}},
+\code{\link[=SpectralEigens]{SpectralEigens()}},
 \code{\link{cluster-statistics}},
-\code{\link{median.multiPhylo}()}
+\code{\link[=median.multiPhylo]{median.multiPhylo()}}
 }
 \author{
 \href{https://orcid.org/0000-0001-5660-1727}{Martin R. Smith}
diff --git a/man/MatchingSplitDistance.Rd b/man/MatchingSplitDistance.Rd
index b5d64b27..5b1eb124 100644
--- a/man/MatchingSplitDistance.Rd
+++ b/man/MatchingSplitDistance.Rd
@@ -84,18 +84,18 @@ VisualizeMatching(MatchingSplitDistance, TreeTools::BalancedTree(6),
 \insertAllCited{}
 }
 \seealso{
-Other tree distances: 
-\code{\link{HierarchicalMutualInfo}()},
-\code{\link{JaccardRobinsonFoulds}()},
-\code{\link{KendallColijn}()},
-\code{\link{MASTSize}()},
-\code{\link{NNIDist}()},
-\code{\link{NyeSimilarity}()},
-\code{\link{PathDist}()},
+Other tree distances:
+\code{\link[=HierarchicalMutualInfo]{HierarchicalMutualInfo()}},
+\code{\link[=JaccardRobinsonFoulds]{JaccardRobinsonFoulds()}},
+\code{\link[=KendallColijn]{KendallColijn()}},
+\code{\link[=MASTSize]{MASTSize()}},
+\code{\link[=NNIDist]{NNIDist()}},
+\code{\link[=NyeSimilarity]{NyeSimilarity()}},
+\code{\link[=PathDist]{PathDist()}},
 \code{\link{Robinson-Foulds}},
-\code{\link{SPRDist}()},
-\code{\link{TransferDist}()},
-\code{\link{TreeDistance}()}
+\code{\link[=SPRDist]{SPRDist()}},
+\code{\link[=TransferDist]{TransferDist()}},
+\code{\link[=TreeDistance]{TreeDistance()}}
 }
 \author{
 \href{https://orcid.org/0000-0001-5660-1727}{Martin R. Smith}
diff --git a/man/NNIDist.Rd b/man/NNIDist.Rd
index 5e24301b..23ed0839 100644
--- a/man/NNIDist.Rd
+++ b/man/NNIDist.Rd
@@ -101,18 +101,18 @@ CompareAll(as.phylo(30:33, 8), NNIDist)
 \insertAllCited{}
 }
 \seealso{
-Other tree distances: 
-\code{\link{HierarchicalMutualInfo}()},
-\code{\link{JaccardRobinsonFoulds}()},
-\code{\link{KendallColijn}()},
-\code{\link{MASTSize}()},
-\code{\link{MatchingSplitDistance}()},
-\code{\link{NyeSimilarity}()},
-\code{\link{PathDist}()},
+Other tree distances:
+\code{\link[=HierarchicalMutualInfo]{HierarchicalMutualInfo()}},
+\code{\link[=JaccardRobinsonFoulds]{JaccardRobinsonFoulds()}},
+\code{\link[=KendallColijn]{KendallColijn()}},
+\code{\link[=MASTSize]{MASTSize()}},
+\code{\link[=MatchingSplitDistance]{MatchingSplitDistance()}},
+\code{\link[=NyeSimilarity]{NyeSimilarity()}},
+\code{\link[=PathDist]{PathDist()}},
 \code{\link{Robinson-Foulds}},
-\code{\link{SPRDist}()},
-\code{\link{TransferDist}()},
-\code{\link{TreeDistance}()}
+\code{\link[=SPRDist]{SPRDist()}},
+\code{\link[=TransferDist]{TransferDist()}},
+\code{\link[=TreeDistance]{TreeDistance()}}
 }
 \author{
 \href{https://orcid.org/0000-0001-5660-1727}{Martin R. Smith}
diff --git a/man/NyeSimilarity.Rd b/man/NyeSimilarity.Rd
index d984b129..21a40aee 100644
--- a/man/NyeSimilarity.Rd
+++ b/man/NyeSimilarity.Rd
@@ -122,18 +122,18 @@ NyeSimilarity(as.phylo(0:5, nTip = 8), similarity = FALSE)
 \insertAllCited{}
 }
 \seealso{
-Other tree distances: 
-\code{\link{HierarchicalMutualInfo}()},
-\code{\link{JaccardRobinsonFoulds}()},
-\code{\link{KendallColijn}()},
-\code{\link{MASTSize}()},
-\code{\link{MatchingSplitDistance}()},
-\code{\link{NNIDist}()},
-\code{\link{PathDist}()},
+Other tree distances:
+\code{\link[=HierarchicalMutualInfo]{HierarchicalMutualInfo()}},
+\code{\link[=JaccardRobinsonFoulds]{JaccardRobinsonFoulds()}},
+\code{\link[=KendallColijn]{KendallColijn()}},
+\code{\link[=MASTSize]{MASTSize()}},
+\code{\link[=MatchingSplitDistance]{MatchingSplitDistance()}},
+\code{\link[=NNIDist]{NNIDist()}},
+\code{\link[=PathDist]{PathDist()}},
 \code{\link{Robinson-Foulds}},
-\code{\link{SPRDist}()},
-\code{\link{TransferDist}()},
-\code{\link{TreeDistance}()}
+\code{\link[=SPRDist]{SPRDist()}},
+\code{\link[=TransferDist]{TransferDist()}},
+\code{\link[=TreeDistance]{TreeDistance()}}
 }
 \author{
 \href{https://orcid.org/0000-0001-5660-1727}{Martin R. Smith}
diff --git a/man/PathDist.Rd b/man/PathDist.Rd
index 1192d4ce..17a0a094 100644
--- a/man/PathDist.Rd
+++ b/man/PathDist.Rd
@@ -68,18 +68,18 @@ PathDist(as.phylo(30:33, 8))
 \insertAllCited{}
 }
 \seealso{
-Other tree distances: 
-\code{\link{HierarchicalMutualInfo}()},
-\code{\link{JaccardRobinsonFoulds}()},
-\code{\link{KendallColijn}()},
-\code{\link{MASTSize}()},
-\code{\link{MatchingSplitDistance}()},
-\code{\link{NNIDist}()},
-\code{\link{NyeSimilarity}()},
+Other tree distances:
+\code{\link[=HierarchicalMutualInfo]{HierarchicalMutualInfo()}},
+\code{\link[=JaccardRobinsonFoulds]{JaccardRobinsonFoulds()}},
+\code{\link[=KendallColijn]{KendallColijn()}},
+\code{\link[=MASTSize]{MASTSize()}},
+\code{\link[=MatchingSplitDistance]{MatchingSplitDistance()}},
+\code{\link[=NNIDist]{NNIDist()}},
+\code{\link[=NyeSimilarity]{NyeSimilarity()}},
 \code{\link{Robinson-Foulds}},
-\code{\link{SPRDist}()},
-\code{\link{TransferDist}()},
-\code{\link{TreeDistance}()}
+\code{\link[=SPRDist]{SPRDist()}},
+\code{\link[=TransferDist]{TransferDist()}},
+\code{\link[=TreeDistance]{TreeDistance()}}
 }
 \author{
 \href{https://orcid.org/0000-0001-5660-1727}{Martin R. Smith}
diff --git a/man/Robinson-Foulds.Rd b/man/Robinson-Foulds.Rd
index 7510e094..1b4c656b 100644
--- a/man/Robinson-Foulds.Rd
+++ b/man/Robinson-Foulds.Rd
@@ -144,18 +144,18 @@ VisualizeMatching(InfoRobinsonFoulds, balanced7, pectinate7)
 \seealso{
 Display paired splits: \code{\link[=VisualizeMatching]{VisualizeMatching()}}
 
-Other tree distances: 
-\code{\link{HierarchicalMutualInfo}()},
-\code{\link{JaccardRobinsonFoulds}()},
-\code{\link{KendallColijn}()},
-\code{\link{MASTSize}()},
-\code{\link{MatchingSplitDistance}()},
-\code{\link{NNIDist}()},
-\code{\link{NyeSimilarity}()},
-\code{\link{PathDist}()},
-\code{\link{SPRDist}()},
-\code{\link{TransferDist}()},
-\code{\link{TreeDistance}()}
+Other tree distances:
+\code{\link[=HierarchicalMutualInfo]{HierarchicalMutualInfo()}},
+\code{\link[=JaccardRobinsonFoulds]{JaccardRobinsonFoulds()}},
+\code{\link[=KendallColijn]{KendallColijn()}},
+\code{\link[=MASTSize]{MASTSize()}},
+\code{\link[=MatchingSplitDistance]{MatchingSplitDistance()}},
+\code{\link[=NNIDist]{NNIDist()}},
+\code{\link[=NyeSimilarity]{NyeSimilarity()}},
+\code{\link[=PathDist]{PathDist()}},
+\code{\link[=SPRDist]{SPRDist()}},
+\code{\link[=TransferDist]{TransferDist()}},
+\code{\link[=TreeDistance]{TreeDistance()}}
 }
 \author{
 \href{https://orcid.org/0000-0001-5660-1727}{Martin R. Smith}
diff --git a/man/SPRDist.Rd b/man/SPRDist.Rd
index 5075abda..572f54a8 100644
--- a/man/SPRDist.Rd
+++ b/man/SPRDist.Rd
@@ -74,18 +74,18 @@ functions \code{USPRDist()} and \code{ReplugDist()}.
 the \insertCite{deOliveira2008;textual}{TreeDist} algorithm but can crash
 when sent trees of certain formats, and tends to have a longer running time.
 
-Other tree distances: 
-\code{\link{HierarchicalMutualInfo}()},
-\code{\link{JaccardRobinsonFoulds}()},
-\code{\link{KendallColijn}()},
-\code{\link{MASTSize}()},
-\code{\link{MatchingSplitDistance}()},
-\code{\link{NNIDist}()},
-\code{\link{NyeSimilarity}()},
-\code{\link{PathDist}()},
+Other tree distances:
+\code{\link[=HierarchicalMutualInfo]{HierarchicalMutualInfo()}},
+\code{\link[=JaccardRobinsonFoulds]{JaccardRobinsonFoulds()}},
+\code{\link[=KendallColijn]{KendallColijn()}},
+\code{\link[=MASTSize]{MASTSize()}},
+\code{\link[=MatchingSplitDistance]{MatchingSplitDistance()}},
+\code{\link[=NNIDist]{NNIDist()}},
+\code{\link[=NyeSimilarity]{NyeSimilarity()}},
+\code{\link[=PathDist]{PathDist()}},
 \code{\link{Robinson-Foulds}},
-\code{\link{TransferDist}()},
-\code{\link{TreeDistance}()}
+\code{\link[=TransferDist]{TransferDist()}},
+\code{\link[=TreeDistance]{TreeDistance()}}
 }
 \author{
 \href{https://orcid.org/0000-0001-5660-1727}{Martin R. Smith}
diff --git a/man/SpectralEigens.Rd b/man/SpectralEigens.Rd
index 16067450..546e2b98 100644
--- a/man/SpectralEigens.Rd
+++ b/man/SpectralEigens.Rd
@@ -37,13 +37,13 @@ plot(eigens, pch = 15, col = clusts$cluster)
 plot(cmdscale(distances), pch = 15, col = clusts$cluster)
 }
 \seealso{
-Other tree space functions: 
-\code{\link{Islands}()},
-\code{\link{MSTSegments}()},
-\code{\link{MapTrees}()},
-\code{\link{MappingQuality}()},
+Other tree space functions:
+\code{\link[=Islands]{Islands()}},
+\code{\link[=MSTSegments]{MSTSegments()}},
+\code{\link[=MapTrees]{MapTrees()}},
+\code{\link[=MappingQuality]{MappingQuality()}},
 \code{\link{cluster-statistics}},
-\code{\link{median.multiPhylo}()}
+\code{\link[=median.multiPhylo]{median.multiPhylo()}}
 }
 \author{
 Adapted by MRS from script by \href{https://rpubs.com/nurakawa/spectral-clustering}{Nura Kawa}
diff --git a/man/SplitEntropy.Rd b/man/SplitEntropy.Rd
index 4388b274..494dd322 100644
--- a/man/SplitEntropy.Rd
+++ b/man/SplitEntropy.Rd
@@ -40,8 +40,8 @@ SplitEntropy(c(A, A, A, B, B, B), c(A, A, B, B, B, B))
 \insertAllCited{}
 }
 \seealso{
-Other information functions: 
-\code{\link{SplitSharedInformation}()},
+Other information functions:
+\code{\link[=SplitSharedInformation]{SplitSharedInformation()}},
 \code{\link{TreeInfo}}
 }
 \author{
diff --git a/man/SplitSharedInformation.Rd b/man/SplitSharedInformation.Rd
index 57560887..25f1fc78 100644
--- a/man/SplitSharedInformation.Rd
+++ b/man/SplitSharedInformation.Rd
@@ -87,8 +87,8 @@ splits.
 \insertAllCited{}
 }
 \seealso{
-Other information functions: 
-\code{\link{SplitEntropy}()},
+Other information functions:
+\code{\link[=SplitEntropy]{SplitEntropy()}},
 \code{\link{TreeInfo}}
 }
 \author{
diff --git a/man/TransferConsensus.Rd b/man/TransferConsensus.Rd
index 219111ab..1c964d4d 100644
--- a/man/TransferConsensus.Rd
+++ b/man/TransferConsensus.Rd
@@ -66,7 +66,7 @@ cat("Transfer consensus splits:", NSplits(tc), "\n")
 \insertAllCited{}
 }
 \seealso{
-Other summary trees: 
-\code{\link{MCITree}()}
+Other summary trees:
+\code{\link[=MCITree]{MCITree()}}
 }
 \concept{summary trees}
diff --git a/man/TransferDist.Rd b/man/TransferDist.Rd
index 2439130f..d9361014 100644
--- a/man/TransferDist.Rd
+++ b/man/TransferDist.Rd
@@ -102,17 +102,17 @@ TransferDist(as.phylo(0:5, 8))
 \insertAllCited{}
 }
 \seealso{
-Other tree distances: 
-\code{\link{HierarchicalMutualInfo}()},
-\code{\link{JaccardRobinsonFoulds}()},
-\code{\link{KendallColijn}()},
-\code{\link{MASTSize}()},
-\code{\link{MatchingSplitDistance}()},
-\code{\link{NNIDist}()},
-\code{\link{NyeSimilarity}()},
-\code{\link{PathDist}()},
+Other tree distances:
+\code{\link[=HierarchicalMutualInfo]{HierarchicalMutualInfo()}},
+\code{\link[=JaccardRobinsonFoulds]{JaccardRobinsonFoulds()}},
+\code{\link[=KendallColijn]{KendallColijn()}},
+\code{\link[=MASTSize]{MASTSize()}},
+\code{\link[=MatchingSplitDistance]{MatchingSplitDistance()}},
+\code{\link[=NNIDist]{NNIDist()}},
+\code{\link[=NyeSimilarity]{NyeSimilarity()}},
+\code{\link[=PathDist]{PathDist()}},
 \code{\link{Robinson-Foulds}},
-\code{\link{SPRDist}()},
-\code{\link{TreeDistance}()}
+\code{\link[=SPRDist]{SPRDist()}},
+\code{\link[=TreeDistance]{TreeDistance()}}
 }
 \concept{tree distances}
diff --git a/man/TreeDist-package.Rd b/man/TreeDist-package.Rd
index 15872b54..7d4da0d5 100644
--- a/man/TreeDist-package.Rd
+++ b/man/TreeDist-package.Rd
@@ -134,6 +134,11 @@ Geodesic distance
 \author{
 \strong{Maintainer}: Martin R. Smith \email{martin.smith@durham.ac.uk} (\href{https://orcid.org/0000-0001-5660-1727}{ORCID}) [copyright holder, programmer]
 
+Authors:
+\itemize{
+  \item Martin R. Smith \email{martin.smith@durham.ac.uk} (\href{https://orcid.org/0000-0001-5660-1727}{ORCID}) [copyright holder, programmer]
+}
+
 Other contributors:
 \itemize{
   \item Roy Jonker \email{roy_jonker@magiclogic.com} (LAP algorithm) [programmer, copyright holder]
diff --git a/man/TreeDistance.Rd b/man/TreeDistance.Rd
index f081b6d3..3bda3fed 100644
--- a/man/TreeDistance.Rd
+++ b/man/TreeDistance.Rd
@@ -318,18 +318,18 @@ MutualClusteringInfoSplits(splits1, splits2)
 \insertAllCited{}
 }
 \seealso{
-Other tree distances: 
-\code{\link{HierarchicalMutualInfo}()},
-\code{\link{JaccardRobinsonFoulds}()},
-\code{\link{KendallColijn}()},
-\code{\link{MASTSize}()},
-\code{\link{MatchingSplitDistance}()},
-\code{\link{NNIDist}()},
-\code{\link{NyeSimilarity}()},
-\code{\link{PathDist}()},
+Other tree distances:
+\code{\link[=HierarchicalMutualInfo]{HierarchicalMutualInfo()}},
+\code{\link[=JaccardRobinsonFoulds]{JaccardRobinsonFoulds()}},
+\code{\link[=KendallColijn]{KendallColijn()}},
+\code{\link[=MASTSize]{MASTSize()}},
+\code{\link[=MatchingSplitDistance]{MatchingSplitDistance()}},
+\code{\link[=NNIDist]{NNIDist()}},
+\code{\link[=NyeSimilarity]{NyeSimilarity()}},
+\code{\link[=PathDist]{PathDist()}},
 \code{\link{Robinson-Foulds}},
-\code{\link{SPRDist}()},
-\code{\link{TransferDist}()}
+\code{\link[=SPRDist]{SPRDist()}},
+\code{\link[=TransferDist]{TransferDist()}}
 }
 \author{
 \href{https://orcid.org/0000-0001-5660-1727}{Martin R. Smith}
diff --git a/man/TreeInfo.Rd b/man/TreeInfo.Rd
index a31234d8..9f80d53f 100644
--- a/man/TreeInfo.Rd
+++ b/man/TreeInfo.Rd
@@ -205,9 +205,9 @@ An introduction to the phylogenetic information content of a split is given
 in \href{https://ms609.github.io/TreeTools/reference/SplitInformation.html}{\code{SplitInformation()}}
 and in a \href{https://ms609.github.io/TreeDist/articles/information.html}{package vignette}.
 
-Other information functions: 
-\code{\link{SplitEntropy}()},
-\code{\link{SplitSharedInformation}()}
+Other information functions:
+\code{\link[=SplitEntropy]{SplitEntropy()}},
+\code{\link[=SplitSharedInformation]{SplitSharedInformation()}}
 }
 \author{
 \href{https://orcid.org/0000-0001-5660-1727}{Martin R. Smith}
diff --git a/man/cluster-statistics.Rd b/man/cluster-statistics.Rd
index eb997329..e892a26d 100644
--- a/man/cluster-statistics.Rd
+++ b/man/cluster-statistics.Rd
@@ -87,16 +87,16 @@ MeanNN(points, cluster)
 MeanMSTEdge(points, cluster)
 }
 \seealso{
-Other tree space functions: 
-\code{\link{Islands}()},
-\code{\link{MSTSegments}()},
-\code{\link{MapTrees}()},
-\code{\link{MappingQuality}()},
-\code{\link{SpectralEigens}()},
-\code{\link{median.multiPhylo}()}
-
-Other cluster functions: 
-\code{\link{KMeansPP}()}
+Other tree space functions:
+\code{\link[=Islands]{Islands()}},
+\code{\link[=MSTSegments]{MSTSegments()}},
+\code{\link[=MapTrees]{MapTrees()}},
+\code{\link[=MappingQuality]{MappingQuality()}},
+\code{\link[=SpectralEigens]{SpectralEigens()}},
+\code{\link[=median.multiPhylo]{median.multiPhylo()}}
+
+Other cluster functions:
+\code{\link[=KMeansPP]{KMeansPP()}}
 }
 \author{
 \href{https://orcid.org/0000-0001-5660-1727}{Martin R. Smith}
diff --git a/man/median.multiPhylo.Rd b/man/median.multiPhylo.Rd
index 43282ceb..ad1fe2f8 100644
--- a/man/median.multiPhylo.Rd
+++ b/man/median.multiPhylo.Rd
@@ -79,12 +79,12 @@ Consensus methods:
 \code{\link[ape:consensus]{ape::consensus()}},
 \code{\link[TreeTools:ConsensusWithout]{TreeTools::ConsensusWithout()}}
 
-Other tree space functions: 
-\code{\link{Islands}()},
-\code{\link{MSTSegments}()},
-\code{\link{MapTrees}()},
-\code{\link{MappingQuality}()},
-\code{\link{SpectralEigens}()},
+Other tree space functions:
+\code{\link[=Islands]{Islands()}},
+\code{\link[=MSTSegments]{MSTSegments()}},
+\code{\link[=MapTrees]{MapTrees()}},
+\code{\link[=MappingQuality]{MappingQuality()}},
+\code{\link[=SpectralEigens]{SpectralEigens()}},
 \code{\link{cluster-statistics}}
 }
 \author{
diff --git a/memcheck/tests.R b/memcheck/tests.R
index e85bebd2..14c7388a 100644
--- a/memcheck/tests.R
+++ b/memcheck/tests.R
@@ -1,4 +1,3 @@
 # Code to be run with
 #   R -d "valgrind --tool=memcheck --leak-check=full --error-exitcode=1" --vanilla < memcheck/thisfile.R
-library("TreeDist")
-devtools::test()
+testthat::test_local()
diff --git a/memcheck/vignettes.R b/memcheck/vignettes.R
index 0c02f716..9c2ac44c 100644
--- a/memcheck/vignettes.R
+++ b/memcheck/vignettes.R
@@ -1,3 +1,3 @@
 # Code to be run with
 #   R -d "valgrind --tool=memcheck --leak-check=full --error-exitcode=1" --vanilla < memcheck/thisfile.R
-devtools::build_vignettes(install = FALSE)
+pkgdown::build_articles(preview = FALSE)
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index c78c3efc..116f48d3 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -727,15 +727,16 @@ BEGIN_RCPP
 END_RCPP
 }
 // cpp_shared_phylo
-List cpp_shared_phylo(const RawMatrix& x, const RawMatrix& y, const IntegerVector& nTip);
-RcppExport SEXP _TreeDist_cpp_shared_phylo(SEXP xSEXP, SEXP ySEXP, SEXP nTipSEXP) {
+List cpp_shared_phylo(const RawMatrix& x, const RawMatrix& y, const IntegerVector& nTip, const bool force_slow);
+RcppExport SEXP _TreeDist_cpp_shared_phylo(SEXP xSEXP, SEXP ySEXP, SEXP nTipSEXP, SEXP force_slowSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
     Rcpp::traits::input_parameter< const RawMatrix& >::type x(xSEXP);
     Rcpp::traits::input_parameter< const RawMatrix& >::type y(ySEXP);
     Rcpp::traits::input_parameter< const IntegerVector& >::type nTip(nTipSEXP);
-    rcpp_result_gen = Rcpp::wrap(cpp_shared_phylo(x, y, nTip));
+    Rcpp::traits::input_parameter< const bool >::type force_slow(force_slowSEXP);
+    rcpp_result_gen = Rcpp::wrap(cpp_shared_phylo(x, y, nTip, force_slow));
     return rcpp_result_gen;
 END_RCPP
 }
@@ -797,7 +798,7 @@ static const R_CallMethodDef CallEntries[] = {
     {"_TreeDist_cpp_jaccard_similarity", (DL_FUNC) &_TreeDist_cpp_jaccard_similarity, 5},
     {"_TreeDist_cpp_msi_distance", (DL_FUNC) &_TreeDist_cpp_msi_distance, 3},
     {"_TreeDist_cpp_mutual_clustering", (DL_FUNC) &_TreeDist_cpp_mutual_clustering, 3},
-    {"_TreeDist_cpp_shared_phylo", (DL_FUNC) &_TreeDist_cpp_shared_phylo, 3},
+    {"_TreeDist_cpp_shared_phylo", (DL_FUNC) &_TreeDist_cpp_shared_phylo, 4},
     {NULL, NULL, 0}
 };
 
diff --git a/src/day_1985.cpp b/src/day_1985.cpp
index 70ab4dbd..157bf073 100644
--- a/src/day_1985.cpp
+++ b/src/day_1985.cpp
@@ -98,7 +98,7 @@ double calc_consensus_info(const List &trees, const LogicalVector &phylo,
 
   std::vector<ClusterTable> tables;
   if (std::size_t(n_trees) > tables.max_size()) {
-    ASSERT(false && "Not enough memory for consensus of so many trees"); // LCOV_EXCL_LINE
+    ASSERT(false && "Not enough memory for consensus of so many trees"); // #nocov
   }
 
   tables.reserve(n_trees);
diff --git a/src/ints.h b/src/ints.h
index 50fd3cc7..3d424c4c 100644
--- a/src/ints.h
+++ b/src/ints.h
@@ -6,6 +6,7 @@
 // Re-export shared types to global scope for backward compatibility.
 using TreeDist::int16;
 using TreeDist::int32;
+using TreeDist::split_int;
 
 // Types used only within TreeDist's own source.
 using uint32 = uint_fast32_t;
diff --git a/src/pairwise_distances.cpp b/src/pairwise_distances.cpp
index 67db48ab..760e7ff8 100644
--- a/src/pairwise_distances.cpp
+++ b/src/pairwise_distances.cpp
@@ -34,12 +34,12 @@ using TreeTools::count_bits;
 // per-pair heap allocation.  Vectors grow lazily and are never shrunk.
 // ---------------------------------------------------------------------------
 struct MatchScratch {
-  std::vector<splitbit> a_canon;
-  std::vector<splitbit> b_canon;
-  std::vector<int16>    a_order;
-  std::vector<int16>    b_order;
-  std::vector<int16>    a_match;
-  std::vector<int16>    b_match;
+  std::vector<splitbit>   a_canon;
+  std::vector<splitbit>   b_canon;
+  std::vector<split_int>  a_order;
+  std::vector<split_int>  b_order;
+  std::vector<split_int>  a_match;
+  std::vector<split_int>  b_match;
 };
 
 // ---------------------------------------------------------------------------
@@ -59,19 +59,19 @@ struct MatchScratch {
 //   a_match[ai] = bi+1 if split ai matched split bi, else 0.
 //   b_match[bi] = ai+1 if split bi matched split ai, else 0.
 // ---------------------------------------------------------------------------
-static int16 find_exact_matches(
+static split_int find_exact_matches(
     const SplitList& a, const SplitList& b,
     const int32 n_tips,
     MatchScratch& scratch
 ) {
-  const int16 n_bins   = a.n_bins;
-  const int16 last_bin = n_bins - 1;
+  const split_int n_bins   = a.n_bins;
+  const split_int last_bin = n_bins - 1;
   const splitbit last_mask = (n_tips % SL_BIN_SIZE == 0)
     ? ~splitbit(0)
     : (splitbit(1) << (n_tips % SL_BIN_SIZE)) - 1;
 
-  const int16 a_n = a.n_splits;
-  const int16 b_n = b.n_splits;
+  const split_int a_n = a.n_splits;
+  const split_int b_n = b.n_splits;
 
   // Ensure buffers are large enough (grow lazily, never shrink)
   const size_t a_canon_sz = static_cast<size_t>(a_n) * n_bins;
@@ -83,10 +83,10 @@ static int16 find_exact_matches(
   if (scratch.a_match.size() < static_cast<size_t>(a_n)) scratch.a_match.resize(a_n);
   if (scratch.b_match.size() < static_cast<size_t>(b_n)) scratch.b_match.resize(b_n);
 
-  int16* a_match = scratch.a_match.data();
-  int16* b_match = scratch.b_match.data();
-  std::fill(a_match, a_match + a_n, int16(0));
-  std::fill(b_match, b_match + b_n, int16(0));
+  split_int* a_match = scratch.a_match.data();
+  split_int* b_match = scratch.b_match.data();
+  std::fill(a_match, a_match + a_n, split_int(0));
+  std::fill(b_match, b_match + b_n, split_int(0));
 
   if (a_n == 0 || b_n == 0) return 0;
 
@@ -94,17 +94,17 @@ static int16 find_exact_matches(
   splitbit* b_canon = scratch.b_canon.data();
 
   // --- 1. Compute canonical forms into flat buffers ---
-  for (int16 i = 0; i < a_n; ++i) {
+  for (split_int i = 0; i < a_n; ++i) {
     const bool flip = !(a.state[i][0] & 1);
-    for (int16 bin = 0; bin < n_bins; ++bin) {
+    for (split_int bin = 0; bin < n_bins; ++bin) {
       splitbit val = flip ? ~a.state[i][bin] : a.state[i][bin];
       if (bin == last_bin) val &= last_mask;
       a_canon[i * n_bins + bin] = val;
     }
   }
-  for (int16 i = 0; i < b_n; ++i) {
+  for (split_int i = 0; i < b_n; ++i) {
     const bool flip = !(b.state[i][0] & 1);
-    for (int16 bin = 0; bin < n_bins; ++bin) {
+    for (split_int bin = 0; bin < n_bins; ++bin) {
       splitbit val = flip ? ~b.state[i][bin] : b.state[i][bin];
       if (bin == last_bin) val &= last_mask;
       b_canon[i * n_bins + bin] = val;
@@ -112,8 +112,8 @@ static int16 find_exact_matches(
   }
 
   // --- 2. Sort index arrays by canonical form ---
-  auto canon_less = [&](const splitbit* canon, int16 i, int16 j) {
-    for (int16 bin = 0; bin < n_bins; ++bin) {
+  auto canon_less = [&](const splitbit* canon, split_int i, split_int j) {
+    for (split_int bin = 0; bin < n_bins; ++bin) {
       const splitbit vi = canon[i * n_bins + bin];
       const splitbit vj = canon[j * n_bins + bin];
       if (vi < vj) return true;
@@ -122,29 +122,29 @@ static int16 find_exact_matches(
     return false; // #nocov
   };
 
-  int16* a_order = scratch.a_order.data();
-  int16* b_order = scratch.b_order.data();
-  std::iota(a_order, a_order + a_n, int16(0));
-  std::iota(b_order, b_order + b_n, int16(0));
+  split_int* a_order = scratch.a_order.data();
+  split_int* b_order = scratch.b_order.data();
+  std::iota(a_order, a_order + a_n, split_int(0));
+  std::iota(b_order, b_order + b_n, split_int(0));
 
   std::sort(a_order, a_order + a_n,
-            [&](int16 i, int16 j) {
+            [&](split_int i, split_int j) {
               return canon_less(a_canon, i, j);
             });
   std::sort(b_order, b_order + b_n,
-            [&](int16 i, int16 j) {
+            [&](split_int i, split_int j) {
               return canon_less(b_canon, i, j);
             });
 
   // --- 3. Merge-scan to find matches ---
-  int16 exact_n = 0;
-  int16 ai_pos = 0, bi_pos = 0;
+  split_int exact_n = 0;
+  split_int ai_pos = 0, bi_pos = 0;
   while (ai_pos < a_n && bi_pos < b_n) {
-    const int16 ai = a_order[ai_pos];
-    const int16 bi = b_order[bi_pos];
+    const split_int ai = a_order[ai_pos];
+    const split_int bi = b_order[bi_pos];
 
     int cmp = 0;
-    for (int16 bin = 0; bin < n_bins; ++bin) {
+    for (split_int bin = 0; bin < n_bins; ++bin) {
       const splitbit va = a_canon[ai * n_bins + bin];
       const splitbit vb = b_canon[bi * n_bins + bin];
       if (va < vb) { cmp = -1; break; }
@@ -178,25 +178,25 @@ static double mutual_clustering_score(
 ) {
   if (a.n_splits == 0 || b.n_splits == 0 || n_tips == 0) return 0.0;
 
-  const int16 most_splits = std::max(a.n_splits, b.n_splits);
+  const split_int most_splits = std::max(a.n_splits, b.n_splits);
   const double n_tips_rcp = 1.0 / static_cast<double>(n_tips);
 
   constexpr cost max_score  = BIG;
   constexpr double over_max = 1.0 / static_cast<double>(BIG);
   const double max_over_tips = static_cast<double>(BIG) * n_tips_rcp;
-  const double lg2_n = lg2[n_tips];
+  const double lg2_n = lg2_lookup(n_tips);
 
   // --- Phase 1: O(n log n) exact-match detection ---
-  const int16 exact_n = find_exact_matches(a, b, n_tips, mscratch);
-  const int16* a_match = mscratch.a_match.data();
-  const int16* b_match = mscratch.b_match.data();
+  const split_int exact_n = find_exact_matches(a, b, n_tips, mscratch);
+  const split_int* a_match = mscratch.a_match.data();
+  const split_int* b_match = mscratch.b_match.data();
 
   // Accumulate exact-match score
   double exact_score = 0.0;
-  for (int16 ai = 0; ai < a.n_splits; ++ai) {
+  for (split_int ai = 0; ai < a.n_splits; ++ai) {
     if (a_match[ai]) {
-      const int16 na = a.in_split[ai];
-      const int16 nA = n_tips - na;
+      const split_int na = a.in_split[ai];
+      const split_int nA = n_tips - na;
       exact_score += TreeDist::ic_matching(na, nA, n_tips);
     }
   }
@@ -207,58 +207,58 @@ static double mutual_clustering_score(
   }
 
   // --- Phase 2: fill cost matrix for unmatched splits only (O(k²)) ---
-  const int16 lap_n = most_splits - exact_n;
+  const split_int lap_n = most_splits - exact_n;
 
   // Build index maps for unmatched splits
-  std::vector<int16> a_unmatch, b_unmatch;
+  std::vector<split_int> a_unmatch, b_unmatch;
   a_unmatch.reserve(lap_n);
   b_unmatch.reserve(lap_n);
-  for (int16 ai = 0; ai < a.n_splits; ++ai) {
+  for (split_int ai = 0; ai < a.n_splits; ++ai) {
     if (!a_match[ai]) a_unmatch.push_back(ai);
   }
-  for (int16 bi = 0; bi < b.n_splits; ++bi) {
+  for (split_int bi = 0; bi < b.n_splits; ++bi) {
     if (!b_match[bi]) b_unmatch.push_back(bi);
   }
 
   scratch.score_pool.resize(lap_n);
   cost_matrix& score = scratch.score_pool;
 
-  const int16 a_unmatched_n = static_cast<int16>(a_unmatch.size());
-  const int16 b_unmatched_n = static_cast<int16>(b_unmatch.size());
+  const split_int a_unmatched_n = static_cast<split_int>(a_unmatch.size());
+  const split_int b_unmatched_n = static_cast<split_int>(b_unmatch.size());
 
-  for (int16 a_pos = 0; a_pos < a_unmatched_n; ++a_pos) {
-    const int16 ai   = a_unmatch[a_pos];
-    const int16 na   = a.in_split[ai];
-    const int16 nA   = n_tips - na;
+  for (split_int a_pos = 0; a_pos < a_unmatched_n; ++a_pos) {
+    const split_int ai   = a_unmatch[a_pos];
+    const split_int na   = a.in_split[ai];
+    const split_int nA   = n_tips - na;
     const auto* a_row = a.state[ai];
 
-    const double offset_a = lg2_n - lg2[na];
-    const double offset_A = lg2_n - lg2[nA];
+    const double offset_a = lg2_n - lg2_lookup(na);
+    const double offset_A = lg2_n - lg2_lookup(nA);
 
-    for (int16 b_pos = 0; b_pos < b_unmatched_n; ++b_pos) {
-      const int16 bi   = b_unmatch[b_pos];
+    for (split_int b_pos = 0; b_pos < b_unmatched_n; ++b_pos) {
+      const split_int bi   = b_unmatch[b_pos];
       const auto* b_row = b.state[bi];
-      int16 a_and_b = 0;
-      for (int16 bin = 0; bin < a.n_bins; ++bin) {
+      split_int a_and_b = 0;
+      for (split_int bin = 0; bin < a.n_bins; ++bin) {
         a_and_b += count_bits(a_row[bin] & b_row[bin]);
       }
 
-      const int16 nb    = b.in_split[bi];
-      const int16 nB    = n_tips - nb;
-      const int16 a_and_B = na - a_and_b;
-      const int16 A_and_b = nb - a_and_b;
-      const int16 A_and_B = nA - A_and_b;
+      const split_int nb    = b.in_split[bi];
+      const split_int nB    = n_tips - nb;
+      const split_int a_and_B = na - a_and_b;
+      const split_int A_and_b = nb - a_and_b;
+      const split_int A_and_B = nA - A_and_b;
 
       if (a_and_b == A_and_b && a_and_b == a_and_B && a_and_b == A_and_B) {
         score(a_pos, b_pos) = max_score;
       } else {
-        const double lg2_nb = lg2[nb];
-        const double lg2_nB = lg2[nB];
+        const double lg2_nb = lg2_lookup(nb);
+        const double lg2_nB = lg2_lookup(nB);
         const double ic_sum =
-          a_and_b * (lg2[a_and_b] + offset_a - lg2_nb) +
-          a_and_B * (lg2[a_and_B] + offset_a - lg2_nB) +
-          A_and_b * (lg2[A_and_b] + offset_A - lg2_nb) +
-          A_and_B * (lg2[A_and_B] + offset_A - lg2_nB);
+          a_and_b * (lg2_lookup(a_and_b) + offset_a - lg2_nb) +
+          a_and_B * (lg2_lookup(a_and_B) + offset_a - lg2_nB) +
+          A_and_b * (lg2_lookup(A_and_b) + offset_A - lg2_nb) +
+          A_and_B * (lg2_lookup(A_and_B) + offset_A - lg2_nB);
         score(a_pos, b_pos) = max_score - static_cast<cost>(ic_sum * max_over_tips);
       }
     }
@@ -366,27 +366,27 @@ static double rf_info_score(
     const SplitList& a, const SplitList& b, const int32 n_tips,
     MatchScratch& mscratch
 ) {
-  const int16 a_n = a.n_splits;
-  const int16 b_n = b.n_splits;
+  const split_int a_n = a.n_splits;
+  const split_int b_n = b.n_splits;
   if (a_n == 0 || b_n == 0) return 0;
 
   // Use sort+merge to find exact matches in O(n log n)
-  const int16 exact_n = find_exact_matches(a, b, n_tips, mscratch);
+  const split_int exact_n = find_exact_matches(a, b, n_tips, mscratch);
   if (exact_n == 0) return 0;
 
   // Sum info contribution for each matched split in a
-  const int16* a_match = mscratch.a_match.data();
-  const double lg2_unrooted_n = lg2_unrooted[n_tips];
+  const split_int* a_match = mscratch.a_match.data();
+  const double lg2_unrooted_n = lg2_unrooted_lookup(n_tips);
   double score = 0;
-  for (int16 ai = 0; ai < a_n; ++ai) {
+  for (split_int ai = 0; ai < a_n; ++ai) {
     if (a_match[ai] == 0) continue;
-    int16 leaves_in_split = 0;
-    for (int16 bin = 0; bin < a.n_bins; ++bin) {
+    split_int leaves_in_split = 0;
+    for (split_int bin = 0; bin < a.n_bins; ++bin) {
       leaves_in_split += count_bits(a.state[ai][bin]);
     }
     score += lg2_unrooted_n
-           - lg2_rooted[leaves_in_split]
-           - lg2_rooted[n_tips - leaves_in_split];
+           - lg2_rooted_lookup(leaves_in_split)
+           - lg2_rooted_lookup(n_tips - leaves_in_split);
   }
   return score;
 }
@@ -446,48 +446,48 @@ static double msd_score(
     const SplitList& a, const SplitList& b, const int32 n_tips,
     LapScratch& scratch, MatchScratch& mscratch
 ) {
-  const int16 most_splits = std::max(a.n_splits, b.n_splits);
+  const split_int most_splits = std::max(a.n_splits, b.n_splits);
   if (most_splits == 0) return 0.0;
-  const bool  a_has_more  = (a.n_splits > b.n_splits);
-  const int16 a_extra     = a_has_more ? most_splits - b.n_splits : 0;
-  const int16 b_extra     = a_has_more ? 0 : most_splits - a.n_splits;
-  const int16 half_tips   = n_tips / 2;
-  const cost  max_score   = BIG / most_splits;
+  const bool      a_has_more  = (a.n_splits > b.n_splits);
+  const split_int a_extra     = a_has_more ? most_splits - b.n_splits : 0;
+  const split_int b_extra     = a_has_more ? 0 : most_splits - a.n_splits;
+  const split_int half_tips   = n_tips / 2;
+  const cost      max_score   = BIG / most_splits;
 
   // --- Phase 1: O(n log n) exact-match detection ---
-  const int16 exact_n = find_exact_matches(a, b, n_tips, mscratch);
-  const int16* a_match = mscratch.a_match.data();
-  const int16* b_match = mscratch.b_match.data();
+  const split_int exact_n = find_exact_matches(a, b, n_tips, mscratch);
+  const split_int* a_match = mscratch.a_match.data();
+  const split_int* b_match = mscratch.b_match.data();
 
   if (exact_n == b.n_splits || exact_n == a.n_splits) {
     return 0.0;
   }
 
   // --- Phase 2: fill cost matrix for unmatched splits only ---
-  const int16 lap_n = most_splits - exact_n;
+  const split_int lap_n = most_splits - exact_n;
 
-  std::vector<int16> a_unmatch, b_unmatch;
+  std::vector<split_int> a_unmatch, b_unmatch;
   a_unmatch.reserve(lap_n);
   b_unmatch.reserve(lap_n);
-  for (int16 ai = 0; ai < a.n_splits; ++ai) {
+  for (split_int ai = 0; ai < a.n_splits; ++ai) {
     if (!a_match[ai]) a_unmatch.push_back(ai);
   }
-  for (int16 bi = 0; bi < b.n_splits; ++bi) {
+  for (split_int bi = 0; bi < b.n_splits; ++bi) {
     if (!b_match[bi]) b_unmatch.push_back(bi);
   }
 
   scratch.score_pool.resize(lap_n);
   cost_matrix& score = scratch.score_pool;
 
-  const int16 a_unmatched_n = static_cast<int16>(a_unmatch.size());
-  const int16 b_unmatched_n = static_cast<int16>(b_unmatch.size());
+  const split_int a_unmatched_n = static_cast<split_int>(a_unmatch.size());
+  const split_int b_unmatched_n = static_cast<split_int>(b_unmatch.size());
 
-  for (int16 a_pos = 0; a_pos < a_unmatched_n; ++a_pos) {
-    const int16 ai = a_unmatch[a_pos];
-    for (int16 b_pos = 0; b_pos < b_unmatched_n; ++b_pos) {
-      const int16 bi = b_unmatch[b_pos];
+  for (split_int a_pos = 0; a_pos < a_unmatched_n; ++a_pos) {
+    const split_int ai = a_unmatch[a_pos];
+    for (split_int b_pos = 0; b_pos < b_unmatched_n; ++b_pos) {
+      const split_int bi = b_unmatch[b_pos];
       splitbit total = 0;
-      for (int16 bin = 0; bin < a.n_bins; ++bin) {
+      for (split_int bin = 0; bin < a.n_bins; ++bin) {
         total += count_bits(a.state[ai][bin] ^ b.state[bi][bin]);
       }
       score(a_pos, b_pos) = static_cast<cost>(
@@ -574,30 +574,30 @@ static double msi_score(
     const SplitList& a, const SplitList& b, const int32 n_tips,
     LapScratch& scratch
 ) {
-  const int16 most_splits = std::max(a.n_splits, b.n_splits);
+  const split_int most_splits = std::max(a.n_splits, b.n_splits);
   if (most_splits == 0) return 0.0;
 
   constexpr cost max_score = BIG;
-  const double max_possible = lg2_unrooted[n_tips]
-    - lg2_rooted[int16((n_tips + 1) / 2)]
-    - lg2_rooted[int16(n_tips / 2)];
+  const double max_possible = lg2_unrooted_lookup(n_tips)
+    - lg2_rooted_lookup((n_tips + 1) / 2)
+    - lg2_rooted_lookup(n_tips / 2);
   const double score_over_possible = static_cast<double>(max_score) / max_possible;
   const double possible_over_score = max_possible / static_cast<double>(max_score);
 
   scratch.score_pool.resize(most_splits);
   cost_matrix& score = scratch.score_pool;
 
-  for (int16 ai = 0; ai < a.n_splits; ++ai) {
-    for (int16 bi = 0; bi < b.n_splits; ++bi) {
-      int16 n_a_only = 0, n_a_and_b = 0, n_different = 0;
+  for (split_int ai = 0; ai < a.n_splits; ++ai) {
+    for (split_int bi = 0; bi < b.n_splits; ++bi) {
+      split_int n_a_only = 0, n_a_and_b = 0, n_different = 0;
       splitbit different;
-      for (int16 bin = 0; bin < a.n_bins; ++bin) {
+      for (split_int bin = 0; bin < a.n_bins; ++bin) {
         different   = a.state[ai][bin] ^ b.state[bi][bin];
         n_different += count_bits(different);
         n_a_only   += count_bits(a.state[ai][bin] &  different);
         n_a_and_b  += count_bits(a.state[ai][bin] & ~different);
       }
-      const int16 n_same = n_tips - n_different;
+      const split_int n_same = n_tips - n_different;
       score(ai, bi) = cost(max_score - score_over_possible *
         TreeDist::mmsi_score(n_same, n_a_and_b, n_different, n_a_only));
     }
@@ -669,32 +669,68 @@ NumericVector cpp_msi_all_pairs(
 // metric, spi_overlap(A, B) where B contains A can EXCEED spi_overlap(A, A)
 // for identical splits, so the LAP can find a better global assignment by NOT
 // matching identical splits.  The full LAP is always solved.
+// LAP-fill body for shared_phylo_score, templated on Fast so the per-cell
+// spi_overlap call resolves at compile time to direct lg2_rooted[] access
+// (Fast=true) or the bounds-checked lookup (Fast=false).
+template<bool Fast>
+static inline void shared_phylo_fill_lap(
+    const SplitList& a, const SplitList& b, const int32 n_tips,
+    const double best_overlap, const double score_over_possible,
+    const cost max_score, cost_matrix& score
+) {
+  for (split_int ai = 0; ai < a.n_splits; ++ai) {
+    for (split_int bi = 0; bi < b.n_splits; ++bi) {
+      const double spi = TreeDist::spi_overlap<Fast>(
+        a.state[ai], b.state[bi], n_tips,
+        a.in_split[ai], b.in_split[bi], a.n_bins);
+      score(ai, bi) = (spi == 0.0) ? max_score
+                                   : cost((spi - best_overlap) * score_over_possible);
+    }
+    score.padRowAfterCol(ai, b.n_splits, max_score);
+  }
+}
+
 static double shared_phylo_score(
     const SplitList& a, const SplitList& b, const int32 n_tips,
     LapScratch& scratch
 ) {
-  const int16 most_splits = std::max(a.n_splits, b.n_splits);
+  const split_int most_splits = std::max(a.n_splits, b.n_splits);
   if (most_splits == 0) return 0.0;
 
-  const int16 overlap_a = int16(n_tips + 1) / 2;
+  const split_int overlap_a = (n_tips + 1) / 2;
   constexpr cost max_score = BIG;
-  const double best_overlap = TreeDist::one_overlap(overlap_a, n_tips / 2, n_tips);
-  const double max_possible = lg2_unrooted[n_tips] - best_overlap;
+  // Hot path: n_tips <= SL_MAX_TIPS, so all lg2_rooted/lg2_unrooted indices
+  // fall in the precomputed table.  Dispatch once here so the per-cell loop
+  // emits direct table loads with no bounds-check branch.
+  // The else branch is reachable only on TT 2.3 builds with n_tips > 32705,
+  // which is too large to exercise in routine tests; the equivalent slow
+  // template instantiations (one_overlap<false> etc.) are covered by the
+  // legacy single-pair entry (cpp_shared_phylo) via force_slow=true.
+  const bool fast = n_tips <= static_cast<int32>(SL_MAX_TIPS + 1);
+  double best_overlap, max_possible;
+  if (fast) {
+    best_overlap = TreeDist::one_overlap<true>(overlap_a, n_tips / 2, n_tips);
+    max_possible = lg2_unrooted[n_tips] - best_overlap;
+  } else {
+    // LCOV_EXCL_START
+    best_overlap = TreeDist::one_overlap<false>(overlap_a, n_tips / 2, n_tips);
+    max_possible = lg2_unrooted_lookup(n_tips) - best_overlap;
+    // LCOV_EXCL_STOP
+  }
   const double score_over_possible = static_cast<double>(max_score) / max_possible;
   const double possible_over_score = max_possible / static_cast<double>(max_score);
 
   scratch.score_pool.resize(most_splits);
   cost_matrix& score = scratch.score_pool;
 
-  for (int16 ai = 0; ai < a.n_splits; ++ai) {
-    for (int16 bi = 0; bi < b.n_splits; ++bi) {
-      const double spi = TreeDist::spi_overlap(
-        a.state[ai], b.state[bi], n_tips,
-        a.in_split[ai], b.in_split[bi], a.n_bins);
-      score(ai, bi) = (spi == 0.0) ? max_score
-                                   : cost((spi - best_overlap) * score_over_possible);
-    }
-    score.padRowAfterCol(ai, b.n_splits, max_score);
+  if (fast) {
+    shared_phylo_fill_lap<true>(a, b, n_tips, best_overlap,
+                                score_over_possible, max_score, score);
+  } else {
+    // LCOV_EXCL_START
+    shared_phylo_fill_lap<false>(a, b, n_tips, best_overlap,
+                                 score_over_possible, max_score, score);
+    // LCOV_EXCL_STOP
   }
   score.padAfterRow(a.n_splits, max_score);
 
@@ -763,7 +799,7 @@ static double jaccard_score(
     const double exponent, const bool allow_conflict,
     LapScratch& scratch, MatchScratch& mscratch
 ) {
-  const int16 most_splits = std::max(a.n_splits, b.n_splits);
+  const split_int most_splits = std::max(a.n_splits, b.n_splits);
   if (most_splits == 0) return 0.0;
 
   constexpr cost   max_score  = BIG;
@@ -772,7 +808,7 @@ static double jaccard_score(
   // --- Phase 1: O(n log n) exact-match detection ---
   // Only used when allow_conflict=true; otherwise the full LAP may reassign
   // non-matching splits to compatible (non-exact) partners.
-  int16 exact_n = 0;
+  split_int exact_n = 0;
   if (allow_conflict) {
     exact_n = find_exact_matches(a, b, n_tips, mscratch);
   } else {
@@ -781,61 +817,61 @@ static double jaccard_score(
       mscratch.a_match.resize(a.n_splits);
     if (mscratch.b_match.size() < static_cast<size_t>(b.n_splits))
       mscratch.b_match.resize(b.n_splits);
-    std::fill(mscratch.a_match.data(), mscratch.a_match.data() + a.n_splits, int16(0));
-    std::fill(mscratch.b_match.data(), mscratch.b_match.data() + b.n_splits, int16(0));
+    std::fill(mscratch.a_match.data(), mscratch.a_match.data() + a.n_splits, split_int(0));
+    std::fill(mscratch.b_match.data(), mscratch.b_match.data() + b.n_splits, split_int(0));
   }
-  const int16* a_match = mscratch.a_match.data();
-  const int16* b_match = mscratch.b_match.data();
+  const split_int* a_match = mscratch.a_match.data();
+  const split_int* b_match = mscratch.b_match.data();
 
   if (exact_n == b.n_splits || exact_n == a.n_splits) {
     return static_cast<double>(exact_n);
   }
 
   // --- Phase 2: fill cost matrix for unmatched splits only ---
-  const int16 lap_n = most_splits - exact_n;
+  const split_int lap_n = most_splits - exact_n;
 
-  std::vector<int16> a_unmatch, b_unmatch;
+  std::vector<split_int> a_unmatch, b_unmatch;
   a_unmatch.reserve(lap_n);
   b_unmatch.reserve(lap_n);
-  for (int16 ai = 0; ai < a.n_splits; ++ai) {
+  for (split_int ai = 0; ai < a.n_splits; ++ai) {
     if (!a_match[ai]) a_unmatch.push_back(ai);
   }
-  for (int16 bi = 0; bi < b.n_splits; ++bi) {
+  for (split_int bi = 0; bi < b.n_splits; ++bi) {
     if (!b_match[bi]) b_unmatch.push_back(bi);
   }
 
   scratch.score_pool.resize(lap_n);
   cost_matrix& score = scratch.score_pool;
 
-  const int16 a_unmatched_n = static_cast<int16>(a_unmatch.size());
-  const int16 b_unmatched_n = static_cast<int16>(b_unmatch.size());
+  const split_int a_unmatched_n = static_cast<split_int>(a_unmatch.size());
+  const split_int b_unmatched_n = static_cast<split_int>(b_unmatch.size());
 
-  for (int16 a_pos = 0; a_pos < a_unmatched_n; ++a_pos) {
-    const int16 ai = a_unmatch[a_pos];
-    const int16 na = a.in_split[ai];
-    const int16 nA = n_tips - na;
+  for (split_int a_pos = 0; a_pos < a_unmatched_n; ++a_pos) {
+    const split_int ai = a_unmatch[a_pos];
+    const split_int na = a.in_split[ai];
+    const split_int nA = n_tips - na;
 
-    for (int16 b_pos = 0; b_pos < b_unmatched_n; ++b_pos) {
-      const int16 bi = b_unmatch[b_pos];
-      int16 a_and_b = 0;
-      for (int16 bin = 0; bin < a.n_bins; ++bin) {
+    for (split_int b_pos = 0; b_pos < b_unmatched_n; ++b_pos) {
+      const split_int bi = b_unmatch[b_pos];
+      split_int a_and_b = 0;
+      for (split_int bin = 0; bin < a.n_bins; ++bin) {
         a_and_b += count_bits(a.state[ai][bin] & b.state[bi][bin]);
       }
-      const int16 nb    = b.in_split[bi];
-      const int16 nB    = n_tips - nb;
-      const int16 a_and_B = na - a_and_b;
-      const int16 A_and_b = nb - a_and_b;
-      const int16 A_and_B = nB - a_and_B;
+      const split_int nb    = b.in_split[bi];
+      const split_int nB    = n_tips - nb;
+      const split_int a_and_B = na - a_and_b;
+      const split_int A_and_b = nb - a_and_b;
+      const split_int A_and_B = nB - a_and_B;
 
       if (!allow_conflict && !(
             a_and_b == na || a_and_B == na ||
             A_and_b == nA || A_and_B == nA)) {
         score(a_pos, b_pos) = max_score;
       } else {
-        const int16 A_or_b = n_tips - a_and_B;
-        const int16 a_or_B = n_tips - A_and_b;
-        const int16 a_or_b = n_tips - A_and_B;
-        const int16 A_or_B = n_tips - a_and_b;
+        const split_int A_or_b = n_tips - a_and_B;
+        const split_int a_or_B = n_tips - A_and_b;
+        const split_int a_or_b = n_tips - A_and_B;
+        const split_int A_or_B = n_tips - a_and_b;
         const double ars_ab = static_cast<double>(a_and_b) / static_cast<double>(a_or_b);
         const double ars_Ab = static_cast<double>(A_and_b) / static_cast<double>(A_or_b);
         const double ars_aB = static_cast<double>(a_and_B) / static_cast<double>(a_or_B);
@@ -1229,7 +1265,7 @@ NumericVector cpp_clustering_entropy_batch(
   for (int i = 0; i < N; ++i) {
     SplitList sl(Rcpp::as<RawMatrix>(splits_list[i]));
     double total = 0.0;
-    for (int16 s = 0; s < sl.n_splits; ++s) {
+    for (split_int s = 0; s < sl.n_splits; ++s) {
       const int k = sl.in_split[s];
       if (k <= 0 || k >= n_tip) continue;
       const double p = k * invN;
@@ -1271,7 +1307,7 @@ NumericVector cpp_splitwise_info_batch(
   for (int i = 0; i < N; ++i) {
     SplitList sl(Rcpp::as<RawMatrix>(splits_list[i]));
     double total = 0.0;
-    for (int16 s = 0; s < sl.n_splits; ++s) {
+    for (split_int s = 0; s < sl.n_splits; ++s) {
       const int k = sl.in_split[s];
       if (k < 2 || (n_tip - k) < 2) continue;
       total += l2u_n - l2r[k] - l2r[n_tip - k];
diff --git a/src/tree_distance_functions.cpp b/src/tree_distance_functions.cpp
index 99347300..5ab745cb 100644
--- a/src/tree_distance_functions.cpp
+++ b/src/tree_distance_functions.cpp
@@ -28,15 +28,15 @@ double cpp_mci_impl_score(const Rcpp::RawMatrix& x,
   // Build arrays matching the header's raw-pointer API types.
   std::vector<const splitbit*> a_ptrs(a.n_splits);
   std::vector<const splitbit*> b_ptrs(b.n_splits);
-  std::vector<TreeDist::int16> a_in(a.n_splits);
-  std::vector<TreeDist::int16> b_in(b.n_splits);
-  for (TreeDist::int16 i = 0; i < a.n_splits; ++i) {
+  std::vector<TreeDist::split_int> a_in(a.n_splits);
+  std::vector<TreeDist::split_int> b_in(b.n_splits);
+  for (TreeDist::split_int i = 0; i < a.n_splits; ++i) {
     a_ptrs[i] = a.state[i];
-    a_in[i] = static_cast<TreeDist::int16>(a.in_split[i]);
+    a_in[i] = static_cast<TreeDist::split_int>(a.in_split[i]);
   }
-  for (TreeDist::int16 i = 0; i < b.n_splits; ++i) {
+  for (TreeDist::split_int i = 0; i < b.n_splits; ++i) {
     b_ptrs[i] = b.state[i];
-    b_in[i] = static_cast<TreeDist::int16>(b.in_split[i]);
+    b_in[i] = static_cast<TreeDist::split_int>(b.in_split[i]);
   }
 
   return TreeDist::mutual_clustering_score(
diff --git a/src/tree_distances.cpp b/src/tree_distances.cpp
index 0da45e65..3651caab 100644
--- a/src/tree_distances.cpp
+++ b/src/tree_distances.cpp
@@ -29,18 +29,15 @@ namespace TreeDist {
     }
   }
 
+  // Hard C++ ceiling: n_tips^2 must fit int32 and all counters must fit split_int.
+  // 32767 = INT16_MAX is the chosen practical cap (n^2 <= 1.07e9 < INT32_MAX).
+  // R-level .CheckMaxTips() fires before this; this is a safety backstop.
+  static constexpr int32 TREEDIST_MAX_TIPS = 32767;
+
   void check_ntip(const int32 n) {
-    if (n > SL_MAX_TIPS) {
-      if (SL_MAX_TIPS <= 2048) {
-        Rcpp::stop("Trees with %d tips exceed the compiled limit of %d. "
-                     "Update TreeTools to support more tips, then reinstall "
-                     "TreeDist.", static_cast<int>(n),
-                     static_cast<int>(SL_MAX_TIPS));
-      } else {
-        Rcpp::stop("Trees with %d tips are not yet supported (maximum %d). ",
-                   static_cast<int>(n),
-                   static_cast<int>(SL_MAX_TIPS));
-      }
+    if (n > TREEDIST_MAX_TIPS) {
+      Rcpp::stop("Trees with %d tips are not yet supported (maximum %d).",
+                 static_cast<int>(n), static_cast<int>(TREEDIST_MAX_TIPS));
     }
   }
 
@@ -114,41 +111,41 @@ inline List robinson_foulds_info(const RawMatrix &x, const RawMatrix &y,
                                  const int32 n_tips) {
   const SplitList a(x), b(y);
   
-  const int16 last_bin = a.n_bins - 1;
-  const int16 unset_tips = (n_tips % SL_BIN_SIZE) ?
+  const split_int last_bin = a.n_bins - 1;
+  const split_int unset_tips = (n_tips % SL_BIN_SIZE) ?
     SL_BIN_SIZE - n_tips % SL_BIN_SIZE : 0;
-  
+
   const splitbit unset_mask = ALL_ONES >> unset_tips;
-  const double lg2_unrooted_n = lg2_unrooted[n_tips];
+  const double lg2_unrooted_n = lg2_unrooted_lookup(n_tips);
   double score = 0;
-  
+
   grf_match matching(a.n_splits, NA_INTEGER);
-  
-  const int16 n_bins = a.n_bins;
+
+  const split_int n_bins = a.n_bins;
   std::vector<splitbit> b_complement(b.n_splits * n_bins);
-  for (int16 i = 0; i < b.n_splits; i++) {
+  for (split_int i = 0; i < b.n_splits; i++) {
     splitbit* bc_i = &b_complement[i * n_bins];
-    for (int16 bin = 0; bin < last_bin; ++bin) {
+    for (split_int bin = 0; bin < last_bin; ++bin) {
       bc_i[bin] = ~b.state[i][bin];
     }
     bc_i[last_bin] = b.state[i][last_bin] ^ unset_mask;
   }
-  
-  for (int16 ai = 0; ai < a.n_splits; ++ai) {
+
+  for (split_int ai = 0; ai < a.n_splits; ++ai) {
     if ((ai & 1023) == 0) Rcpp::checkUserInterrupt();
-    for (int16 bi = 0; bi < b.n_splits; ++bi) {
-      
+    for (split_int bi = 0; bi < b.n_splits; ++bi) {
+
       bool all_match = true, all_complement = true;
       const splitbit* bc_bi = &b_complement[bi * n_bins];
-      
-      for (int16 bin = 0; bin < n_bins; ++bin) {
+
+      for (split_int bin = 0; bin < n_bins; ++bin) {
         if ((a.state[ai][bin] != b.state[bi][bin])) {
           all_match = false;
           break;
         }
       }
       if (!all_match) {
-        for (int16 bin = 0; bin < n_bins; ++bin) {
+        for (split_int bin = 0; bin < n_bins; ++bin) {
           if ((a.state[ai][bin] != bc_bi[bin])) {
             all_complement = false;
             break;
@@ -156,13 +153,13 @@ inline List robinson_foulds_info(const RawMatrix &x, const RawMatrix &y,
         }
       }
       if (all_match || all_complement) {
-        int16 leaves_in_split = 0;
-        for (int16 bin = 0; bin < a.n_bins; ++bin) {
+        split_int leaves_in_split = 0;
+        for (split_int bin = 0; bin < a.n_bins; ++bin) {
           leaves_in_split += count_bits(a.state[ai][bin]);
         }
-        
-        score += lg2_unrooted_n - lg2_rooted[leaves_in_split] -
-          lg2_rooted[n_tips - leaves_in_split];
+
+        score += lg2_unrooted_n - lg2_rooted_lookup(leaves_in_split) -
+          lg2_rooted_lookup(n_tips - leaves_in_split);
 
         matching[ai] = bi + 1;
         break; /* Only one match possible per split */
@@ -180,50 +177,50 @@ inline List robinson_foulds_info(const RawMatrix &x, const RawMatrix &y,
 inline List matching_split_distance(const RawMatrix &x, const RawMatrix &y,
                                     const int32 n_tips) {
   const SplitList a(x), b(y);
-  const int16 most_splits = std::max(a.n_splits, b.n_splits);
-  const int16 split_diff = most_splits - std::min(a.n_splits, b.n_splits);
-  const int16 half_tips = n_tips / 2;
+  const split_int most_splits = std::max(a.n_splits, b.n_splits);
+  const split_int split_diff = most_splits - std::min(a.n_splits, b.n_splits);
+  const split_int half_tips = n_tips / 2;
   if (most_splits == 0) {
     return List::create(Named("score") = 0);
   }
   const cost max_score = BIG / most_splits;
   cost_matrix score(most_splits);
-  
-  for (int16 ai = 0; ai < a.n_splits; ++ai) {
+
+  for (split_int ai = 0; ai < a.n_splits; ++ai) {
     if ((ai & 1023) == 0) Rcpp::checkUserInterrupt();
-    for (int16 bi = 0; bi < b.n_splits; ++bi) {
+    for (split_int bi = 0; bi < b.n_splits; ++bi) {
       splitbit total = 0;
-      for (int16 bin = 0; bin < a.n_bins; ++bin) {
-         total += count_bits(a.state[ai][bin] ^ b.state[bi][bin]);
+      for (split_int bin = 0; bin < a.n_bins; ++bin) {
+        total += count_bits(a.state[ai][bin] ^ b.state[bi][bin]);
       }
       score(ai, bi) = total;
     }
   }
-  
-  for (int16 ai = 0; ai < a.n_splits; ++ai) {
-    for (int16 bi = 0; bi < b.n_splits; ++bi) {
+
+  for (split_int ai = 0; ai < a.n_splits; ++ai) {
+    for (split_int bi = 0; bi < b.n_splits; ++bi) {
       if (score(ai, bi) > half_tips) {
         score(ai, bi) = n_tips - score(ai, bi);
       }
     }
     score.padRowAfterCol(ai, b.n_splits, max_score);
   }
-  
+
   score.padAfterRow(a.n_splits, max_score);
-  
+
   std::vector<lap_col> rowsol;
   std::vector<lap_row> colsol;
-  
+
   resize_uninitialized(rowsol, most_splits);
   resize_uninitialized(colsol, most_splits);
-  
+
   const double final_score = lap(most_splits, score, rowsol, colsol) -
     (max_score * split_diff);
-  
+
   std::vector<int> final_matching;
   final_matching.reserve(a.n_splits);
-  
-  for (int16 i = 0; i < a.n_splits; ++i) {
+
+  for (split_int i = 0; i < a.n_splits; ++i) {
     const int match = (rowsol[i] < b.n_splits)
     ? static_cast<int>(rowsol[i]) + 1
     : NA_INTEGER;
@@ -239,49 +236,49 @@ inline List jaccard_similarity(const RawMatrix &x, const RawMatrix &y,
                                    const int32 n_tips, const NumericVector &k,
                                    const LogicalVector &allowConflict) {
   const SplitList a(x), b(y);
-  const int16 most_splits = std::max(a.n_splits, b.n_splits);
-  
+  const split_int most_splits = std::max(a.n_splits, b.n_splits);
+
   constexpr cost max_score = BIG;
   constexpr double max_scoreL = max_score;
   const double exponent = k[0];
-  
+
   bool allow_conflict = allowConflict[0];
-  
+
   cost_matrix score(most_splits);
-  
-  for (int16 ai = 0; ai < a.n_splits; ++ai) {
+
+  for (split_int ai = 0; ai < a.n_splits; ++ai) {
     if ((ai & 1023) == 0) Rcpp::checkUserInterrupt();
-    
-    const int16 na = a.in_split[ai];
-    const int16 nA = n_tips - na;
-    
-    for (int16 bi = 0; bi < b.n_splits; ++bi) {
-      
+
+    const split_int na = a.in_split[ai];
+    const split_int nA = n_tips - na;
+
+    for (split_int bi = 0; bi < b.n_splits; ++bi) {
+
       // x divides tips into a|A; y divides tips into b|B
-      int16 a_and_b = 0;
-      for (int16 bin = 0; bin < a.n_bins; ++bin) {
+      split_int a_and_b = 0;
+      for (split_int bin = 0; bin < a.n_bins; ++bin) {
         a_and_b += count_bits(a.state[ai][bin] & b.state[bi][bin]);
       }
-      
-      const int16
+
+      const split_int
         nb = b.in_split[bi],
         nB = n_tips - nb,
         a_and_B = na - a_and_b,
         A_and_b = nb - a_and_b,
         A_and_B = nB - a_and_B
       ;
-      
+
       if (!allow_conflict && !(
             a_and_b == na ||
             a_and_B == na ||
             A_and_b == nA ||
             A_and_B == nA)
           ) {
-        
+
         score(ai, bi) = max_score; /* Prohibited */
-        
+
       } else {
-        const int16
+        const split_int
           A_or_b  = n_tips - a_and_B,
           a_or_B = n_tips - A_and_b,
           a_or_b = n_tips - A_and_B,
@@ -330,76 +327,77 @@ inline List jaccard_similarity(const RawMatrix &x, const RawMatrix &y,
   std::vector<int> final_matching;
   final_matching.reserve(a.n_splits);
   
-  for (int16 i = 0; i < a.n_splits; ++i) {
+  for (split_int i = 0; i < a.n_splits; ++i) {
     const int match = (rowsol[i] < b.n_splits)
     ? static_cast<int>(rowsol[i]) + 1
     : NA_INTEGER;
     final_matching.push_back(match);
   }
-  
-  
+
+
   return List::create(Named("score") = final_score,
                       _["matching"] = final_matching);
-  
+
 }
 
 List msi_distance(const RawMatrix &x, const RawMatrix &y, const int32 n_tips) {
   const SplitList a(x), b(y);
-  const int16 most_splits = std::max(a.n_splits, b.n_splits);
+  const split_int most_splits = std::max(a.n_splits, b.n_splits);
   constexpr cost max_score = BIG;
-  const double max_possible = lg2_unrooted[n_tips] - 
-    lg2_rooted[int16((n_tips + 1) / 2)] - lg2_rooted[int16(n_tips / 2)];
+  const double max_possible = lg2_unrooted_lookup(n_tips) -
+    lg2_rooted_lookup((n_tips + 1) / 2) - lg2_rooted_lookup(n_tips / 2);
   const double score_over_possible = static_cast<double>(max_score) / max_possible;
   const double possible_over_score = max_possible / max_score;
-  
+
   cost_matrix score(most_splits);
-  
-  splitbit different[SL_MAX_BINS];
-  
-  for (int16 ai = 0; ai < a.n_splits; ++ai) {
+
+  // Heap-allocated: n_bins can exceed SL_MAX_BINS for large trees.
+  std::vector<splitbit> different(a.n_bins);
+
+  for (split_int ai = 0; ai < a.n_splits; ++ai) {
     if ((ai & 1023) == 0) Rcpp::checkUserInterrupt();
-    for (int16 bi = 0; bi < b.n_splits; ++bi) {
-      int16 
+    for (split_int bi = 0; bi < b.n_splits; ++bi) {
+      split_int
         n_different = 0,
         n_a_only = 0,
         n_a_and_b = 0
       ;
-      for (int16 bin = 0; bin < a.n_bins; ++bin) {
+      for (split_int bin = 0; bin < a.n_bins; ++bin) {
         different[bin] = a.state[ai][bin] ^ b.state[bi][bin];
         n_different += count_bits(different[bin]);
         n_a_only += count_bits(a.state[ai][bin] & different[bin]);
         n_a_and_b += count_bits(a.state[ai][bin] & ~different[bin]);
       }
-      const int16 n_same = n_tips - n_different;
-      
-      score(ai, bi) = cost(max_score - 
+      const split_int n_same = n_tips - n_different;
+
+      score(ai, bi) = cost(max_score -
         (score_over_possible *
           TreeDist::mmsi_score(n_same, n_a_and_b, n_different, n_a_only)));
     }
     score.padRowAfterCol(ai, b.n_splits, max_score);
   }
   score.padAfterRow(a.n_splits, max_score);
-  
+
   std::vector<lap_col> rowsol;
   std::vector<lap_row> colsol;
-  
+
   resize_uninitialized(rowsol, most_splits);
   resize_uninitialized(colsol, most_splits);
-  
+
   const double final_score = static_cast<double>(
     (max_score * most_splits) - lap(most_splits, score, rowsol, colsol)) *
       possible_over_score;
-  
+
   std::vector<int> final_matching;
   final_matching.reserve(a.n_splits);
-  
-  for (int16 i = 0; i < a.n_splits; ++i) {
+
+  for (split_int i = 0; i < a.n_splits; ++i) {
     const int match = (rowsol[i] < b.n_splits)
     ? static_cast<int>(rowsol[i]) + 1
     : NA_INTEGER;
     final_matching.push_back(match);
   }
-  
+
   return List::create(Named("score") = final_score,
                       _["matching"] = final_matching);
 
@@ -410,55 +408,55 @@ List mutual_clustering(const RawMatrix &x, const RawMatrix &y,
   const SplitList a(x);
   const SplitList b(y);
   const bool a_has_more_splits = (a.n_splits > b.n_splits);
-  const int16 most_splits = a_has_more_splits ? a.n_splits : b.n_splits;
-  const int16 a_extra_splits = a_has_more_splits ? most_splits - b.n_splits : 0;
-  const int16 b_extra_splits = a_has_more_splits ? 0 : most_splits - a.n_splits;
+  const split_int most_splits = a_has_more_splits ? a.n_splits : b.n_splits;
+  const split_int a_extra_splits = a_has_more_splits ? most_splits - b.n_splits : 0;
+  const split_int b_extra_splits = a_has_more_splits ? 0 : most_splits - a.n_splits;
   const double n_tips_reciprocal = 1.0 / n_tips;
-  
+
   if (most_splits == 0 || n_tips == 0) {
     return List::create(Named("score") = 0,
                         _["matching"] = IntegerVector(0));
   }
-  
+
   constexpr cost max_score = BIG;
   constexpr double over_max_score = 1.0 / static_cast<double>(max_score);
   const double max_over_tips = static_cast<double>(max_score) * n_tips_reciprocal;
-  const double lg2_n = lg2[n_tips];
-  
+  const double lg2_n = lg2_lookup(n_tips);
+
   cost_matrix score(most_splits);
-  
+
   double exact_match_score = 0;
-  int16 exact_matches = 0;
+  split_int exact_matches = 0;
   // vector zero-initializes [so does make_unique]
   // match will have one added to it so numbering follows R; hence 0 = UNMATCHED
   std::vector<int> a_match(a.n_splits);
-  std::unique_ptr<int16[]> b_match = std::make_unique<int16[]>(b.n_splits);
-  
-  for (int16 ai = 0; ai < a.n_splits; ++ai) {
+  std::unique_ptr<split_int[]> b_match = std::make_unique<split_int[]>(b.n_splits);
+
+  for (split_int ai = 0; ai < a.n_splits; ++ai) {
     if ((ai & 1023) == 0) Rcpp::checkUserInterrupt();
     if (a_match[ai]) continue;
-    
-    const int16 na = a.in_split[ai];
-    const int16 nA = n_tips - na;
+
+    const split_int na = a.in_split[ai];
+    const split_int nA = n_tips - na;
     const auto *a_row = a.state[ai];
-    const double offset_a = lg2_n - lg2[na];
-    const double offset_A = lg2_n - lg2[nA];
-    
-    for (int16 bi = 0; bi < b.n_splits; ++bi) {
-      
+    const double offset_a = lg2_n - lg2_lookup(na);
+    const double offset_A = lg2_n - lg2_lookup(nA);
+
+    for (split_int bi = 0; bi < b.n_splits; ++bi) {
+
       // x divides tips into a|A; y divides tips into b|B
-      int16 a_and_b = 0;
+      split_int a_and_b = 0;
       const auto *b_row = b.state[bi];
-      for (int16 bin = 0; bin < a.n_bins; ++bin) {
+      for (split_int bin = 0; bin < a.n_bins; ++bin) {
         a_and_b += count_bits(a_row[bin] & b_row[bin]);
       }
-      
-      const int16 nb = b.in_split[bi];
-      const int16 nB = n_tips - nb;
-      const int16 a_and_B = na - a_and_b;
-      const int16 A_and_b = nb - a_and_b;
-      const int16 A_and_B = nA - A_and_b;
-      
+
+      const split_int nb = b.in_split[bi];
+      const split_int nB = n_tips - nb;
+      const split_int a_and_B = na - a_and_b;
+      const split_int A_and_b = nb - a_and_b;
+      const split_int A_and_B = nA - A_and_b;
+
       if ((!a_and_B && !A_and_b) ||
           (!a_and_b && !A_and_B)) {
         exact_match_score += TreeDist::ic_matching(na, nA, n_tips);
@@ -471,44 +469,44 @@ List mutual_clustering(const RawMatrix &x, const RawMatrix &y,
                    && a_and_b == A_and_B) {
         score(ai, bi) = max_score; // Avoid rounding errors
       } else {
-        const double lg2_nb = lg2[nb];
-        const double lg2_nB = lg2[nB];
+        const double lg2_nb = lg2_lookup(nb);
+        const double lg2_nB = lg2_lookup(nB);
         const double ic_sum =
-          a_and_b * (lg2[a_and_b] + offset_a - lg2_nb) +
-          a_and_B * (lg2[a_and_B] + offset_a - lg2_nB) +
-          A_and_b * (lg2[A_and_b] + offset_A - lg2_nb) +
-          A_and_B * (lg2[A_and_B] + offset_A - lg2_nB);
-        
+          a_and_b * (lg2_lookup(a_and_b) + offset_a - lg2_nb) +
+          a_and_B * (lg2_lookup(a_and_B) + offset_a - lg2_nB) +
+          A_and_b * (lg2_lookup(A_and_b) + offset_A - lg2_nb) +
+          A_and_B * (lg2_lookup(A_and_B) + offset_A - lg2_nB);
+
         // Division by n_tips converts n(A&B) to P(A&B) for each ic_element
         score(ai, bi) = max_score - static_cast<cost>(ic_sum * max_over_tips);
       }
     }
-    
+
     if (b.n_splits < most_splits) {
       score.padRowAfterCol(ai, b.n_splits, max_score);
     }
   }
-  
+
   if (exact_matches == b.n_splits || exact_matches == a.n_splits) {
     return List::create(
       Named("score") = exact_match_score * n_tips_reciprocal,
       _["matching"] = a_match);
   }
-  
-  const int16 lap_dim = most_splits - exact_matches;
+
+  const split_int lap_dim = most_splits - exact_matches;
   ASSERT(lap_dim > 0);
   std::vector<lap_col> rowsol;
   std::vector<lap_row> colsol;
   resize_uninitialized(rowsol, lap_dim);
   resize_uninitialized(colsol, lap_dim);
   cost_matrix small_score(lap_dim);
-  
+
   if (exact_matches) {
-    int16 a_pos = 0;
-    for (int16 ai = 0; ai < a.n_splits; ++ai) {
+    split_int a_pos = 0;
+    for (split_int ai = 0; ai < a.n_splits; ++ai) {
       if (a_match[ai]) continue;
-      int16 b_pos = 0;
-      for (int16 bi = 0; bi < b.n_splits; ++bi) {
+      split_int b_pos = 0;
+      for (split_int bi = 0; bi < b.n_splits; ++bi) {
         if (b_match[bi]) continue;
         small_score(a_pos, b_pos) = score(ai, bi);
         b_pos++;
@@ -517,115 +515,143 @@ List mutual_clustering(const RawMatrix &x, const RawMatrix &y,
       a_pos++;
     }
     small_score.padAfterRow(lap_dim - b_extra_splits, max_score);
-    
-    const double lap_score = static_cast<double>((max_score * lap_dim) - 
+
+    const double lap_score = static_cast<double>((max_score * lap_dim) -
       lap(lap_dim, small_score, rowsol, colsol)) * over_max_score;
     const double final_score = lap_score + (exact_match_score / n_tips);
-    
-    std::unique_ptr<int16[]> lap_decode = std::make_unique<int16[]>(lap_dim);
-    int16 fuzzy_match = 0;
-    for (int16 bi = 0; bi < b.n_splits; ++bi) {
+
+    std::unique_ptr<split_int[]> lap_decode = std::make_unique<split_int[]>(lap_dim);
+    split_int fuzzy_match = 0;
+    for (split_int bi = 0; bi < b.n_splits; ++bi) {
       if (!b_match[bi]) {
         assert(fuzzy_match < lap_dim);
         lap_decode[fuzzy_match++] = bi + 1;
       }
     }
-    
+
     fuzzy_match = 0;
     std::vector<int> final_matching;
     TreeDist::resize_uninitialized(final_matching, a.n_splits);
-    for (int16 i = 0; i < a.n_splits; ++i) {
+    for (split_int i = 0; i < a.n_splits; ++i) {
       if (a_match[i]) {
         final_matching[i] = a_match[i];
       } else {
         assert(fuzzy_match < lap_dim);
-        const int16 row_idx = fuzzy_match++;
-        const int16 col_idx = rowsol[row_idx];
+        const split_int row_idx = fuzzy_match++;
+        const split_int col_idx = rowsol[row_idx];
         final_matching[i] = (col_idx >= lap_dim - a_extra_splits) ? NA_INTEGER :
           lap_decode[col_idx];
       }
     }
-    
+
     return List::create(Named("score") = final_score,
                         _["matching"] = final_matching);
   } else {
-    
-    for (int16 ai = a.n_splits; ai < most_splits; ++ai) {
-      for (int16 bi = 0; bi < most_splits; ++bi) {
+
+    for (split_int ai = a.n_splits; ai < most_splits; ++ai) {
+      for (split_int bi = 0; bi < most_splits; ++bi) {
         score(ai, bi) = max_score;
       }
     }
-    
+
     const double final_score = static_cast<double>(
       (max_score * lap_dim) - lap(lap_dim, score, rowsol, colsol)
     ) / max_score;
-    
+
     std::vector<int> final_matching;
     final_matching.reserve(a.n_splits);
-    for (int16 i = 0; i < a.n_splits; ++i) {
+    for (split_int i = 0; i < a.n_splits; ++i) {
       const int match = (rowsol[i] < b.n_splits)
       ? static_cast<int>(rowsol[i]) + 1
       : NA_INTEGER;
       final_matching.push_back(match);
     }
-      
+
     return List::create(Named("score") = final_score,
                         _["matching"] = final_matching);
   }
 }
 
+// Templated LAP-fill so the per-cell spi_overlap resolves to direct
+// lg2_rooted[] access on the hot path (n_tips <= SL_MAX_TIPS).
+template<bool Fast>
+static inline void shared_phylo_fill(const SplitList& a, const SplitList& b,
+                                     const int32 n_tips,
+                                     const double best_overlap,
+                                     const double score_over_possible,
+                                     const cost max_score,
+                                     cost_matrix& score) {
+  for (split_int ai = a.n_splits; ai--; ) {
+    if ((ai & 1023) == 0) Rcpp::checkUserInterrupt();
+    for (split_int bi = b.n_splits; bi--; ) {
+      const double spi_over = TreeDist::spi_overlap<Fast>(
+        a.state[ai], b.state[bi], n_tips, a.in_split[ai], b.in_split[bi],
+                                                                    a.n_bins);
+
+      score(ai, bi) = spi_over == 0 ? max_score :
+        cost((spi_over - best_overlap) * score_over_possible);
+    }
+    score.padRowAfterCol(ai, b.n_splits, max_score);
+  }
+}
+
 inline List shared_phylo (const RawMatrix &x, const RawMatrix &y,
-                          const int32 n_tips) {
+                          const int32 n_tips,
+                          const bool force_slow = false) {
   const SplitList a(x), b(y);
-  const int16 most_splits = std::max(a.n_splits, b.n_splits);
-  const int16 overlap_a = int16(n_tips + 1) / 2; // avoids promotion to int
-  
+  const split_int most_splits = std::max(a.n_splits, b.n_splits);
+  const split_int overlap_a = (n_tips + 1) / 2;
+
   constexpr cost max_score = BIG;
-  const double lg2_unrooted_n = lg2_unrooted[n_tips];
-  const double best_overlap = TreeDist::one_overlap(overlap_a, n_tips / 2, n_tips);
+  // force_slow: test-only hook so the bounds-checked lookup path can be
+  // exercised on small trees for code coverage of the template <false>
+  // instantiations and the lg2_*_slow fallbacks.
+  const bool fast = !force_slow && n_tips <= static_cast<int32>(SL_MAX_TIPS + 1);
+  double lg2_unrooted_n, best_overlap;
+  if (fast) {
+    lg2_unrooted_n = lg2_unrooted[n_tips];
+    best_overlap = TreeDist::one_overlap<true>(overlap_a, n_tips / 2, n_tips);
+  } else {
+    lg2_unrooted_n = lg2_unrooted_lookup(n_tips);
+    best_overlap = TreeDist::one_overlap<false>(overlap_a, n_tips / 2, n_tips);
+  }
   const double max_possible = lg2_unrooted_n - best_overlap;
   const double score_over_possible = max_score / max_possible;
   const double possible_over_score = max_possible / max_score;
-  
+
   // a and b are "clades" separating an "ingroup" [1] from an "outgroup" [0].
   // In/out direction [i.e. 1/0 bit] is arbitrary.
   cost_matrix score(most_splits);
-  
-  for (int16 ai = a.n_splits; ai--; ) {
-    if ((ai & 1023) == 0) Rcpp::checkUserInterrupt();
-    for (int16 bi = b.n_splits; bi--; ) {
-      const double spi_over = TreeDist::spi_overlap(
-        a.state[ai], b.state[bi], n_tips, a.in_split[ai], b.in_split[bi],
-                                                                    a.n_bins);
-      
-      score(ai, bi) = spi_over == 0 ? max_score :
-        cost((spi_over - best_overlap) * score_over_possible);
-    }
-    score.padRowAfterCol(ai, b.n_splits, max_score);
+
+  if (fast) {
+    shared_phylo_fill<true>(a, b, n_tips, best_overlap,
+                            score_over_possible, max_score, score);
+  } else {
+    shared_phylo_fill<false>(a, b, n_tips, best_overlap,
+                             score_over_possible, max_score, score);
   }
   score.padAfterRow(a.n_splits, max_score);
-  
+
   std::vector<lap_col> rowsol;
   std::vector<lap_row> colsol;
-  
+
   resize_uninitialized(rowsol, most_splits);
   resize_uninitialized(colsol, most_splits);
-  
-  
+
   const double final_score = static_cast<double>(
       (max_score * most_splits) - lap(most_splits, score, rowsol, colsol)) *
         possible_over_score;
-  
+
   std::vector<int> final_matching;
   final_matching.reserve(a.n_splits);
-  
-  for (int16 i = 0; i < a.n_splits; ++i) {
+
+  for (split_int i = 0; i < a.n_splits; ++i) {
     const int match = (rowsol[i] < b.n_splits)
     ? static_cast<int>(rowsol[i]) + 1
     : NA_INTEGER;
     final_matching.push_back(match);
   }
-  
+
   return List::create(Named("score") = final_score,
                       _["matching"] = final_matching);
 }
@@ -687,9 +713,10 @@ List cpp_mutual_clustering(const RawMatrix &x, const RawMatrix &y,
 
 // [[Rcpp::export]]
 List cpp_shared_phylo(const RawMatrix &x, const RawMatrix &y,
-                      const IntegerVector &nTip) {
+                      const IntegerVector &nTip,
+                      const bool force_slow = false) {
   ASSERT(x.cols() == y.cols() && "Input splits must address same number of tips.");
   const int32 n_tip = static_cast<int32>(nTip[0]);
   TreeDist::check_ntip(n_tip);
-  return shared_phylo(x, y, n_tip);
+  return shared_phylo(x, y, n_tip, force_slow);
 }
diff --git a/src/tree_distances.h b/src/tree_distances.h
index 1b24c2b3..89ee6b2d 100644
--- a/src/tree_distances.h
+++ b/src/tree_distances.h
@@ -5,12 +5,15 @@
 #include <TreeDist/mutual_clustering.h>
 #include <TreeTools/SplitList.h>
 
-/***** Re-export shared lookup tables to global scope *****/
+/***** Re-export shared lookup tables and fallback functions to global scope *****/
 
 using TreeDist::lg2;
 using TreeDist::lg2_double_factorial;
 using TreeDist::lg2_unrooted;
 using TreeDist::lg2_rooted;
+using TreeDist::lg2_lookup;
+using TreeDist::lg2_unrooted_lookup;
+using TreeDist::lg2_rooted_lookup;
 
 /***** Constants *****/
 
@@ -23,85 +26,115 @@ namespace TreeDist {
   void check_ntip(int32 n);
 
   // Re-exported from mutual_clustering.h:
-  //   ic_matching(int16 a, int16 b, int16 n)
+  //   ic_matching(split_int a, split_int b, split_int n)
 
   // See equation 16 in Meila 2007 (k' denoted K).
   // nkK is converted to pkK in the calling function when divided by n.
-  inline void add_ic_element(double& ic_sum, const int16 nkK, const int16 nk,
-                             const int16 nK, const int16 n_tips,
+  // Parameters are split_int (int32) to handle n_tips up to 32767 safely;
+  // products nkK*n_tips and nk*nK fit int32 since both factors <= 32767.
+  inline void add_ic_element(double& ic_sum, const split_int nkK, const split_int nk,
+                             const split_int nK, const split_int n_tips,
                              const double lg2_n) noexcept {
     if (nkK && nk && nK) {
       assert(!(nkK == nk && nkK == nK && nkK << 1 == n_tips));
       const int32 numerator = nkK * n_tips;
       const int32 denominator = nk * nK;
       if (numerator != denominator) {
-        ic_sum += nkK * (lg2[nkK] + lg2_n - lg2[nk] - lg2[nK]);
+        ic_sum += nkK * (lg2_lookup(nkK) + lg2_n - lg2_lookup(nk) - lg2_lookup(nK));
       }
     }
   }
 
 
   // Returns lg2_unrooted[x] - lg2_trees_matching_split(y, x - y)
-  [[nodiscard]] inline double mmsi_pair_score(const int16 x, const int16 y) noexcept {
+  [[nodiscard]] inline double mmsi_pair_score(const split_int x, const split_int y) noexcept {
     static_assert(SL_MAX_TIPS + 2 <= std::numeric_limits<int16>::max(),
                   "int16 too narrow for SL_MAX_TIPS");
-    
-    return lg2_unrooted[x] - (lg2_rooted[y] + lg2_rooted[x - y]);
+
+    return lg2_unrooted_lookup(x) - (lg2_rooted_lookup(y) + lg2_rooted_lookup(x - y));
   }
 
-  [[nodiscard]] inline double mmsi_score(const int16 n_same, const int16 n_a_and_b,
-                    const int16 n_different, const int16 n_a_only)  noexcept {
+  [[nodiscard]] inline double mmsi_score(const split_int n_same, const split_int n_a_and_b,
+                    const split_int n_different, const split_int n_a_only)  noexcept {
     if (n_same == 0 || n_same == n_a_and_b)
       return mmsi_pair_score(n_different, n_a_only);
     if (n_different == 0 || n_different == n_a_only)
       return mmsi_pair_score(n_same, n_a_and_b);
-    
+
     const double
       score1 = mmsi_pair_score(n_same, n_a_and_b),
         score2 = mmsi_pair_score(n_different, n_a_only);
-    
+
     return (score1 > score2) ? score1 : score2;
   }
 
 
-[[nodiscard]] inline double one_overlap(const int16 a, const int16 b, const int16 n) noexcept {
+  // Compile-time-dispatched lg2 helpers: when Fast=true, callers guarantee
+  // x <= SL_MAX_TIPS, so the bounds-check + branch in lg2_rooted_lookup is
+  // skipped entirely and the inner kernel collapses to a bare table load.
+  // Used by spi_overlap / one_overlap below; large-tree callers pass
+  // Fast=false (default) and pay the predictable branch.
+  template<bool Fast>
+  [[nodiscard]] inline double lg2_rooted_fast(split_int x) noexcept {
+    if constexpr (Fast) return lg2_rooted[x];
+    else return lg2_rooted_lookup(x);
+  }
+
+  template<bool Fast>
+  [[nodiscard]] inline double lg2_unrooted_fast(split_int x) noexcept {
+    if constexpr (Fast) return lg2_unrooted[x];
+    else return lg2_unrooted_lookup(x);
+  }
+
+  template<bool Fast = false>
+  [[nodiscard]] inline double one_overlap(const split_int a, const split_int b, const split_int n) noexcept {
     static_assert(SL_MAX_TIPS + 2 <= std::numeric_limits<int16>::max(),
                   "int16 too narrow for SL_MAX_TIPS");
     if (a == b) {
-      return lg2_rooted[a] + lg2_rooted[n - a];
+      return lg2_rooted_fast<Fast>(a) + lg2_rooted_fast<Fast>(n - a);
     }
     // Unify a<b and a>b via lo/hi: removes an unpredictable branch.
-    const int16 lo = (a < b) ? a : b;
-    const int16 hi = (a < b) ? b : a;
-    return lg2_rooted[hi] + lg2_rooted[n - lo] - lg2_rooted[hi - lo + 1];
+    const split_int lo = (a < b) ? a : b;
+    const split_int hi = (a < b) ? b : a;
+    return lg2_rooted_fast<Fast>(hi) + lg2_rooted_fast<Fast>(n - lo)
+         - lg2_rooted_fast<Fast>(hi - lo + 1);
   }
-  
-  [[nodiscard]] inline double one_overlap_notb(const int16 a, const int16 n_minus_b, const int16 n) noexcept {
+
+  template<bool Fast = false>
+  [[nodiscard]] inline double one_overlap_notb(const split_int a, const split_int n_minus_b, const split_int n) noexcept {
     static_assert(SL_MAX_TIPS + 2 <= std::numeric_limits<int16>::max(),
                   "int16 too narrow for SL_MAX_TIPS");
-    const int16 b = n - n_minus_b;
+    const split_int b = n - n_minus_b;
     if (a == b) {
-      return lg2_rooted[b] + lg2_rooted[n_minus_b];
+      return lg2_rooted_fast<Fast>(b) + lg2_rooted_fast<Fast>(n_minus_b);
     } else if (a < b) {
-      return lg2_rooted[b] + lg2_rooted[n - a] - lg2_rooted[b - a + 1];
+      return lg2_rooted_fast<Fast>(b) + lg2_rooted_fast<Fast>(n - a)
+           - lg2_rooted_fast<Fast>(b - a + 1);
     } else {
-      return lg2_rooted[a] + lg2_rooted[n_minus_b] - lg2_rooted[a - b + 1];
+      return lg2_rooted_fast<Fast>(a) + lg2_rooted_fast<Fast>(n_minus_b)
+           - lg2_rooted_fast<Fast>(a - b + 1);
     }
   }
 
 
-// Popcount-based: single pass over bins replaces 4 sequential boolean scans.
+  // Popcount-based: single pass over bins replaces 4 sequential boolean scans.
   // Counts n_ab = |A ∩ B| via hardware POPCNT, then derives all 4 Venn-diagram
   // region populations from arithmetic on n_ab, in_a, in_b, n_tips.
+  // split_int used throughout so in_a + in_b does not overflow (max ~65534 for 32767-tip trees).
+  //
+  // Fast=true: caller guarantees n_tips <= SL_MAX_TIPS, so inner one_overlap*
+  // calls use direct lg2_rooted[] access.  Caller (shared_phylo_score) dispatches
+  // once on n_tips; per-cell branches are eliminated.
+  template<bool Fast = false>
   [[nodiscard]] inline double spi_overlap(const splitbit* a_state, const splitbit* b_state,
-                       const int16 n_tips, const int16 in_a,
-                       const int16 in_b, const int16 n_bins) noexcept {
+                       const split_int n_tips, const split_int in_a,
+                       const split_int in_b, const split_int n_bins) noexcept {
 
     static_assert(SL_MAX_BINS <= INT16_MAX,
                   "int16 too narrow for SL_MAX_BINS");
 
-    int16 n_ab = 0;
-    for (int16 bin = 0; bin < n_bins; ++bin) {
+    split_int n_ab = 0;
+    for (split_int bin = 0; bin < n_bins; ++bin) {
       n_ab += TreeTools::count_bits(a_state[bin] & b_state[bin]);
     }
 
@@ -113,15 +146,15 @@ namespace TreeDist {
     // unrelated splits).  Otherwise return the appropriate one_overlap score.
 
     if (n_ab == 0) {
-      return one_overlap_notb(in_a, in_b, n_tips);
+      return one_overlap_notb<Fast>(in_a, in_b, n_tips);
     }
     if (n_ab == in_b || n_ab == in_a) {
       // B ⊆ A (n_b_only == 0) or A ⊆ B (n_a_only == 0)
-      return one_overlap(in_a, in_b, n_tips);
+      return one_overlap<Fast>(in_a, in_b, n_tips);
     }
     if (in_a + in_b - n_ab == n_tips) {
       // A ∪ B covers all tips (n_neither == 0)
-      return one_overlap_notb(in_a, in_b, n_tips);
+      return one_overlap_notb<Fast>(in_a, in_b, n_tips);
     }
 
     return 0.0;
diff --git a/tests/testthat/test-large-trees.R b/tests/testthat/test-large-trees.R
new file mode 100644
index 00000000..3600fbe5
--- /dev/null
+++ b/tests/testthat/test-large-trees.R
@@ -0,0 +1,127 @@
+library("TreeTools", quietly = TRUE)
+
+# Tests that distance functions handle trees larger than the old SL_MAX_TIPS
+# threshold (2048 in TreeTools <= 2.2.0).  These require TreeTools >= 2.3.0
+# for heap-backed split storage; they skip gracefully on older builds.
+
+test_that("Distance functions handle trees exceeding TT 2.2.0 limit", {
+  skip_on_cran()
+  n <- 2049L
+  skip_if(TreeDist:::cpp_sl_max_tips() < n,
+          "Requires TreeTools >= 2.3.0 (SL_MAX_TIPS > 2048)")
+
+  t1 <- as.phylo(1, n)
+  t2 <- as.phylo(2, n)
+
+  expect_gt(RobinsonFoulds(t1, t2)[[1]], 0)
+  expect_equal(RobinsonFoulds(t1, t1)[[1]], 0)
+
+  expect_gt(InfoRobinsonFoulds(t1, t2)[[1]], 0)
+  expect_equal(InfoRobinsonFoulds(t1, t1)[[1]], 0)
+
+  expect_gt(ClusteringInfoDistance(t1, t2)[[1]], 0)
+  expect_equal(ClusteringInfoDistance(t1, t1)[[1]], 0)
+
+  expect_gt(DifferentPhylogeneticInfo(t1, t2)[[1]], 0)
+  expect_equal(DifferentPhylogeneticInfo(t1, t1)[[1]], 0)
+
+  expect_gt(MatchingSplitInfoDistance(t1, t2)[[1]], 0)
+  expect_equal(MatchingSplitInfoDistance(t1, t1)[[1]], 0)
+})
+
+test_that("Distance scores agree across stack/heap storage threshold", {
+  skip_on_cran()
+  skip_if_not_installed("TreeTools", "2.3.0")
+  # SL_STACK_SPLITS in TreeTools 2.3.0+ admits trees of n_tips <= 2048 to
+  # stack-backed split storage; n_tips == 2049 forces the heap path.  This
+  # test compares each metric on a stack-storage tree pair against the same
+  # pair after inserting a cherry next to tip 1, which both pushes them
+  # across the threshold and leaves their split structure invariant: each
+  # existing split simply gains the new tip on whichever side already
+  # contained tip 1, and one perfectly-matched cherry split is added
+  # (contributing 0 to any distance).
+  stackMax <- 2048L
+  skip_if(TreeDist:::cpp_sl_max_tips() <= stackMax,
+          "Requires TreeTools >= 2.3.0 (heap-backed storage)")
+
+  t1s <- as.phylo(1234, stackMax)
+  t2s <- PectinateTree(stackMax)
+  t1h <- AddTip(t1s, where = 1, label = "extra")
+  t2h <- AddTip(t2s, where = 1, label = "extra")
+
+  # RF (count of unmatched splits) is invariant under the inserted cherry.
+  expect_equal(RobinsonFoulds(t1s, t2s)[[1]],
+               RobinsonFoulds(t1h, t2h)[[1]])
+
+  # Per-split info depends on n_tips and on side sizes, so absolute
+  # information-theoretic distances drift by a few % when n grows by 1;
+  # normalized distances are far less sensitive.
+  tol <- 0.002
+
+  expect_equal(InfoRobinsonFoulds(t1s, t2s, normalize = TRUE)[[1]],
+               InfoRobinsonFoulds(t1h, t2h, normalize = TRUE)[[1]],
+               tolerance = tol)
+  expect_equal(ClusteringInfoDistance(t1s, t2s, normalize = TRUE)[[1]],
+               ClusteringInfoDistance(t1h, t2h, normalize = TRUE)[[1]],
+               tolerance = tol)
+  expect_equal(DifferentPhylogeneticInfo(t1s, t2s, normalize = TRUE)[[1]],
+               DifferentPhylogeneticInfo(t1h, t2h, normalize = TRUE)[[1]],
+               tolerance = tol)
+  expect_equal(MatchingSplitInfoDistance(t1s, t2s, normalize = TRUE)[[1]],
+               MatchingSplitInfoDistance(t1h, t2h, normalize = TRUE)[[1]],
+               tolerance = tol)
+})
+
+test_that("RF and IRF work for 8000-tip trees", {
+  skip_on_cran()
+  skip_if(TreeDist:::cpp_sl_max_tips() < 2049L,
+          "Requires TreeTools >= 2.3.0 (SL_MAX_TIPS > 2048)")
+  skip_if(!getOption("slowMode", FALSE), "Only runs in slow mode")
+
+  n <- 8000
+  t1 <- PectinateTree(n)
+  t2 <- BalancedTree(n)
+
+  expect_gt(RobinsonFoulds(t1, t2)[[1]], 0)
+  expect_equal(RobinsonFoulds(t1, t1)[[1]], 0)
+
+  expect_gt(InfoRobinsonFoulds(t1, t2)[[1]], 0)
+  expect_equal(InfoRobinsonFoulds(t1, t1)[[1]], 0)
+})
+
+test_that("Slow lookup path produces identical SPI to fast path", {
+  skip_on_cran()
+  # Coverage for the n_tips > SL_MAX_TIPS branches in shared_phylo:
+  #   - lg2_*_slow fallback functions
+  #   - one_overlap<false>, one_overlap_notb<false>, spi_overlap<false>
+  # These are only triggered with > 32705 tips in production, but the
+  # cpp_shared_phylo entry accepts a force_slow flag so the same code paths
+  # can be exercised on small trees.  Both paths must produce identical
+  # values: lg2_rooted_lookup() == lg2_rooted[] within the table range.
+  t1 <- as.phylo(0, 30)
+  t2 <- as.phylo(11, 30)
+  splits1 <- as.Splits(t1)
+  splits2 <- as.Splits(t2)
+
+  fast <- TreeDist:::cpp_shared_phylo(splits1, splits2, 30L, force_slow = FALSE)
+  slow <- TreeDist:::cpp_shared_phylo(splits1, splits2, 30L, force_slow = TRUE)
+
+  expect_equal(fast$score, slow$score)
+  expect_equal(fast$matching, slow$matching)
+
+  # Identical-tree case: both paths must report the self-information score.
+  fast_self <- TreeDist:::cpp_shared_phylo(splits1, splits1, 30L,
+                                           force_slow = FALSE)
+  slow_self <- TreeDist:::cpp_shared_phylo(splits1, splits1, 30L,
+                                           force_slow = TRUE)
+  expect_equal(fast_self$score, slow_self$score)
+})
+
+test_that("Tip-count ceiling is enforced correctly", {
+  skip_on_cran()
+  skip_if(TreeDist:::cpp_sl_max_tips() < 2049L,
+          "Requires TreeTools >= 2.3.0 (SL_MAX_TIPS > 2048)")
+
+  expect_no_error(.CheckMaxTips(32767L))
+  expect_error(.CheckMaxTips(32768L), "not yet supported")
+})
diff --git a/tests/testthat/test-real-data.R b/tests/testthat/test-real-data.R
deleted file mode 100644
index d34bbc89..00000000
--- a/tests/testthat/test-real-data.R
+++ /dev/null
@@ -1,32 +0,0 @@
-#if (requireNamespace("rwty", quietly = TRUE)) {
-#  test_that("Fungi MSID/PID differ", {
-#    data("fungus", package = "rwty")
-#    trees <- fungus[[1]]$trees
-#    
-#    expect_equal(PhylogeneticInfoDistance(trees[[1]], trees[[2]]),
-#                 PhylogeneticInfoDistance(trees[1:2])[1])
-#    expect_equal(MatchingSplitInfoDistance(trees[[1]], trees[[2]]),
-#                 MatchingSplitInfoDistance(trees[1:2])[1])
-#    expect_false(identical(PhylogeneticInfoDistance(trees[1:4]),
-#                           MatchingSplitInfoDistance(trees[1:4])))
-#  })
-#  
-#  if (requireNamespace("whalehead", quietly = TRUE)) {
-#    test_that("Whalehead MSID/PID differ", {
-#      whaleRoot <- system.file("Data", package = "whalehead", mustWork = TRUE)
-#      whaleRoot <- "../ProjectWhalehead/Data"
-#      treeFiles <- list.files(path = paste0(whaleRoot, "/CombinedTrees"),
-#                              pattern = "*.nex",
-#                              full.names = TRUE, recursive = FALSE)
-#      trees <- unname(rwty::load.trees(treeFiles[2], trim = 100, skip = 0)$trees)
-#      
-#      expect_equal(PhylogeneticInfoDistance(trees[[1]], trees[[2]]),
-#                   PhylogeneticInfoDistance(trees[1:2])[1])
-#      expect_equal(MatchingSplitInfoDistance(trees[[1]], trees[[2]]),
-#                   MatchingSplitInfoDistance(trees[1:2])[1])
-#      expect_false(identical(PhylogeneticInfoDistance(trees[1:4]),
-#                             MatchingSplitInfoDistance(trees[1:4])))
-#    })
-#  }
-#}
-#
diff --git a/tests/testthat/test-tree_distance_nni.R b/tests/testthat/test-tree_distance_nni.R
index 20bcda23..2259f4e8 100644
--- a/tests/testthat/test-tree_distance_nni.R
+++ b/tests/testthat/test-tree_distance_nni.R
@@ -5,21 +5,32 @@ test_that("NNIDist() handles exceptions", {
                "trees must contain the same number of leaves")
   expect_error(NNIDist(list(PectinateTree(1:8), PectinateTree(8))),
                "trees must bear identical labels")
-  expect_error(NNIDist(list(PectinateTree(1:8), 
+  expect_error(NNIDist(list(PectinateTree(1:8),
                             PectinateTree(as.character(1:8)))),
                "trees must bear identical labels")
-  # R-level guard catches too-many-tips
-  expect_error(NNIDist(PectinateTree(40000), BalancedTree(40000)), "so many tips")
-  
   expect_error(NNIDist(BalancedTree(5), RootOnNode(BalancedTree(5), 1)))
-  
+})
+
+test_that("NNIDist() rejects too many tips (TreeTools >= 2.3.0)", {
+  skip_if(TreeDist:::cpp_sl_max_tips() <= 2048L,
+          "Compiled against TreeTools < 2.3.0 (SL_MAX_TIPS = 2048)")
+  expect_error(NNIDist(PectinateTree(40000), BalancedTree(40000)),
+               "not yet supported")
+  expect_error(.NNIDistSingle(PectinateTree(32769), BalancedTree(32769), 32769L),
+               "not yet supported")
+})
+
+test_that("NNIDist() rejects too many tips (TreeTools < 2.3.0)", {
+  skip_if(TreeDist:::cpp_sl_max_tips() > 2048L,
+          "Compiled against TreeTools >= 2.3.0 (SL_MAX_TIPS > 2048)")
+  expect_error(NNIDist(PectinateTree(40000), BalancedTree(40000)),
+               "exceed the compiled limit")
+  expect_error(.NNIDistSingle(PectinateTree(32769), BalancedTree(32769), 32769L),
+               "exceed the compiled limit")
 })
 
 test_that("NNIDist() at NNI_MAX_TIPS", {
   maxTips <- 32768
-  more <- maxTips + 1
-  expect_error(.NNIDistSingle(PectinateTree(more), BalancedTree(more), more),
-               "so many tips")
   goingQuickly <- TRUE
   skip_if(goingQuickly)
   
diff --git a/tests/testthat/test-tree_distance_spr.R b/tests/testthat/test-tree_distance_spr.R
index d97c0263..ff0a3ee8 100644
--- a/tests/testthat/test-tree_distance_spr.R
+++ b/tests/testthat/test-tree_distance_spr.R
@@ -115,6 +115,7 @@ test_that("SPR shortcuts okay - exhaustive", {
 
 test_that("SPR shortcuts okay - known answer", {
   skip_if_not(getOption("slowMode", FALSE))
+  skip_if_not_installed("TBRDist")
   library("TreeTools", quietly = TRUE)
   set.seed(0)
   trees <- lapply(1:8, function(XX) RandomTree(9, root = TRUE))
diff --git a/tests/testthat/test-tree_distance_utilities.R b/tests/testthat/test-tree_distance_utilities.R
index 4a7eff99..3579e072 100644
--- a/tests/testthat/test-tree_distance_utilities.R
+++ b/tests/testthat/test-tree_distance_utilities.R
@@ -31,35 +31,64 @@ test_that("CalculateTreeDistance() errs appropriately", {
   expect_error(CalculateTreeDistance(RobinsonFouldsSplits, BalancedTree(8), "Not a tree"))
 })
 
-test_that(".SL_MAX_TIPS is populated", {
-  expect_true(is.integer(TreeDist:::.SL_MAX_TIPS))
-  expect_true(TreeDist:::.SL_MAX_TIPS >= 2048L)
+test_that("cpp_sl_max_tips() returns a valid value", {
+  expect_true(is.integer(TreeDist:::cpp_sl_max_tips()))
+  expect_true(TreeDist:::cpp_sl_max_tips() >= 2048L)
 })
 
 test_that("Tip-count guard is applied consistently", {
   expect_no_error(.CheckMaxTips(1000L))
-  
-  limit32704 <- TreeDist:::.SL_MAX_TIPS == 32704L
-  if (limit32704) expect_no_error(.CheckMaxTips(32704L))
-  
-  errMsg <- if (limit32704) {
-    "Trees with 327.. tips are not yet supported \\(maximum 32704\\)"
-  } else {
-    "Trees with 327.. tips exceed the compiled limit of 2048"
-  }
-  expect_error(.CheckMaxTips(32705L), errMsg)
-  
+
+  # Direct C++ calls bypass the R guard; check_ntip() always says "not yet supported".
+  cppErrMsg <- "Trees with 327.. tips are not yet supported \\(maximum 32767\\)"
   splits8 <- unclass(as.Splits(BalancedTree(8)))
-  expect_error(cpp_robinson_foulds_distance(splits8, splits8, 32768L),
-               errMsg)
-  expect_error(cpp_robinson_foulds_info(splits8, splits8, 32768L),
-               errMsg)
+  expect_error(cpp_robinson_foulds_distance(splits8, splits8, 32768L), cppErrMsg)
+  expect_error(cpp_robinson_foulds_info(splits8, splits8, 32768L), cppErrMsg)
+})
+
+test_that("Tip-count guard accepts up to 32767 tips (TreeTools >= 2.3.0)", {
+  skip_if(TreeDist:::cpp_sl_max_tips() <= 2048L,
+          "Compiled against TreeTools < 2.3.0 (SL_MAX_TIPS = 2048)")
+  expect_no_error(.CheckMaxTips(32705L))
+  expect_no_error(.CheckMaxTips(32767L))
+  rErrMsg <- "Trees with 327.. tips are not yet supported \\(maximum 32767\\)"
+  expect_error(.CheckMaxTips(32768L), rErrMsg)
+
+  trees <- list(BalancedTree(8), PectinateTree(8))
+  class(trees) <- "multiPhylo"
+  expect_error(
+    .SplitDistanceAllPairs(RobinsonFouldsSplits, trees, letters[1:8], 32768L),
+    rErrMsg
+  )
+})
+
+test_that("Tip-count guard caps at SL_MAX_TIPS (TreeTools < 2.3.0)", {
+  skip_if(TreeDist:::cpp_sl_max_tips() > 2048L,
+          "Compiled against TreeTools >= 2.3.0 (SL_MAX_TIPS > 2048)")
+  rErrMsg <- "Trees with 327.. tips exceed the compiled limit of 2048"
+  expect_error(.CheckMaxTips(32705L), rErrMsg)
 
   trees <- list(BalancedTree(8), PectinateTree(8))
   class(trees) <- "multiPhylo"
   expect_error(
     .SplitDistanceAllPairs(RobinsonFouldsSplits, trees, letters[1:8], 32768L),
-    errMsg
+    rErrMsg
+  )
+})
+
+test_that("Tip-count guard: TT < 2.3.0 code path (mocked)", {
+  local_mocked_bindings(
+    cpp_sl_max_tips = function() 2048L,
+    .package = "TreeDist"
+  )
+  expect_no_error(.CheckMaxTips(100L))
+  expect_error(.CheckMaxTips(3000L),
+               "Trees with 3000 tips exceed the compiled limit of 2048")
+  trees <- list(BalancedTree(8), PectinateTree(8))
+  class(trees) <- "multiPhylo"
+  expect_error(
+    .SplitDistanceAllPairs(RobinsonFouldsSplits, trees, letters[1:8], 3000L),
+    "Trees with 3000 tips exceed the compiled limit of 2048"
   )
 })