From 74e77f50e1657a30e268439dd2fe65d1dc97cf2d Mon Sep 17 00:00:00 2001 From: Scott Roy Date: Fri, 26 Jun 2026 17:47:59 -0700 Subject: [PATCH] gemma4_31b (MLX): enable per-session mutable buffers only when max_sessions > 1 --- examples/models/gemma4_31b/gemma4_31b_engine.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/examples/models/gemma4_31b/gemma4_31b_engine.cpp b/examples/models/gemma4_31b/gemma4_31b_engine.cpp index 5813372abec..b572f0b928d 100644 --- a/examples/models/gemma4_31b/gemma4_31b_engine.cpp +++ b/examples/models/gemma4_31b/gemma4_31b_engine.cpp @@ -684,7 +684,15 @@ Result> Gemma4_31BEngine::create( } } #elif defined(EXECUTORCH_BUILD_MLX) - mutable_state = std::make_unique(); + // Only enable the per-session mutable-buffer path when actually serving more + // than one session. For a single session (the CLI runner) the rebind would + // allocate a second copy of the KV-cache buffers on top of the program's + // default buffers — doubling KV-cache memory and adding a one-time + // session-buffer allocation during the first prefill — for no isolation + // benefit. Leaving mutable_state null keeps the program's default buffers. + if (config.max_sessions > 1) { + mutable_state = std::make_unique(); + } #endif auto module_res = mutable_state != nullptr