From bed0adcc803b1b6f29d8cf0bc8af19f280e83904 Mon Sep 17 00:00:00 2001 From: abetlen Date: Sun, 17 May 2026 20:13:01 -0700 Subject: [PATCH 01/13] feat: update llama.cpp to dd7cad719 --- CHANGELOG.md | 2 +- llama_cpp/llama_cpp.py | 21 +++++++++++++++++++++ vendor/llama.cpp | 2 +- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 36e4fa168..3c63bc070 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -- feat: Update llama.cpp to ggerganov/llama.cpp@91e84fed6 and sync Python bindings +- feat: Update llama.cpp to ggml-org/llama.cpp@dd7cad719 and sync Python bindings - chore: Migrate llama.cpp submodule URL to ggml-org/llama.cpp by @shalinib-ibm in #2034 - fix: Enable unified KV cache for embedding contexts to preserve full per-sequence context in batch embedding calls by @SanjanaB123 in #2217 diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py index a9c32a15b..6560b5178 100644 --- a/llama_cpp/llama_cpp.py +++ b/llama_cpp/llama_cpp.py @@ -516,6 +516,14 @@ def _warn_deprecated(symbol: str, hint: str) -> None: LLAMA_SPLIT_MODE_TENSOR = 3 +# enum llama_context_type { +# LLAMA_CONTEXT_TYPE_DEFAULT = 0, +# LLAMA_CONTEXT_TYPE_MTP = 1, +# }; +LLAMA_CONTEXT_TYPE_DEFAULT = 0 +LLAMA_CONTEXT_TYPE_MTP = 1 + + # typedef struct llama_token_data { # llama_token id; // token id # float logit; // log-odds of the token @@ -894,9 +902,11 @@ class llama_sampler_seq_config(ctypes.Structure): # uint32_t n_batch; // logical maximum batch size that can be submitted to llama_decode # uint32_t n_ubatch; // physical maximum batch size # uint32_t n_seq_max; // max number of sequences (i.e. distinct states for recurrent models) +# uint32_t n_rs_seq; // number of recurrent-state snapshots per seq for rollback (0 = no rollback) [EXPERIMENTAL] # int32_t n_threads; // number of threads to use for generation # int32_t n_threads_batch; // number of threads to use for batch processing +# enum llama_context_type ctx_type; // set the context type (e.g. MTP) # enum llama_rope_scaling_type rope_scaling_type; // RoPE scaling type, from `enum llama_rope_scaling_type` # enum llama_pooling_type pooling_type; // whether to pool (sum) embedding results by sequence id # enum llama_attention_type attention_type; // attention type to use for embeddings @@ -947,8 +957,10 @@ class llama_context_params(ctypes.Structure): n_batch (int): logical maximum batch size that can be submitted to llama_decode n_ubatch (int): physical maximum batch size n_seq_max (int): max number of sequences (i.e. distinct states for recurrent models) + n_rs_seq (int): number of recurrent-state snapshots per sequence for rollback n_threads (int): number of threads to use for generation n_threads_batch (int): number of threads to use for batch processing + ctx_type (int): context type, from `enum llama_context_type` rope_scaling_type (int): RoPE scaling type, from `enum llama_rope_scaling_type` pooling_type (int): whether to pool (sum) embedding results by sequence id (ignored if no pooling layer) attention_type (int): attention type to use for embeddings @@ -982,8 +994,10 @@ class llama_context_params(ctypes.Structure): n_batch: int n_ubatch: int n_seq_max: int + n_rs_seq: int n_threads: int n_threads_batch: int + ctx_type: int rope_scaling_type: int pooling_type: int attention_type: int @@ -1016,8 +1030,10 @@ class llama_context_params(ctypes.Structure): ("n_batch", ctypes.c_uint32), ("n_ubatch", ctypes.c_uint32), ("n_seq_max", ctypes.c_uint32), + ("n_rs_seq", ctypes.c_uint32), ("n_threads", ctypes.c_int32), ("n_threads_batch", ctypes.c_int32), + ("ctx_type", ctypes.c_int), ("rope_scaling_type", ctypes.c_int), ("pooling_type", ctypes.c_int), ("attention_type", ctypes.c_int), @@ -1591,6 +1607,11 @@ def llama_n_ubatch(ctx: llama_context_p, /) -> int: ... def llama_n_seq_max(ctx: llama_context_p, /) -> int: ... +# LLAMA_API uint32_t llama_n_rs_seq (const struct llama_context * ctx); +@ctypes_function("llama_n_rs_seq", [llama_context_p_ctypes], ctypes.c_uint32) +def llama_n_rs_seq(ctx: llama_context_p, /) -> int: ... + + # DEPRECATED(LLAMA_API int32_t llama_n_ctx_train(const struct llama_model * model), "use llama_model_n_ctx_train instead"); @ctypes_function("llama_n_ctx_train", [llama_model_p_ctypes], ctypes.c_int32) def llama_n_ctx_train(model: llama_model_p, /) -> int: ... diff --git a/vendor/llama.cpp b/vendor/llama.cpp index 91e84fed6..dd7cad719 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit 91e84fed64329cd96202d68220724a1d92f5ec1f +Subproject commit dd7cad7197f991b18ded6aca46ff095972b95318 From 71898123b8a354e6dbff8a32c9805bc9c2e90e7b Mon Sep 17 00:00:00 2001 From: abetlen Date: Sun, 17 May 2026 23:20:06 -0700 Subject: [PATCH 02/13] feat: update llama.cpp to c3f95c1f0 --- CHANGELOG.md | 2 +- vendor/llama.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c63bc070..9f6700884 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -- feat: Update llama.cpp to ggml-org/llama.cpp@dd7cad719 and sync Python bindings +- feat: Update llama.cpp to ggml-org/llama.cpp@c3f95c1f0 and sync Python bindings - chore: Migrate llama.cpp submodule URL to ggml-org/llama.cpp by @shalinib-ibm in #2034 - fix: Enable unified KV cache for embedding contexts to preserve full per-sequence context in batch embedding calls by @SanjanaB123 in #2217 diff --git a/vendor/llama.cpp b/vendor/llama.cpp index dd7cad719..c3f95c1f0 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit dd7cad7197f991b18ded6aca46ff095972b95318 +Subproject commit c3f95c1f069c91e21b8063b09907a5fba38d1695 From d1f4bf67677a575cab418465057ea50695f22b79 Mon Sep 17 00:00:00 2001 From: abetlen Date: Mon, 18 May 2026 00:05:15 -0700 Subject: [PATCH 03/13] ci: add linux qwen35 failure diagnostics --- .github/workflows/test.yaml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 8a6845ff2..e68e4b6b1 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -33,6 +33,7 @@ jobs: needs: download-model runs-on: ubuntu-latest strategy: + fail-fast: false matrix: python-version: ["3.9", "3.10", "3.11", "3.12"] steps: @@ -50,6 +51,11 @@ jobs: with: path: ~/.cache/huggingface/hub key: ${{ runner.os }}-model-${{ env.REPO_ID }}-${{ env.MODEL_FILE }} + - name: System Info + run: | + uname -a + lscpu + python -c "import platform; print(platform.machine(), platform.architecture())" - name: Install dependencies (Linux/MacOS) run: | python -m pip install --upgrade pip @@ -58,7 +64,7 @@ jobs: shell: bash - name: Test with pytest run: | - python -m pytest + python -m pytest -q -s tests/test_llama.py::test_real_model build-windows: needs: download-model From e2df6b4a0254eb49d390bdbe36b023aeb4cbd024 Mon Sep 17 00:00:00 2001 From: abetlen Date: Mon, 18 May 2026 00:13:24 -0700 Subject: [PATCH 04/13] ci: instrument qwen35 native assertion --- vendor/llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vendor/llama.cpp b/vendor/llama.cpp index c3f95c1f0..aac47ea1e 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit c3f95c1f069c91e21b8063b09907a5fba38d1695 +Subproject commit aac47ea1e8d00ca5e52dde54008974980f81fe09 From 821905c52101753dd3627c9950e2bdefff6c798e Mon Sep 17 00:00:00 2001 From: abetlen Date: Mon, 18 May 2026 00:13:58 -0700 Subject: [PATCH 05/13] ci: instrument get_rows_f32 assertion --- vendor/llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vendor/llama.cpp b/vendor/llama.cpp index aac47ea1e..8e34af8cc 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit aac47ea1e8d00ca5e52dde54008974980f81fe09 +Subproject commit 8e34af8cc4fbc6351914e653b3ca9cf564922d5e From 8d3139ec37523874a28d0b326de5a0e6685efedd Mon Sep 17 00:00:00 2001 From: abetlen Date: Mon, 18 May 2026 00:14:30 -0700 Subject: [PATCH 06/13] ci: target get_rows_f32 assertion instrumentation --- vendor/llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vendor/llama.cpp b/vendor/llama.cpp index 8e34af8cc..8e602f69c 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit 8e34af8cc4fbc6351914e653b3ca9cf564922d5e +Subproject commit 8e602f69cc7abc9257e42a8034ea167f9585b7d7 From 40b2083a5cba034de9b2ce30e1f697619cb65a1f Mon Sep 17 00:00:00 2001 From: abetlen Date: Mon, 18 May 2026 00:16:00 -0700 Subject: [PATCH 07/13] ci: restore upstream llama.cpp pointer --- vendor/llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vendor/llama.cpp b/vendor/llama.cpp index 8e602f69c..c3f95c1f0 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit 8e602f69cc7abc9257e42a8034ea167f9585b7d7 +Subproject commit c3f95c1f069c91e21b8063b09907a5fba38d1695 From 463316637acd152f48a802b964dd3888711ac27d Mon Sep 17 00:00:00 2001 From: abetlen Date: Mon, 18 May 2026 00:16:32 -0700 Subject: [PATCH 08/13] ci: patch get_rows_f32 diagnostics on linux --- .github/workflows/test.yaml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index e68e4b6b1..b87cf2d2f 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -56,6 +56,31 @@ jobs: uname -a lscpu python -c "import platform; print(platform.machine(), platform.architecture())" + - name: Instrument Linux get_rows_f32 assertion + run: | + python - <<'PY' + from pathlib import Path + + path = Path("vendor/llama.cpp/ggml/src/ggml-cpu/ops.cpp") + text = path.read_text() + old = """ GGML_ASSERT(i01 >= 0 && i01 < ne01); + + ggml_vec_cpy_f32(nc,""" + new = """ if (!(i01 >= 0 && i01 < ne01)) { + fprintf(stderr, + "%s: invalid row index in get_rows_f32: dst=%s src0=%s src1=%s i=%lld i10=%lld i11=%lld i12=%lld i01=%lld ne01=%lld src1_ne=[%lld,%lld,%lld,%lld]\\n", + __func__, dst->name, src0->name, src1->name, + (long long) i, (long long) i10, (long long) i11, (long long) i12, + (long long) i01, (long long) ne01, + (long long) ne10, (long long) ne11, (long long) ne12, (long long) ne13); + } + GGML_ASSERT(i01 >= 0 && i01 < ne01); + + ggml_vec_cpy_f32(nc,""" + if old not in text: + raise SystemExit("get_rows_f32 assertion pattern not found") + path.write_text(text.replace(old, new, 1)) + PY - name: Install dependencies (Linux/MacOS) run: | python -m pip install --upgrade pip From 43b25bf9fcc04f8717f4d30f71d368af9917dddf Mon Sep 17 00:00:00 2001 From: abetlen Date: Mon, 18 May 2026 00:21:36 -0700 Subject: [PATCH 09/13] ci: fix diagnostic workflow syntax --- .github/workflows/test.yaml | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index b87cf2d2f..623afd150 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -63,20 +63,19 @@ jobs: path = Path("vendor/llama.cpp/ggml/src/ggml-cpu/ops.cpp") text = path.read_text() - old = """ GGML_ASSERT(i01 >= 0 && i01 < ne01); - - ggml_vec_cpy_f32(nc,""" - new = """ if (!(i01 >= 0 && i01 < ne01)) { - fprintf(stderr, - "%s: invalid row index in get_rows_f32: dst=%s src0=%s src1=%s i=%lld i10=%lld i11=%lld i12=%lld i01=%lld ne01=%lld src1_ne=[%lld,%lld,%lld,%lld]\\n", - __func__, dst->name, src0->name, src1->name, - (long long) i, (long long) i10, (long long) i11, (long long) i12, - (long long) i01, (long long) ne01, - (long long) ne10, (long long) ne11, (long long) ne12, (long long) ne13); - } - GGML_ASSERT(i01 >= 0 && i01 < ne01); - - ggml_vec_cpy_f32(nc,""" + old = " GGML_ASSERT(i01 >= 0 && i01 < ne01);\\n\\n ggml_vec_cpy_f32(nc," + new = ( + " if (!(i01 >= 0 && i01 < ne01)) {\\n" + " fprintf(stderr,\\n" + " \"%s: invalid row index in get_rows_f32: dst=%s src0=%s src1=%s i=%lld i10=%lld i11=%lld i12=%lld i01=%lld ne01=%lld src1_ne=[%lld,%lld,%lld,%lld]\\\\n\",\\n" + " __func__, dst->name, src0->name, src1->name,\\n" + " (long long) i, (long long) i10, (long long) i11, (long long) i12,\\n" + " (long long) i01, (long long) ne01,\\n" + " (long long) ne10, (long long) ne11, (long long) ne12, (long long) ne13);\\n" + " }\\n" + " GGML_ASSERT(i01 >= 0 && i01 < ne01);\\n\\n" + " ggml_vec_cpy_f32(nc," + ) if old not in text: raise SystemExit("get_rows_f32 assertion pattern not found") path.write_text(text.replace(old, new, 1)) From d2b183742481847742c6bb1d873b81329c34c78a Mon Sep 17 00:00:00 2001 From: abetlen Date: Mon, 18 May 2026 00:26:23 -0700 Subject: [PATCH 10/13] ci: fix diagnostic newline matching --- .github/workflows/test.yaml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 623afd150..32826b0ee 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -63,17 +63,17 @@ jobs: path = Path("vendor/llama.cpp/ggml/src/ggml-cpu/ops.cpp") text = path.read_text() - old = " GGML_ASSERT(i01 >= 0 && i01 < ne01);\\n\\n ggml_vec_cpy_f32(nc," + old = " GGML_ASSERT(i01 >= 0 && i01 < ne01);\n\n ggml_vec_cpy_f32(nc," new = ( - " if (!(i01 >= 0 && i01 < ne01)) {\\n" - " fprintf(stderr,\\n" - " \"%s: invalid row index in get_rows_f32: dst=%s src0=%s src1=%s i=%lld i10=%lld i11=%lld i12=%lld i01=%lld ne01=%lld src1_ne=[%lld,%lld,%lld,%lld]\\\\n\",\\n" - " __func__, dst->name, src0->name, src1->name,\\n" - " (long long) i, (long long) i10, (long long) i11, (long long) i12,\\n" - " (long long) i01, (long long) ne01,\\n" - " (long long) ne10, (long long) ne11, (long long) ne12, (long long) ne13);\\n" - " }\\n" - " GGML_ASSERT(i01 >= 0 && i01 < ne01);\\n\\n" + " if (!(i01 >= 0 && i01 < ne01)) {\n" + " fprintf(stderr,\n" + " \"%s: invalid row index in get_rows_f32: dst=%s src0=%s src1=%s i=%lld i10=%lld i11=%lld i12=%lld i01=%lld ne01=%lld src1_ne=[%lld,%lld,%lld,%lld]\\\\n\",\n" + " __func__, dst->name, src0->name, src1->name,\n" + " (long long) i, (long long) i10, (long long) i11, (long long) i12,\n" + " (long long) i01, (long long) ne01,\n" + " (long long) ne10, (long long) ne11, (long long) ne12, (long long) ne13);\n" + " }\n" + " GGML_ASSERT(i01 >= 0 && i01 < ne01);\n\n" " ggml_vec_cpy_f32(nc," ) if old not in text: From 51e540e5fcf04da9f2e38064b58ae5785b9d887a Mon Sep 17 00:00:00 2001 From: abetlen Date: Mon, 18 May 2026 00:38:34 -0700 Subject: [PATCH 11/13] ci: test qwen35 pre-norm flag initialization --- .github/workflows/test.yaml | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 32826b0ee..84dda9c3d 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -56,28 +56,21 @@ jobs: uname -a lscpu python -c "import platform; print(platform.machine(), platform.architecture())" - - name: Instrument Linux get_rows_f32 assertion + - name: Patch Linux Qwen3.5 pre-norm flag initialization run: | python - <<'PY' from pathlib import Path - path = Path("vendor/llama.cpp/ggml/src/ggml-cpu/ops.cpp") + path = Path("vendor/llama.cpp/src/llama-context.cpp") text = path.read_text() - old = " GGML_ASSERT(i01 >= 0 && i01 < ne01);\n\n ggml_vec_cpy_f32(nc," + old = " cparams.embeddings = params.embeddings;\n cparams.embeddings_pre_norm = false;\n" new = ( - " if (!(i01 >= 0 && i01 < ne01)) {\n" - " fprintf(stderr,\n" - " \"%s: invalid row index in get_rows_f32: dst=%s src0=%s src1=%s i=%lld i10=%lld i11=%lld i12=%lld i01=%lld ne01=%lld src1_ne=[%lld,%lld,%lld,%lld]\\\\n\",\n" - " __func__, dst->name, src0->name, src1->name,\n" - " (long long) i, (long long) i10, (long long) i11, (long long) i12,\n" - " (long long) i01, (long long) ne01,\n" - " (long long) ne10, (long long) ne11, (long long) ne12, (long long) ne13);\n" - " }\n" - " GGML_ASSERT(i01 >= 0 && i01 < ne01);\n\n" - " ggml_vec_cpy_f32(nc," + " cparams.embeddings = params.embeddings;\n" + " cparams.embeddings_pre_norm = false;\n" + " cparams.embeddings_pre_norm_masked = false;\n" ) if old not in text: - raise SystemExit("get_rows_f32 assertion pattern not found") + raise SystemExit("cparams pre-norm initialization pattern not found") path.write_text(text.replace(old, new, 1)) PY - name: Install dependencies (Linux/MacOS) From 7c7213e6a59632a9e342ca9c7a1a20b7f2c3a4da Mon Sep 17 00:00:00 2001 From: abetlen Date: Mon, 18 May 2026 01:00:25 -0700 Subject: [PATCH 12/13] ci: test qwen35 fix from submodule --- .github/workflows/test.yaml | 25 +------------------------ vendor/llama.cpp | 2 +- 2 files changed, 2 insertions(+), 25 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 84dda9c3d..8a6845ff2 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -33,7 +33,6 @@ jobs: needs: download-model runs-on: ubuntu-latest strategy: - fail-fast: false matrix: python-version: ["3.9", "3.10", "3.11", "3.12"] steps: @@ -51,28 +50,6 @@ jobs: with: path: ~/.cache/huggingface/hub key: ${{ runner.os }}-model-${{ env.REPO_ID }}-${{ env.MODEL_FILE }} - - name: System Info - run: | - uname -a - lscpu - python -c "import platform; print(platform.machine(), platform.architecture())" - - name: Patch Linux Qwen3.5 pre-norm flag initialization - run: | - python - <<'PY' - from pathlib import Path - - path = Path("vendor/llama.cpp/src/llama-context.cpp") - text = path.read_text() - old = " cparams.embeddings = params.embeddings;\n cparams.embeddings_pre_norm = false;\n" - new = ( - " cparams.embeddings = params.embeddings;\n" - " cparams.embeddings_pre_norm = false;\n" - " cparams.embeddings_pre_norm_masked = false;\n" - ) - if old not in text: - raise SystemExit("cparams pre-norm initialization pattern not found") - path.write_text(text.replace(old, new, 1)) - PY - name: Install dependencies (Linux/MacOS) run: | python -m pip install --upgrade pip @@ -81,7 +58,7 @@ jobs: shell: bash - name: Test with pytest run: | - python -m pytest -q -s tests/test_llama.py::test_real_model + python -m pytest build-windows: needs: download-model diff --git a/vendor/llama.cpp b/vendor/llama.cpp index c3f95c1f0..e02216170 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit c3f95c1f069c91e21b8063b09907a5fba38d1695 +Subproject commit e02216170bf21d1a8f9c63540070ec1d428f8879 From 9b541bccad9cb35a27014120a873f05bb7d70260 Mon Sep 17 00:00:00 2001 From: abetlen Date: Mon, 18 May 2026 07:24:46 -0700 Subject: [PATCH 13/13] feat: update llama.cpp to b9a2170fc --- CHANGELOG.md | 2 +- vendor/llama.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9f6700884..18c6af161 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -- feat: Update llama.cpp to ggml-org/llama.cpp@c3f95c1f0 and sync Python bindings +- feat: Update llama.cpp to ggml-org/llama.cpp@b9a2170fc and sync Python bindings - chore: Migrate llama.cpp submodule URL to ggml-org/llama.cpp by @shalinib-ibm in #2034 - fix: Enable unified KV cache for embedding contexts to preserve full per-sequence context in batch embedding calls by @SanjanaB123 in #2217 diff --git a/vendor/llama.cpp b/vendor/llama.cpp index e02216170..b9a2170fc 160000 --- a/vendor/llama.cpp +++ b/vendor/llama.cpp @@ -1 +1 @@ -Subproject commit e02216170bf21d1a8f9c63540070ec1d428f8879 +Subproject commit b9a2170fce1f3f33cb4934b34efecb806bbbb348