Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
2528 commits
Select commit Hold shift + click to select a range
5e368d7
test(kv): add Good/Bad/Ugly + Example coverage (AX)
Snider Jun 15, 2026
2173249
test(metal): add Example coverage for cache.go restore/truncate (AX)
Snider Jun 15, 2026
6e465f9
test(metal): cover stream/array/metal/quantized/turboquant (Good/Bad/…
Snider Jun 15, 2026
65191e4
test(kv): cover SaveStateBlocks / LoadFromStateBlocks / AssembleBlock…
Snider Jun 15, 2026
c41d44e
fix(gguf): correct Q2_K/Q3_K/Q6_K/Q8_K encoder block layout to canoni…
Snider Jun 15, 2026
d1ad054
test(probe): add Good/Bad/Ugly + Example coverage (AX)
Snider Jun 15, 2026
87f2dbd
test(daemon): add Good/Bad/Ugly + Example coverage (AX)
Snider Jun 15, 2026
4fea43c
test(chaptersmoke): add Good/Bad/Ugly + Example coverage (AX)
Snider Jun 15, 2026
32e5cff
test(cpp): cover decode + activation bridges (AX)
Snider Jun 15, 2026
ffe5adb
test(tokenizer): add Good/Bad/Ugly + Example coverage (AX)
Snider Jun 15, 2026
4473639
test(metal): cover lora/tokenizer/kernel/sample/decode-gates/mlp (Goo…
Snider Jun 15, 2026
3e7714c
test(grpo): add Good/Bad/Ugly + Example coverage (AX)
Snider Jun 15, 2026
f897ef8
test(memory): add Good/Bad/Ugly + Example coverage (AX)
Snider Jun 15, 2026
be201f4
test(metal): cover decode.go OutputAt bounds guard (Good, AX)
Snider Jun 15, 2026
7a08567
test(grpo): unique example checkpoint dir to keep TempDir isolation
Snider Jun 15, 2026
69818f1
test(distill): add Good/Bad/Ugly + Example coverage (AX)
Snider Jun 15, 2026
02e54d4
test(openai): add Good/Bad/Ugly + Example coverage (AX)
Snider Jun 15, 2026
078dce9
test(dataset): add Good/Bad/Ugly + Example coverage (AX)
Snider Jun 15, 2026
f03406c
test(substrate): add Good/Bad/Ugly + Example coverage (AX)
Snider Jun 15, 2026
889f6c6
chore(codecov): drop stale go/mlxlm ignore entry (package deleted in …
Snider Jun 15, 2026
53e266d
test(gemma4): vision/audio/projector branch coverage (AX)
Snider Jun 15, 2026
694019e
test(kv): cover analysis nil-guards + snapshot short-dtype/empty/trun…
Snider Jun 15, 2026
504c0de
test(gguf): align + example-in-situ coverage (AX)
Snider Jun 15, 2026
899de60
test(gemma4): align + audio/mel example-in-situ coverage (AX)
Snider Jun 15, 2026
82f5f53
test(gemma4): cover audio branch of unified multimodal forward (AX)
Snider Jun 15, 2026
8c5000c
test(safetensors): align + example-in-situ coverage (AX)
Snider Jun 15, 2026
70316dc
test(session): align + example-in-situ coverage (AX)
Snider Jun 15, 2026
34d0cf8
test(gemma4): vision-rope/pooler/decode example-in-situ coverage (AX)
Snider Jun 15, 2026
a3b9704
test(kv): align to AX standard + example-in-situ deep-path coverage (AX)
Snider Jun 15, 2026
30bc1fa
test(merge): align + example-in-situ coverage (AX)
Snider Jun 15, 2026
e1b7d82
test(gemma4): rename vision rope/pooler coverage file off _example_ (…
Snider Jun 15, 2026
295516f
test(train): align + example-in-situ coverage (AX)
Snider Jun 15, 2026
f8154d4
test(mlx): align + synthetic-reachable example-in-situ coverage (AX)
Snider Jun 15, 2026
b7bee90
test(lora): align + example-in-situ coverage (AX)
Snider Jun 15, 2026
c53edac
test(compute): align + example-in-situ coverage (AX)
Snider Jun 15, 2026
31b55fa
test(qwen3): align + synthetic example-in-situ coverage (AX)
Snider Jun 15, 2026
2c8dbd5
test(minimax/m2): align + example-in-situ coverage (AX)
Snider Jun 15, 2026
69dc5b3
test(qwen3): file free-func + staged examples at package level (godoc…
Snider Jun 15, 2026
8429e43
test(agent): align + example-in-situ coverage (AX)
Snider Jun 15, 2026
fc03fbc
test(composed): add Good/Bad/Ugly + Example coverage (AX)
Snider Jun 15, 2026
fa636bd
test(gemma3): align + synthetic example-in-situ coverage (AX)
Snider Jun 15, 2026
4e27315
test(blockcache): align + example-in-situ coverage (AX)
Snider Jun 15, 2026
367df04
test(profile): align + example-in-situ coverage (AX)
Snider Jun 15, 2026
edb71f5
test(kimi): align + synthetic example-in-situ coverage (AX)
Snider Jun 15, 2026
199a858
test(score): align + example-in-situ coverage (AX)
Snider Jun 15, 2026
24b1a46
test(mixtral): align + synthetic example-in-situ coverage (AX)
Snider Jun 15, 2026
5799f4c
test(distill): align + example-in-situ coverage (AX)
Snider Jun 15, 2026
05fbe5f
test(chaptersmoke): align + example-in-situ coverage (AX)
Snider Jun 15, 2026
fd5d555
test(metal): synthetic-model session/generate/cache coverage (AX)
Snider Jun 15, 2026
9287a5b
test(kvconv): align + example-in-situ coverage (AX)
Snider Jun 15, 2026
fa71520
test(metal): KV-blocks capture->restore round trip (AX)
Snider Jun 15, 2026
7f17808
test(bert): cover staged-loader real paths (AX)
Snider Jun 15, 2026
7198ebf
test(gptoss): align + synthetic example-in-situ coverage (AX)
Snider Jun 15, 2026
bcab970
test(deepseek): cover staged-loader real paths (AX)
Snider Jun 15, 2026
da2b188
test(cmd/mlx): synthetic-model command coverage (AX)
Snider Jun 15, 2026
009d8c0
test(mixtral): cover init loader closure via metal.LoadAndInit (100%)
Snider Jun 15, 2026
f936a7d
test(openai): align + example-in-situ coverage (AX)
Snider Jun 15, 2026
cc91dc5
test(grpo): align + example-in-situ coverage (AX)
Snider Jun 15, 2026
a3c8fda
test(gemma4): synthetic-model load/Generate coverage (AX)
Snider Jun 15, 2026
2d37f9e
test(model): align + example-in-situ coverage (AX)
Snider Jun 15, 2026
e596af9
test(memorypretrain): align + example-in-situ coverage (AX)
Snider Jun 15, 2026
16a6c53
fix(metal): finish mla-latent bounded cache — rotatingLatentKVCache f…
Snider Jun 15, 2026
893ea48
chore(codecov): commit curated config — 98% project target, ignore on…
Snider Jun 15, 2026
43bc513
refactor(memvid): remove deprecated memvid/QR-video store backend
Snider Jun 15, 2026
105fa25
refactor(safetensors): remove dead wrapper funcs superseded by *Scrat…
Snider Jun 15, 2026
88f0266
refactor(gguf): remove orphaned quantizeGGUFValues format switch (#209)
Snider Jun 15, 2026
7c6211f
refactor(score): remove dead non-fast-path phonetic helpers (#209)
Snider Jun 15, 2026
76302b5
refactor(minimax/tokenizer): remove dead candidate-builder + non-Byte…
Snider Jun 15, 2026
17add54
refactor(model): remove six zero-caller pack.go wrappers (#209)
Snider Jun 15, 2026
2584c46
refactor(model): extract chat-template inspection to pack_chattemplat…
Snider Jun 15, 2026
1819eec
refactor(model): extract modelPackDirIndex to pack_dirindex.go (AX-3/…
Snider Jun 15, 2026
b158715
refactor(model): extract task-profile inspection to pack_taskprofiles…
Snider Jun 15, 2026
d2ef082
refactor(model): extract quant-detector inspection to pack_quantinspe…
Snider Jun 15, 2026
a4677e3
refactor(kv): extract snapshot tensor windowing/slicing to blocks_win…
Snider Jun 15, 2026
c885d96
refactor(kv): extract bundle-load to blocks_load.go (AX-3/AX-7)
Snider Jun 15, 2026
e8b6844
refactor(kv): pair window unit tests into blocks_window_test.go (AX t…
Snider Jun 15, 2026
57d1b06
refactor(kv): extract block assembly to blocks_assemble.go (AX-3/AX-7)
Snider Jun 15, 2026
5920d6c
refactor(kv): extract block save/encode to blocks_save.go (AX-3/AX-7/…
Snider Jun 15, 2026
2fc04ab
refactor(kv): extract snapshot dtype/quant helpers to snapshot_dtype.…
Snider Jun 15, 2026
9862e35
refactor(kv): split snapshot.go serialise paths into snapshot_{encode…
Snider Jun 15, 2026
19dfc06
test(kv): pair snapshot dtype/decode unit tests into concern test fil…
Snider Jun 15, 2026
c2cc2e4
refactor(metal): peel attention-inspect/cache-policy/prefetch off gen…
Snider Jun 15, 2026
8493b10
test(metal): pair generate cache/attention/prefetch unit tests to con…
Snider Jun 15, 2026
d29521f
refactor(minimax/m2): split m2.go into load/route/metal concern files…
Snider Jun 15, 2026
e75062f
test(minimax/m2): pair load/route/metal tests to concern test files (…
Snider Jun 15, 2026
707e1eb
refactor(gguf): split quantize.go into kernels/writer concern files (…
Snider Jun 15, 2026
565328d
refactor(gguf): split info.go into parse/quant concern files (+ paire…
Snider Jun 15, 2026
dca5d77
refactor(hf): split hf.go into jang/fit concern files (+ paired tests…
Snider Jun 15, 2026
5c931e0
refactor(merge): split merge.go into write/copy concern files (+ pair…
Snider Jun 15, 2026
022389c
refactor(distill): split distill.go into loss/checkpoint concern file…
Snider Jun 15, 2026
3101eb0
refactor(train): split sft.go into batch/checkpoint/epoch concern fil…
Snider Jun 15, 2026
259b03b
refactor(compute): extract Metal kernel-source registry to compute_ke…
Snider Jun 15, 2026
0f15e17
refactor(grpo): split grpo.go into reward/checkpoint concern files (+…
Snider Jun 15, 2026
0ba2110
test(distill): route loss/checkpoint Example+Benchmark tests to conce…
Snider Jun 15, 2026
a09aabb
test(grpo): route reward/checkpoint Example+Benchmark tests to concer…
Snider Jun 15, 2026
5a3406f
test(hf): route jang/fit Example+Benchmark tests to concern files (AX…
Snider Jun 15, 2026
8a1bd4c
test(train): route sft batch/checkpoint/epoch Example+Benchmark tests…
Snider Jun 15, 2026
2cab68e
test(merge): route write-math Benchmarks to merge_write_bench_test.go…
Snider Jun 15, 2026
a55ed49
test(gguf): route quantize-kernel Benchmarks + NormalizeQuantType Exa…
Snider Jun 15, 2026
df6edae
test(kv): route snapshot encode/decode + blocks load/assemble/save Ex…
Snider Jun 15, 2026
9936ba5
test(minimax/m2): route load/route Example+Benchmark tests to concern…
Snider Jun 15, 2026
68d09db
test(metal): route prefetch/attention-inspect Example+Benchmark tests…
Snider Jun 15, 2026
bfd516e
test(hf): move InferJANG unit tests to hf_jang_test.go (AX triplet — …
Snider Jun 15, 2026
8fb72ff
fix(gemma4): legacy dappco.re/go/core import → dappco.re/go (v0.9.0 a…
Snider Jun 15, 2026
87f02e2
test(distill): v0.9.0 audit triplet+example compliance (move-first)
Snider Jun 15, 2026
8f88a5a
test(grpo): v0.9.0 audit triplet+example compliance (move-first)
Snider Jun 15, 2026
83f57da
test(compute): v0.9.0 audit triplet+example compliance (move-first)
Snider Jun 15, 2026
d593cc5
test(safetensors): v0.9.0 audit triplet+example compliance (move-first)
Snider Jun 15, 2026
56eab6e
test(gguf): v0.9.0 audit triplet+example compliance (move-first)
Snider Jun 15, 2026
e162827
test(minimax/m2): v0.9.0 audit triplet+example compliance (move-first)
Snider Jun 15, 2026
3de3f04
test(merge): v0.9.0 audit triplet+example compliance (move-first)
Snider Jun 15, 2026
ca1e02a
test(train): v0.9.0 audit triplet+example compliance (move-first)
Snider Jun 15, 2026
77b256f
test(adapter): v0.9.0 audit triplet+example compliance (move-first)
Snider Jun 15, 2026
e3f5ca4
test(hf): v0.9.0 audit triplet+example compliance (move-first)
Snider Jun 15, 2026
82f774e
test(blockcache): v0.9.0 audit triplet+example compliance (move-first)
Snider Jun 15, 2026
fd284eb
test(dataset): v0.9.0 audit triplet+example compliance (move-first)
Snider Jun 15, 2026
921fb93
test(kv): v0.9.0 audit triplet+example compliance (move-first)
Snider Jun 15, 2026
a467fb7
test(daemon): v0.9.0 audit triplet+example compliance (move-first)
Snider Jun 15, 2026
9cc71dc
test(substrate): v0.9.0 audit triplet+example compliance (move-first)
Snider Jun 16, 2026
a17b16f
test(chat): v0.9.0 audit triplet+example compliance (move-first)
Snider Jun 16, 2026
f186942
test(memory): v0.9.0 audit triplet+example compliance (move-first)
Snider Jun 16, 2026
fa3baaf
test(probe): v0.9.0 audit triplet+example compliance (move-first)
Snider Jun 16, 2026
f87a291
test(openai): v0.9.0 audit triplet+example compliance (move-first)
Snider Jun 16, 2026
c429088
fix(probe): handle Write/Close errors in LineProtocolSink.add
Snider Jun 16, 2026
728623a
test(score): v0.9.0 audit triplet+example compliance (move-first)
Snider Jun 16, 2026
3f7da0d
test(autoround): v0.9.0 audit triplet+example compliance (move-first)
Snider Jun 16, 2026
6841b00
test(agent): v0.9.0 audit triplet+example compliance (move-first)
Snider Jun 16, 2026
dd89ad7
test(profile): v0.9.0 audit triplet+example compliance (move-first)
Snider Jun 16, 2026
f18eccc
test(bundle): v0.9.0 audit triplet+example compliance (move-first)
Snider Jun 16, 2026
6c245de
test(agent): unconditional persistence round-trip in SaveStateIndex Good
Snider Jun 16, 2026
c6bb9a3
test(memorypretrain): v0.9.0 audit triplet+example compliance (move-f…
Snider Jun 16, 2026
5f5ad3d
fix(coverage): measure the real metal_runtime engine, not the no-tag …
Snider Jun 16, 2026
f68aacb
chore(coverage): fold tagged coverage into Taskfile, drop coverage.sh
Snider Jun 16, 2026
d99da63
test(metal): cover session_pipelined helpers (no model load)
Snider Jun 16, 2026
9195595
test(metal): non-synthetic batch coverage on a real model (e2b)
Snider Jun 16, 2026
078a4c0
chore(cov): task cov runs model_eval so non-synthetic tests count
Snider Jun 16, 2026
7758a49
test(metal): non-synthetic prompt-cache store/match coverage (e2b)
Snider Jun 16, 2026
4983dd4
test(metal): non-synthetic session KV-snapshot lifecycle (e2b)
Snider Jun 16, 2026
c150d3d
test(metal): non-synthetic sampler-variant coverage (e2b)
Snider Jun 16, 2026
cd0f492
test(metal): non-synthetic generate entrypoints + direct path (e2b)
Snider Jun 16, 2026
d01fc1a
test(metal): isolate runtime gates in model_eval tests (fix cross-tes…
Snider Jun 16, 2026
ee47808
test(metal): non-synthetic vision-chat image input (e2b)
Snider Jun 16, 2026
d0f2e13
test(metal): non-synthetic tokenizer encode/decode variants (real e2b…
Snider Jun 16, 2026
2f09733
fix(metal): track kv format v6 in KVSnapshotVersion — store-wake reje…
Snider Jun 16, 2026
f3299a3
test(metal): MTP greedy-exactness advisory, not gated; configurable p…
Snider Jun 16, 2026
0c3e6fc
test(cli): AX-10 mlx CLI-test foundation — isolated `generate` artifa…
Snider Jun 16, 2026
6024aac
test(cli): AX-10 state artifact — isolated sleep→wake-from-store (v6 …
Snider Jun 16, 2026
7b98eec
test(cli): AX-10 serve artifact — backgrounded HTTP serve + real /v1/…
Snider Jun 16, 2026
2bf5e9c
test(cli): AX-10 mtp artifact — speculative pair engages + accepts
Snider Jun 16, 2026
4f95b70
chore(scripts): delete gemma4_prompt_contract.py — superseded, premis…
Snider Jun 16, 2026
3e9f388
chore(scripts): delete cross-engine workflow-bench cluster — supersed…
Snider Jun 16, 2026
9378081
test(cli): AX-10 discover artifact — machine/device report (no model,…
Snider Jun 16, 2026
1ab0ee3
test(cli): AX-10 help + ssd-recipes artifacts; fix(cli): help omitted…
Snider Jun 16, 2026
fef2b47
test(cli): AX-10 pack (happy) + ssd-eval/state-pack/slice/memory-pret…
Snider Jun 16, 2026
23593be
test(cli): AX-10 usage artifacts for sft/fuse/ssd/diffuse/audio
Snider Jun 16, 2026
a7bd4af
test(cli): AX-10 tune/vision/ebook happy-paths — full mlx CLI surface…
Snider Jun 16, 2026
a9e2736
perf(metal): decode replay cgo bridge + end-to-end record/replay test
Snider Jun 16, 2026
c47a301
perf(metal): prove update-input-then-replay — the decode-advance pattern
Snider Jun 16, 2026
77226dd
perf(gemma4): buffer-driven decode RoPE offset (mlx_fast_rope_dynamic)
Snider Jun 17, 2026
de7a51f
feat(native): pure-Go (no-cgo) Metal decode path — full layer + ICB r…
Snider Jun 17, 2026
a1c5beb
feat(native): full DecodeLayer ICB replay + per-layer encode-bypass m…
Snider Jun 17, 2026
36806b4
feat(native): single-submit per-token ICB harness — un-diluted encode…
Snider Jun 17, 2026
8f57175
feat(native): real decode step with growing KV cache (the cache-write…
Snider Jun 17, 2026
2eca58d
feat(native): multi-layer multi-token decode forward over a growing K…
Snider Jun 17, 2026
58fc8ee
feat(native): cache-grow ICB replay — encode-bypass over a growing KV…
Snider Jun 17, 2026
35b932e
test(native): E2B-scale encode-bypass measurement — savings hold, rat…
Snider Jun 17, 2026
14f6b42
perf(native): profile the cache-grow forward — it's GPU-bound, not ho…
Snider Jun 17, 2026
c7d4cb1
perf(native): confirm the 4-bit qmv lever — ~2x faster projections at…
Snider Jun 17, 2026
cd63ba8
feat(native): QMVBF16 — bf16-activation 4-bit quantised matvec (parit…
Snider Jun 17, 2026
449e94b
feat(native): encQMVBF16 chained helper — foundation for the projecti…
Snider Jun 17, 2026
bf85edd
feat(native): quantised DecodeForward — the 4-bit decode path, off mlx-c
Snider Jun 17, 2026
d45a784
test(native): de-risk qmv-in-ICB — affine_qmv_bfloat16_t replays as a…
Snider Jun 17, 2026
207786c
feat(native): quant-ICB — 4-bit + encode-bypass stacked, 206 tok/s @ …
Snider Jun 17, 2026
d02c164
feat(model): pkg/model part one — backend cross-section of the quant …
Snider Jun 17, 2026
4aad7b7
feat(model): register metal backend in the quant cross-section — both…
Snider Jun 17, 2026
729aa71
feat(model): declarative gemma4 arch (part three) — the cache-topolog…
Snider Jun 17, 2026
0a1ca48
feat(native): the executor (part three) — arch-driven forward honouri…
Snider Jun 17, 2026
c6b883d
feat(native): sliding-window attention in the executor
Snider Jun 17, 2026
1f2d23b
feat(native): MoE expert branch (part three) — top-k SwiGLU + weighte…
Snider Jun 17, 2026
9186b03
feat(native): MoE router (part three) — RMS-norm → expert-score gemv …
Snider Jun 17, 2026
dac9d38
feat(native): MoE dual-branch FFN composition (part three) — local ML…
Snider Jun 17, 2026
6807c74
feat(native): wire MoE into the arch executor — DecodeForwardArch rou…
Snider Jun 17, 2026
e62ab1b
feat(native): gemma4 per-layer-input gate (PerLayerInputGateBF16)
Snider Jun 17, 2026
9cea4be
feat(model/gemma4): dims-fill from config — Config + Config.Arch() bu…
Snider Jun 17, 2026
943c7c1
feat(native): 4-bit arch-forward (DecodeForwardArchQuant) + factor th…
Snider Jun 17, 2026
486d465
feat(native): arch-driven cache-grow ICB (DecodeForwardArchICB) — enc…
Snider Jun 17, 2026
0123596
feat(native): stacked quant+ICB arch-forward (DecodeForwardArchICBQua…
Snider Jun 17, 2026
53ad341
feat(model,native): the Backend seam — model.Backend interface + Nati…
Snider Jun 17, 2026
0773a64
feat(native): the decode bookends — EmbedTokensBF16 + LMHeadBF16 (tok…
Snider Jun 17, 2026
9550e0c
feat(model): backend-agnostic token sampler — Greedy + temperature/to…
Snider Jun 17, 2026
5278a86
feat(safetensors): pure-Go safetensors reader — the weight loader's f…
Snider Jun 17, 2026
0bc3d67
feat(native): gemma4 bf16 weight assembly — parsed safetensors → nati…
Snider Jun 17, 2026
dfd757c
feat(native): gemma4 norm reconciliation (part 1) — post-attention + …
Snider Jun 17, 2026
7fe2657
feat(native): gemma4 norm reconciliation (part 2) — per-head QK-norm;…
Snider Jun 17, 2026
d1a647c
feat(native): gemma4 norm reconciliation (part 3) — ICB-path norms; t…
Snider Jun 17, 2026
6d43c12
feat(native): the token loop — GenerateGemma4BF16 (embed → decode → l…
Snider Jun 17, 2026
10360d0
perf(native): incremental decode — persistent growing KV cache, O(1)/…
Snider Jun 17, 2026
df84a79
feat(native): persistent serving session — Gemma4Session, KV cache ac…
Snider Jun 17, 2026
acb304f
feat(native,safetensors): model-load pipe — config + weights bytes → …
Snider Jun 17, 2026
aaf6300
refactor(tokenizer): relocate the gemma4 tokenizer to a shared pkg/to…
Snider Jun 17, 2026
afc41cf
feat(native): text in/out — Gemma4Session.GenerateText (encode → gene…
Snider Jun 17, 2026
609d4d6
feat(native,gemma4): per-attention-type RoPE — sliding layers use the…
Snider Jun 17, 2026
6b70e49
feat(safetensors,native): sharded checkpoint loader — load a real gem…
Snider Jun 17, 2026
c0c970f
feat(native): 4-bit gemma4 layer assembler — quant projections from H…
Snider Jun 17, 2026
e0fba87
feat(native): quant decode bookends — EmbedTokensQuant + LMHeadQuant …
Snider Jun 17, 2026
8360692
feat(native,gemma4): load a 4-bit gemma4 directory into a session (4-…
Snider Jun 17, 2026
697f485
feat(native,gemma4): partial-rotary RoPE op + config (partial rotary …
Snider Jun 17, 2026
2141144
feat(native): wire partial rotary into the decode per attention type …
Snider Jun 17, 2026
c3c72bb
feat(native): handle the gemma4_unified language_model.* name prefix
Snider Jun 17, 2026
91143b2
feat(native): apply gemma4 per-layer output scalar (layer_scalar)
Snider Jun 17, 2026
f756260
feat(gemma4): proportional RoPE — fold the headDim normalisation into…
Snider Jun 17, 2026
7211dc0
feat(native): per-layer-input tensor pipeline — PerLayerInputs (E2B/E…
Snider Jun 17, 2026
5a9f374
feat(native): per-layer-input gate, quant + wired into the decode tai…
Snider Jun 17, 2026
65e37f7
feat(native): assemble + thread the per-layer-input tower — E2B/E4B l…
Snider Jun 17, 2026
6fc32e3
fix(gemma4): resolve the multimodal text_config wrapper — real config…
Snider Jun 17, 2026
9a692de
feat(native): one-call dir→text — LoadGemma4Dir + GenerateTextFromDir
Snider Jun 17, 2026
f43be6a
feat(native): 4-bit batched MoE experts — MoEExpertsQuant (26B-A4B pa…
Snider Jun 17, 2026
5536e50
feat(native): 4-bit MoE router — MoERouterQuant (26B-A4B part 2)
Snider Jun 17, 2026
4dae54e
feat(native): 4-bit dual-branch MoE block — MoEBlockQuant (26B-A4B pa…
Snider Jun 17, 2026
5bb185b
feat(gemma4): parse mixed-precision quant overrides — QuantConfig.For…
Snider Jun 17, 2026
a7b1bcf
feat(native,gemma4): load the mixed-precision MoE — 26B-A4B loads end…
Snider Jun 17, 2026
895730c
feat(mistral): config parser → backend-agnostic Arch (Ministral-3 par…
Snider Jun 17, 2026
f2f473a
feat(native): assemble + load Ministral-3 bf16 → it runs no-cgo (Mini…
Snider Jun 17, 2026
192464f
feat(deltanet): gated delta-rule recurrence — the Qwen3.5/3.6 linear …
Snider Jun 17, 2026
e4dbce9
feat(qwen3): gated-delta decay + β resolution — the discretisation fe…
Snider Jun 17, 2026
70aa518
refactor(flakernel): extract the shared gated-SSM helpers — mamba2 + …
Snider Jun 18, 2026
9b7021a
feat(qwen3): the GatedDeltaNet mixer forward — Qwen3.5/3.6 linear att…
Snider Jun 18, 2026
663f332
feat(qwen3): register the linear_attention mixer loader — composed mo…
Snider Jun 18, 2026
8cd5766
feat(qwen3,metal): route qwen3_6 → the composed hybrid + nested-rope/…
Snider Jun 18, 2026
feff7e4
fix(metal): rename DenseRopeParams — avoid the gemma4 RopeParams coll…
Snider Jun 18, 2026
1ec0dc5
feat(gemma4): whole-stack compiled decode (gated, MLX_COMPILED_STACK)
Snider Jun 18, 2026
59631a8
perf(gemma4): env-gated decode-replay + diffusion eval-split probes (…
Snider Jun 18, 2026
3303d56
style(mamba2): gofmt chunk.go + scan_test.go
Snider Jun 18, 2026
f274ca2
feat(mistral): YaRN inverse-frequency computation — the long-context …
Snider Jun 18, 2026
1ad3c53
feat(mistral,gemma4): carry YaRN freqs on the Arch — config resolves …
Snider Jun 18, 2026
e8b0707
feat(native): freqs-aware RoPE op — explicit per-dim frequencies for …
Snider Jun 18, 2026
3bb10ec
feat(native): thread YaRN freqs-rope through the decode executor — Mi…
Snider Jun 18, 2026
86024c9
feat(model): the token-loop contract — embed/lm-head bookends + Gener…
Snider Jun 18, 2026
e5ad307
feat(native): NativeTokenModel — the no-cgo backend satisfies the tok…
Snider Jun 18, 2026
ed00457
feat(native): quant NativeTokenModel — the 4-bit path satisfies the t…
Snider Jun 18, 2026
c7e0e30
feat(model,native): incremental decode on the token-loop contract — O…
Snider Jun 18, 2026
0958dc5
feat(metal/gemma4): cgo backend satisfies the token-loop contract — a…
Snider Jun 18, 2026
104af1e
feat(model,metal): DecodeStepper.Close lifecycle hook → metal Session…
Snider Jun 18, 2026
586ac6e
feat(model,native): E2B/E4B per-layer-input models decode through the…
Snider Jun 18, 2026
18a03a0
feat(mlx,native): serve the no-cgo token-loop contract — `serve --nat…
Snider Jun 18, 2026
0d88d8d
feat(model/gemma4): per-attention-type head_dim in the declaration (p…
Snider Jun 18, 2026
79ce721
feat(native,model): native consumes per-layer head_dim + the model-de…
Snider Jun 18, 2026
af590ea
feat(model/gemma4): the compute seam for the one gemma4 decode (port …
Snider Jun 18, 2026
bee3404
feat(native): implement the gemma4 compute seam (Decoder) over the pr…
Snider Jun 18, 2026
f3975b9
feat(native): make the gemma4 decode faithful to metal — value-norm +…
Snider Jun 18, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
15 changes: 2 additions & 13 deletions .codecov.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,11 @@ coverage:
status:
project:
default:
target: 80%
threshold: 1%
target: 98%
threshold: 8%
patch:
default:
target: 70%

ignore:
# Hardware/native runtime paths need a separate Metal-backed integration gate.
- "go/*_darwin.go"
- "go/register_metal.go"
- "go/internal/metal/**"

# Adapter shells and sidecars are tested, but not part of the core library gate.
- "go/training.go"
- "go/mlxlm/**"
- "go/pkg/daemon/**"
- "go/pkg/memvid/cli/**"
- "go/cmd/**"
- "go/tests/**"
12 changes: 0 additions & 12 deletions .forgejo/workflows/security-scan.yml

This file was deleted.

27 changes: 0 additions & 27 deletions .forgejo/workflows/test.yml

This file was deleted.

20 changes: 17 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,18 +1,27 @@
# Build artifacts
build/
bin/
*.dylib
*.so
*.a

# `go build ./go/cmd/mlx/` without -o lands the binary at repo root.
# Convention is `go build -o bin/mlx` (bin/ already ignored above);
# this catches the shortcut form too.
/mlx

# CMake
CMakeCache.txt
CMakeFiles/
cmake_install.cmake
Makefile

# CMake install output (keep headers for Go module consumers)
dist/*
!dist/include/
# CMake install output
dist/

# Local Go build/test shortcuts
/go/mlx
/*.test

# IDE
.idea/
Expand All @@ -22,6 +31,11 @@ dist/*
# macOS
.DS_Store

# lthn/desktop frontend dist — copied at build time by
# scripts/make-app-bundle.sh, embedded in cmd/mlx via go:embed.
# Single source of truth lives in lthn/desktop/frontend/.
go/cmd/mlx/frontend/dist/

# Knowledge base
KB/
.core/
Expand Down
12 changes: 12 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,15 @@
path = external/go-io
url = https://github.com/dappcore/go-io.git
branch = dev
[submodule "external/go-ai"]
path = external/go-ai
url = https://github.com/dappcore/go-ai.git
branch = dev
[submodule "external/go-ml"]
path = external/go-ml
url = https://github.com/dappcore/go-ml.git
branch = dev
[submodule "external/go-cgo"]
path = external/go-cgo
url = https://github.com/dappcore/go-cgo.git
branch = dev
20 changes: 15 additions & 5 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ All Go code lives under `go/`:
`nomlxlm` removes it)
- `go/cmd/violet/` and `go/pkg/daemon/` — local Violet Unix-socket sidecar
- `cpp/` — C++ side companion (CLion-side worktree)
- `lib/mlx/` — upstream MLX submodule pinned at `v0.30.1`
- `lib/mlx/` — upstream MLX submodule pinned at `v0.31.1`
- `patches/` — local patches against `lib/mlx` (manual apply only)
- `docs/`, `examples/` — markdown documentation and per-feature usage examples

Expand All @@ -25,6 +25,15 @@ Unsupported builds compile against the `*_stub.go` files and a stub
`MetalAvailable() bool` that returns false. Do not move CGO code out of
`go/internal/metal/`.

The native path targets [macOS Tahoe 26.0+](https://developer.apple.com/documentation/macos-release-notes/macos-26-release-notes)
on Apple Silicon. The floor is intentional: the Metal 4 API generation this
runner is built around shipped with macOS 26, including lower-overhead command
encoding, explicit compilation control, tensor resources, and machine-learning
passes. Keep build and test invocations aligned with that floor by passing
`-ldflags "-extldflags=-mmacosx-version-min=26.0"` when compiling native code.
See `docs/operator/deployment.md` and `docs/operator/metallib-and-variants.md`
for the full reference chain.

## Conventions

- UK English in code, comments, and docs (colour, organisation, behaviour)
Expand All @@ -47,10 +56,11 @@ model downloads.

## Sandboxing Notes

Before handing off, run the repository gates from the brief with `GOWORK=off`.
On sandboxed systems, set `GOCACHE` to a writable directory such as
`/tmp/codex-go-mlx-cache` so Go can compile without touching the user
cache. If the sandbox cannot resolve the bundled `mlx.metallib`, apply
Before handing off, run the repository gates from the checked-in workspace; do
not use `GOWORK=off` unless the user explicitly asks for an isolated module
check. On sandboxed systems, set `GOCACHE` to a writable directory such as
`/tmp/codex-go-mlx-cache` so Go can compile without touching the user cache.
If the sandbox cannot resolve the bundled `mlx.metallib`, apply
`patches/mlx-metallib-path.patch` inside `lib/mlx` to enable the
`MLX_METALLIB_PATH` env-var override (not auto-applied).

Expand Down
7 changes: 4 additions & 3 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,18 @@ After Mantis #1241, all Go code lives under `go/`:
```
go/ Go module root (dappco.re/go/mlx)
*.go Public root API: model, tokenizer, compute, training, eval, distill, GRPO, hf-fit, merge, gguf-quantize, kv-snapshot, lora-fuse
cmd/mlx/ CLI tool (built with `-o core-mlx`; consumers rename: lthn-mlx)
cmd/violet/ Unix-socket sidecar daemon
internal/metal/ All CGO code (mlx-c bindings)
mlxlm/ CGO-free Python subprocess backend
pkg/daemon/ Daemon implementation
pkg/memvid/ Memvid storage CLI
pkg/memvid/ Deprecated State codec compatibility shim
tests/ Integration tests
cpp/ C++ side (CLion-side companion)
docs/ Markdown documentation
examples/ Per-feature usage examples (markdown)
external/ Vendored core libraries
lib/mlx/ Upstream mlx submodule (pinned at v0.30.1)
lib/mlx/ Upstream mlx submodule (pinned at v0.31.1)
patches/ Local patches to lib/mlx (not auto-applied)
```

Expand Down Expand Up @@ -127,7 +128,7 @@ Architecture is detected from `config.json` (`model_type`) for safetensors and f

## Submodule Patches

`lib/mlx` is pinned at upstream tag `v0.30.1`. Local patches that we do not upstream live in `patches/` as standalone diff files (e.g. `patches/mlx-metallib-path.patch` for the `MLX_METALLIB_PATH` env-var override). Patches are not auto-applied — run them inside the submodule manually when their function is needed:
`lib/mlx` is pinned at upstream tag `v0.31.1`. Local patches that we do not upstream live in `patches/` as standalone diff files (e.g. `patches/mlx-metallib-path.patch` for the `MLX_METALLIB_PATH` env-var override). Patches are not auto-applied — run them inside the submodule manually when their function is needed:

```bash
git -C lib/mlx apply ../../patches/mlx-metallib-path.patch
Expand Down
119 changes: 119 additions & 0 deletions CLAUDE.operator.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# CLAUDE.operator.md

Operator-facing guidance for **running** `lthn-mlx` in production. Companion to `CLAUDE.md` (developer-facing — architecture, build, contribute). If you arrived here mid-session needing to deploy, troubleshoot, or reason about distribution, you're in the right doc. If you arrived needing to add a model decoder or change cgo bindings, go to `CLAUDE.md`.

The operator audience is a future Cladius / Athena / Hephaestus session, *or* a human operator (Snider, ops-side) doing a deploy. Same mental model serves both — the difference is just whether the reader can edit code on the spot.

## Read order

1. **This file**, skim through "Operating principles" — calibrates what the binary is and isn't.
2. **`docs/operator/deployment.md`** — what you ship, how it runs, what to bind to.
3. **`docs/operator/metallib-and-variants.md`** — the variant question, the bundling strategy, the active CWD-resolution panic.
4. **`docs/operator/troubleshooting.md`** — the failure modes in lifecycle order, with fixes.
5. **`docs/operator/index.md`** — the full operator doc set + what's planned.

If you have ~3 minutes, read this file. If you have ~30 minutes, read all five.

## What lthn-mlx is

A single-process boundary that wraps native Apple Metal GPU inference (via mlx-c CGO bindings) and serves it as OpenAI / Anthropic / Ollama-compatible HTTP. Snider's framing, made explicit on 2026-05-25:

> **"The actual model is the binary, the rest is package."**

This is the load-bearing architecture decision. Everything that wants inference — `lthn` desktop, `pkg/lemma` in lthn/desktop, providers in `go-ai`, any OpenAI-compatible Python / TypeScript / curl client — talks to `lthn-mlx` over HTTP. There is no in-process library substitute for production. The binary is the boundary.

**One process. One model. One HTTP listener.** That's the unit. Multi-model deployments mean multiple processes on different ports plus a router in front (the `pkg/lemma` client is the canonical Go-side router).

The binary is built from `dappco.re/go/mlx/cmd/mlx`; the default output name is `core-mlx`, which consumers rename to `lthn-mlx`. The module path is `dappco.re/go/mlx`.

## Operating principles

These are the load-bearing facts an operator needs in working memory. Each one shapes a deployment decision.

### 1. Apple Silicon only

`darwin/arm64`. No Linux. No Intel macOS. The CGO files carry `//go:build darwin && arm64`; a stub returns `MetalAvailable() = false` everywhere else. M1 / M2 / M3 / M4, any chip class, any deployment running macOS 13 or later — one binary serves them all (modulo the metallib variant matrix; see point 5).

If the deployment target isn't Apple Silicon, you don't want `lthn-mlx` — you want a different go-inference backend (`go-rocm` for AMD GPUs, or the CGO-free `mlxlm` subprocess backend bundled in the same repo for Python-on-anything).

### 2. The binary needs the metallib

`mlx.metallib` (~107 MB, MetalLib v1.2.9, the compiled GPU kernel archive) must be findable at runtime. Today, until the bundling work lands, this means **setting `MLX_METALLIB_PATH` to an absolute path** before invoking. Not setting it is the single most common deployment failure — the binary starts, `/v1/health` passes, then panics inside `mlx_metal_load_library` on the first GPU dispatch.

```bash
export MLX_METALLIB_PATH=/opt/lthn-mlx/lib/mlx.metallib
lthn-mlx serve --model /opt/lthn-mlx/models/lemer-lite --addr :11434
```

The permanent fix is Path B bundling (embed via `//go:embed`, load via `MTLDevice newLibraryWithData:`). Until that ships, treat the env var as mandatory deployment config. See `docs/operator/metallib-and-variants.md` for the why and `docs/operator/troubleshooting.md` for the panic signature.

### 3. Model loads lazily

`lthn-mlx serve` starts in under a second. The model loads on the **first request that needs it**, not at process start. This means:

- Liveness probes against `/v1/health` pass before the model is loaded. They are not readiness probes.
- The first inference request after start takes 2-15 seconds depending on model size and storage speed.
- For consistent first-request latency, pre-warm in the service manager's post-start hook with a one-token completion (see deployment.md).

There is no on-disk lock, no PID file, no recovery state. Restart is safe; the new process starts cold and lazy-loads. The service manager is responsible for single-instance enforcement.

### 4. HTTP surface is trusted-network only

`lthn-mlx serve` has no authentication, no rate limiting, no TLS. Default bind is `:11434` (same port as Ollama); because the host part is empty, this listens on all interfaces. Bind to `127.0.0.1:11434` for same-machine use, or `0.0.0.0:11434` for LAN use. **Production LAN exposure sits behind a reverse proxy** that handles auth and TLS (Caddy, nginx).

If you need authenticated remote access, that lives in `pkg/lemma` (the Go client) plus a tunnel / proxy / auth-gateway — not in `lthn-mlx` itself. Don't try to add auth to the serve binary; it would violate the boundary rule and duplicate work already done one layer up.

### 5. Variants matter at the toolchain axis, not the chip axis

Snider's question of 2026-05-25: "if the lib is different for different apple versions, we need to know the variants that need building." The chip family (M1/M2/M3/M4) is **not** a variant axis — Apple's Metal driver handles forward-compatibility from a single archive. What actually varies is the build-host toolchain: Metal language version ≥4.0 + macOS SDK ≥26.2 (Xcode 26+) unlocks the NAX kernel family for M4-class tensor coprocessors.

**Practical ship matrix:**

| Variant | Build host | Runs on | Use case |
|---------|------------|---------|----------|
| `mlx-baseline.metallib` | Any modern Xcode, deployment-min 13 | M1-M4 on macOS 13+ | Default ship today |
| `mlx-nax.metallib` | Xcode 26+, deployment-min 26 | M4-class on macOS 26+ only | Deferred to M4 optimisation lane |

Ship the baseline. The NAX variant is a future M4 fast-path optimisation, not a today-decision. Full evidence and the open questions (driver-side load behaviour for higher `min`, NAX dispatch gating on non-M4) in `docs/operator/metallib-and-variants.md`.

### 6. Unified memory is the budget

On Apple Silicon there is no separate VRAM line item — the GPU and CPU share unified memory. The process budget includes: model weights, KV cache (scales linearly with `--context`), MLX allocator cache, plus everything else macOS is doing. A 7B model in 4-bit needs ~5 GB resident; the default 131k context can add several GB more.

Tuning knobs live in `dappco.re/go/mlx` at the package level (`SetMemoryLimit`, `SetCacheLimit`, `SetWiredLimit`, `ClearCache`, `GetActiveMemory`, `GetPeakMemory`). They are **not** exposed as `serve` flags today — if you need them on the bundled CLI, file a feature ticket against `go/cmd/mlx/serve.go`. For now, custom integrations on top of `openai.NewMuxWithAdmin` can wire them directly.

Activity Monitor's "Memory" column is the right place to watch the process. `/v1/cache/stats` reports MLX's allocator view.

### 7. Graceful shutdown is signal-driven

SIGINT and SIGTERM both trigger `http.Server.Shutdown` with `--shutdown-timeout` (default 10s) as the drain deadline. After the deadline, the process exits. There is no explicit model-unload step — the OS reclaims Metal allocations on exit.

If you have long-running generations and need them to drain cleanly on bounce, raise `--shutdown-timeout` (30s-60s). If you need explicit teardown for an exotic daemon scenario, wire the `Sleep` admin callback in a custom integration.

## Mental model in one paragraph

`lthn-mlx serve` is a stateless OpenAI-compatible HTTP server backed by Apple Metal GPU inference, single-model per process, lazy-load on first request, signal-driven graceful shutdown, requires a findable `mlx.metallib` (env var until bundling lands), no built-in auth or TLS, designed for trusted-network use, with a `pkg/lemma`-shaped routing layer one level up for multi-model or remote-access patterns. The architecture insists on the binary as the only process boundary — everything else is packages talking to it over HTTP.

That paragraph plus the seven principles is the working mental model. Everything else in `docs/operator/` fills in the operator's view of specific concerns.

## What this doc does not cover

- **How the inference works inside.** That's `docs/architecture.md`, `docs/runtime/`, `docs/memory/`. Developer-side.
- **How to add a model architecture.** That's a decoder under `go/internal/metal/`. Developer-side.
- **How training works.** That's `docs/training.md`, `docs/distillation.md`, `docs/grpo.md`. Production-bench / research-side.
- **GOAL.md production-bench lane.** Separate concern with its own canonical brief.
- **Memory limits & cache tuning as a knob set.** Stubbed in `docs/operator/performance-tuning.md` — not yet written. Source of truth meanwhile: `go/internal/metal/backend.go:10-12` and the `mlx.Set*` package surface.

## When the docs and reality disagree

This doc and `docs/operator/*` describe behaviour. Behaviour changes. If you find a discrepancy between what `lthn-mlx serve` actually does and what these docs claim, **the code is right and the docs are wrong**. Fix the doc, or PR a comment-block on the responsible source file referencing this directory.

The maintenance discipline lives in `docs/operator/index.md` under "Maintenance discipline." Read it if you're about to merge a PR that touches `go/cmd/mlx/serve.go`, `go/openai/openai.go`, `go/openai/admin.go`, or `go/internal/metal/backend.go` — those four files are the operator-visible surface.

## Files this directory ships

- `CLAUDE.operator.md` (this file) — operator mental model
- `docs/operator/index.md` — operator doc index + planned slots
- `docs/operator/deployment.md` — what you ship + how it runs
- `docs/operator/metallib-and-variants.md` — bundling strategy + variant matrix
- `docs/operator/troubleshooting.md` — lifecycle-phase failure modes
10 changes: 8 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ cmake_minimum_required(VERSION 3.24)
project(mlx)

set(CMAKE_OSX_DEPLOYMENT_TARGET "26.0" CACHE STRING "Minimum macOS version")
set(CMAKE_CXX_STANDARD 23)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS ON)

include(${CMAKE_CURRENT_LIST_DIR}/cmake/CompilerCache.cmake)

if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_SOURCE_DIR}/dist" CACHE PATH "" FORCE)
Expand All @@ -11,13 +16,14 @@ endif()
set(MLX_BUILD_GGUF ON CACHE BOOL "" FORCE)
set(MLX_BUILD_SAFETENSORS ON CACHE BOOL "" FORCE)
set(MLX_C_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE)
set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE)
set(BUILD_SHARED_LIBS OFF CACHE BOOL "" FORCE)

set(CMAKE_INSTALL_RPATH "@loader_path")

include(FetchContent)

set(MLX_C_GIT_TAG "v0.4.1" CACHE STRING "")
set(MLX_C_GIT_TAG "fba4470" CACHE STRING "") # mlx-c main: bindings regenerated for MLX 0.31.2 (v0.6.0 predates the 0.31.2 FFT API)
set(FETCHCONTENT_SOURCE_DIR_MLX "${CMAKE_CURRENT_SOURCE_DIR}/lib/mlx" CACHE PATH "Local patched MLX source")

FetchContent_Declare(
mlx-c
Expand Down
Loading