From ef7fdfa2f24cda195e6ada58796fe4966f98c5ad Mon Sep 17 00:00:00 2001
From: Jeremy John <jeremy.john@rate.com>
Date: Thu, 14 May 2026 15:39:18 -0500
Subject: [PATCH 01/15] feat: add opt-in AI-based PII detection via OpenAI
 Privacy Filter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add AI-powered PII/secret detection using OpenAI's Privacy Filter model
(openai/privacy-filter) running locally via Transformers.js. This is fully
opt-in — existing behavior is unchanged unless users explicitly enable the
ai section in their config.

Key changes:
- New src/ai-detect.js: Transformers.js token-classification pipeline with
  lazy model loading, q4 quantization (~400MB RAM), and silent fallback
- Updated engine.js: extracted findRegexSpans and applySpans helpers, added
  async redactTextWithAI that merges regex + AI spans with overlap resolution
- Updated config.js: new normalizeAiConfig with enabled/model/dtype/device/
  categories/silentFallback (AI stays disabled unless ai.enabled is truthy)
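- Updated index.js: uses AI path when opted in, graceful degradation; log
  prefix changed from [opencode-vibeguard] to [vibeguard] and log messages
  translated from Chinese to English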
- Updated patterns.js: 10 new builtins (phone_us, phone_intl, ssn,
  credit_card, openai_key, github_token, aws_access_key, vault_token,
  private_key_header, generic_bearer)
- @huggingface/transformers as optionalDependency — npm installs it by
  default but tolerates a failed or omitted install (--omit=optional);
  regex-only usage works without it
- 18 passing tests including AI fallback and new pattern coverage
---
 package.json                  |   7 +-
 src/ai-detect.js              | 176 ++++++++++++++++++++++++++++++
 src/config.js                 |  24 +++++
 src/engine.js                 |  56 ++++++++--
 src/engine.test.js            | 198 ++++++++++++++++++++++++++++++++++
 src/index.js                  |  80 ++++++++++----
 src/patterns.js               |  93 +++++++++++++++-
 vibeguard.config.json.example |   8 ++
 8 files changed, 608 insertions(+), 34 deletions(-)
 create mode 100644 src/ai-detect.js
 create mode 100644 src/engine.test.js

diff --git a/package.json b/package.json
index 8098fba..b88df48 100644
--- a/package.json
+++ b/package.json
@@ -14,6 +14,9 @@
     "test": "node --test",
     "prepack": "npm test"
   },
+  "optionalDependencies": {
+    "@huggingface/transformers": "^4.2.0"
+  },
   "keywords": [
     "opencode",
     "opencode-plugin",
@@ -21,7 +24,9 @@
     "privacy",
     "redaction",
     "secrets",
-    "pii"
+    "pii",
+    "privacy-filter",
+    "ai"
   ],
   "repository": {
     "type": "git",
diff --git a/src/ai-detect.js b/src/ai-detect.js
new file mode 100644
index 0000000..04ce37a
--- /dev/null
+++ b/src/ai-detect.js
@@ -0,0 +1,176 @@
+/**
+ * AI-based PII/secret detection using OpenAI's Privacy Filter model
+ * via Transformers.js (runs locally, no external API calls).
+ *
+ * This module is opt-in: it only activates when `ai.enabled` is set in config
+ * AND `@huggingface/transformers` is installed.
+ *
+ * The model (~400MB q4 quantized) is downloaded on first use and cached locally.
+ */
+
+/** Map Privacy Filter entity labels to vibeguard categories. */
+const LABEL_TO_CATEGORY = {
+  private_person: "PRIVATE_PERSON",
+  private_address: "PRIVATE_ADDRESS",
+  private_email: "PRIVATE_EMAIL",
+  private_phone: "PRIVATE_PHONE",
+  private_url: "PRIVATE_URL",
+  private_date: "PRIVATE_DATE",
+  account_number: "ACCOUNT_NUMBER",
+  secret: "SECRET",
+}
+
+let _pipeline = null
+let _loading = null
+let _transformersModule = undefined // undefined = not yet checked
+
+/**
+ * Attempt to import @huggingface/transformers.
+ * Returns the module or null if not installed.
+ */
+async function loadTransformers() {
+  if (_transformersModule !== undefined) return _transformersModule
+  try {
+    _transformersModule = await import("@huggingface/transformers")
+    return _transformersModule
+  } catch {
+    _transformersModule = null
+    return null
+  }
+}
+
+/**
+ * Initialize the Privacy Filter pipeline. Lazy-loads on first call.
+ * Returns the pipeline instance or null if unavailable.
+ */
+async function getPipeline(aiConfig, debug) {
+  if (_pipeline) return _pipeline
+  if (_loading) return _loading
+
+  _loading = (async () => {
+    const transformers = await loadTransformers()
+    if (!transformers) {
+      if (debug) {
+        console.log(
+          "[vibeguard] AI detection unavailable: @huggingface/transformers not installed. " +
+            "Install with: npm i @huggingface/transformers"
+        )
+      }
+      return null
+    }
+
+    const model = aiConfig.model || "openai/privacy-filter"
+    const dtype = aiConfig.dtype || "q4"
+    const device = aiConfig.device || "cpu"
+
+    if (debug) {
+      console.log(`[vibeguard] Loading AI model: ${model} (dtype=${dtype}, device=${device})`)
+    }
+
+    try {
+      _pipeline = await transformers.pipeline("token-classification", model, {
+        dtype,
+        device,
+      })
+      if (debug) {
+        console.log("[vibeguard] AI model loaded successfully")
+      }
+      return _pipeline
+    } catch (err) {
+      if (debug || !aiConfig.silentFallback) {
+        console.log(`[vibeguard] Failed to load AI model: ${err.message}`)
+      }
+      _pipeline = null
+      return null
+    }
+  })()
+
+  const result = await _loading
+  _loading = null
+  return result
+}
+
+/**
+ * Detect PII/secrets in text using the Privacy Filter model.
+ *
+ * @param {string} text - Input text to scan
+ * @param {object} aiConfig - AI configuration from vibeguard config
+ * @param {boolean} debug - Enable debug logging
+ * @returns {Promise<Array<{ start: number, end: number, original: string, category: string }>>}
+ */
+export async function detectWithAI(text, aiConfig, debug) {
+  if (!text || typeof text !== "string" || text.length === 0) return []
+
+  const pipe = await getPipeline(aiConfig, debug)
+  if (!pipe) return []
+
+  try {
+    // Run token classification with entity aggregation
+    const entities = await pipe(text, { aggregation_strategy: "simple" })
+    if (!Array.isArray(entities) || entities.length === 0) return []
+
+    const allowedCategories =
+      Array.isArray(aiConfig.categories) && aiConfig.categories.length > 0
+        ? new Set(aiConfig.categories.map((c) => c.toLowerCase()))
+        : null
+
+    const spans = []
+
+    for (const entity of entities) {
+      if (!entity || typeof entity !== "object") continue
+
+      // entity_group is the label without B-/I- prefix (from aggregation)
+      const rawLabel = String(entity.entity_group ?? entity.entity ?? "").toLowerCase()
+      // Strip B-/I- prefix if aggregation didn't remove it
+      const label = rawLabel.replace(/^[bi]-/, "")
+
+      if (!label || label === "o") continue
+      if (allowedCategories && !allowedCategories.has(label)) continue
+
+      const start = Number(entity.start)
+      const end = Number(entity.end)
+      if (!Number.isFinite(start) || !Number.isFinite(end)) continue
+      if (start < 0 || end <= start || end > text.length) continue
+
+      const original = text.slice(start, end)
+      const category = LABEL_TO_CATEGORY[label] ?? label.toUpperCase()
+
+      spans.push({ start, end, original, category })
+    }
+
+    if (debug && spans.length > 0) {
+      console.log(
+        `[vibeguard] AI detected ${spans.length} span(s): ${spans.map((s) => s.category).join(", ")}`
+      )
+    }
+
+    return spans
+  } catch (err) {
+    if (!aiConfig.silentFallback || debug) {
+      console.log(`[vibeguard] AI inference error: ${err.message}, falling back to regex-only`)
+    }
+    return []
+  }
+}
+
+/**
+ * Dispose the loaded model pipeline to free memory.
+ */
+export async function disposeAI() {
+  if (_pipeline) {
+    try {
+      if (typeof _pipeline.dispose === "function") await _pipeline.dispose()
+    } catch {
+      /* ignore */
+    }
+    _pipeline = null
+  }
+}
+
+/**
+ * Check if AI detection is available (transformers package installed).
+ */
+export async function isAIAvailable() {
+  const transformers = await loadTransformers()
+  return transformers !== null
+}
diff --git a/src/config.js b/src/config.js
index a8c1552..55423ae 100644
--- a/src/config.js
+++ b/src/config.js
@@ -30,6 +30,27 @@ function readJson(filepath) {
     .catch(() => null)
 }
 
+/**
+ * Normalize the opt-in AI detection config.
+ * All fields have safe defaults; AI is disabled unless explicitly enabled.
+ */
+function normalizeAiConfig(raw) {
+  const ai = raw && typeof raw === "object" ? raw : {}
+  return {
+    enabled: Boolean(ai.enabled),
+    // Model to use for token classification (default: openai/privacy-filter)
+    model: typeof ai.model === "string" ? ai.model : "openai/privacy-filter",
+    // Quantization dtype (default: q4 ~400MB; alternatives: fp32, fp16, q8)
+    dtype: typeof ai.dtype === "string" ? ai.dtype : "q4",
+    // Inference device (default: cpu; alternative: webgpu if available)
+    device: typeof ai.device === "string" ? ai.device : "cpu",
+    // Which Privacy Filter categories to use. Empty = all.
+    categories: Array.isArray(ai.categories) ? ai.categories.map((c) => String(c)) : [],
+    // If true, fall back silently to regex-only when AI is unavailable.
+    silentFallback: ai.silentFallback !== false,
+  }
+}
+
 function normalizeConfig(raw) {
   const cfg = raw && typeof raw === "object" ? raw : {}
 
@@ -44,6 +65,8 @@ function normalizeConfig(raw) {
 
   const patterns = cfg.patterns && typeof cfg.patterns === "object" ? cfg.patterns : {}
 
+  const ai = normalizeAiConfig(cfg.ai)
+
   return {
     enabled,
     debug,
@@ -51,6 +74,7 @@ function normalizeConfig(raw) {
     ttlMs,
     maxMappings,
     patterns,
+    ai,
   }
 }
 
diff --git a/src/engine.js b/src/engine.js
index be01d0b..7ee0213 100644
--- a/src/engine.js
+++ b/src/engine.js
@@ -1,3 +1,5 @@
+import { detectWithAI } from "./ai-detect.js"
+
 function subtractCovered(start, end, covered) {
   if (start >= end) return []
   const out = []
@@ -42,16 +44,10 @@ function insertCovered(covered, span) {
 }
 
 /**
- * 对输入文本进行脱敏替换，返回替换后的文本与命中信息。
- * 设计与 VibeGuard 的 redact 引擎一致：处理重叠命中，确保不会把占位符切碎。
- * @param {string} input
- * @param {{ keywords: Array<{value:string,category:string}>, regex: Array<{pattern:string,flags:string,category:string}>, exclude: Set<string> }} patterns
- * @param {{ getOrCreatePlaceholder(original: string, category: string): string }} session
+ * Collect regex/keyword spans from the given text (synchronous, fast).
+ * Shared by both redactText and redactTextWithAI.
  */
-export function redactText(input, patterns, session) {
-  const text = String(input ?? "")
-  if (!text) return { text, matches: [] }
-
+function findRegexSpans(text, patterns) {
   const found = []
 
   for (const rule of patterns.keywords) {
@@ -85,9 +81,17 @@ export function redactText(input, patterns, session) {
     }
   }
 
+  return found
+}
+
+/**
+ * Given a set of found spans, resolve overlaps and apply placeholder replacements.
+ * Shared by both sync and async redaction paths.
+ */
+function applySpans(text, found, session) {
   if (found.length === 0) return { text, matches: [] }
 
-  // 右侧优先；同起点优先更长，便于把左侧大范围命中拆掉
+  // Right-first; same start -> prefer longer span
   found.sort((a, b) => {
     if (a.start !== b.start) return b.start - a.start
     return b.end - a.end
@@ -121,3 +125,35 @@ export function redactText(input, patterns, session) {
   return { text: out, matches: planned }
 }
 
+/**
+ * Redact text using regex/keyword patterns only (synchronous, fast).
+ * Returns { text, matches }.
+ */
+export function redactText(input, patterns, session) {
+  const text = String(input ?? "")
+  if (!text) return { text, matches: [] }
+  const found = findRegexSpans(text, patterns)
+  return applySpans(text, found, session)
+}
+
+/**
+ * Redact text using both regex/keyword patterns AND the AI Privacy Filter.
+ * Async because the AI inference is async. The hook awaits this before
+ * proceeding, so redaction is guaranteed complete before the LLM sees the text.
+ */
+export async function redactTextWithAI(input, patterns, session, aiConfig, debug) {
+  const text = String(input ?? "")
+  if (!text) return { text, matches: [] }
+
+  // 1. Regex/keyword detection (fast, synchronous)
+  const found = findRegexSpans(text, patterns)
+
+  // 2. AI-based detection (async, local model inference)
+  const aiSpans = await detectWithAI(text, aiConfig, debug)
+  for (const span of aiSpans) {
+    if (patterns.exclude.has(span.original)) continue
+    found.push(span)
+  }
+
+  return applySpans(text, found, session)
+}
diff --git a/src/engine.test.js b/src/engine.test.js
new file mode 100644
index 0000000..64ae332
--- /dev/null
+++ b/src/engine.test.js
@@ -0,0 +1,198 @@
+import { describe, it } from "node:test"
+import assert from "node:assert/strict"
+import { redactText, redactTextWithAI } from "./engine.js"
+import { buildPatternSet } from "./patterns.js"
+import { PlaceholderSession } from "./session.js"
+import { restoreText } from "./restore.js"
+
+describe("redactText (regex-only)", () => {
+  it("redacts email addresses", () => {
+    const patterns = buildPatternSet({ builtin: ["email"], exclude: ["example.com"] })
+    const session = new PlaceholderSession({ prefix: "__VG_" })
+    const result = redactText("Contact me at alice@corp.io for details", patterns, session)
+    assert.ok(!result.text.includes("alice@corp.io"), "email should be redacted")
+    assert.ok(result.text.includes("__VG_EMAIL_"), "should contain placeholder")
+    assert.equal(result.matches.length, 1)
+    assert.equal(result.matches[0].category, "EMAIL")
+  })
+
+  it("redacts keyword values", () => {
+    const patterns = buildPatternSet({
+      keywords: [{ value: "sk-abc123secret", category: "API_KEY" }],
+    })
+    const session = new PlaceholderSession({ prefix: "__VG_" })
+    const result = redactText("Key is sk-abc123secret here", patterns, session)
+    assert.ok(!result.text.includes("sk-abc123secret"))
+    assert.ok(result.text.includes("__VG_API_KEY_"))
+  })
+
+  it("respects exclude list", () => {
+    const patterns = buildPatternSet({
+      builtin: ["email"],
+      exclude: ["test@example.com"],
+    })
+    const session = new PlaceholderSession({ prefix: "__VG_" })
+    const result = redactText("Email: test@example.com", patterns, session)
+    assert.ok(result.text.includes("test@example.com"), "excluded email should remain")
+  })
+
+  it("handles overlapping spans correctly", () => {
+    const patterns = buildPatternSet({
+      keywords: [
+        { value: "secret-key-abc", category: "KEY" },
+        { value: "key-abc", category: "KEY_PART" },
+      ],
+    })
+    const session = new PlaceholderSession({ prefix: "__VG_" })
+    const result = redactText("Found secret-key-abc here", patterns, session)
+    assert.ok(!result.text.includes("secret-key-abc"))
+    assert.ok(!result.text.includes("key-abc"))
+  })
+
+  it("returns unchanged text when nothing matches", () => {
+    const patterns = buildPatternSet({ builtin: ["email"] })
+    const session = new PlaceholderSession({ prefix: "__VG_" })
+    const result = redactText("Nothing sensitive here", patterns, session)
+    assert.equal(result.text, "Nothing sensitive here")
+    assert.equal(result.matches.length, 0)
+  })
+})
+
+describe("new builtin patterns", () => {
+  it("redacts US phone numbers", () => {
+    const patterns = buildPatternSet({ builtin: ["phone_us"] })
+    const session = new PlaceholderSession({ prefix: "__VG_" })
+    const result = redactText("Call me at (555) 123-4567 please", patterns, session)
+    assert.ok(!result.text.includes("(555) 123-4567"))
+    assert.ok(result.text.includes("__VG_PHONE_US_"))
+  })
+
+  it("redacts SSNs", () => {
+    const patterns = buildPatternSet({ builtin: ["ssn"] })
+    const session = new PlaceholderSession({ prefix: "__VG_" })
+    const result = redactText("SSN: 123-45-6789", patterns, session)
+    assert.ok(!result.text.includes("123-45-6789"))
+    assert.ok(result.text.includes("__VG_SSN_"))
+  })
+
+  it("redacts credit card numbers", () => {
+    const patterns = buildPatternSet({ builtin: ["credit_card"] })
+    const session = new PlaceholderSession({ prefix: "__VG_" })
+    const result = redactText("Card: 4111-1111-1111-1111", patterns, session)
+    assert.ok(!result.text.includes("4111-1111-1111-1111"))
+    assert.ok(result.text.includes("__VG_CREDIT_CARD_"))
+  })
+
+  it("redacts private key headers", () => {
+    const patterns = buildPatternSet({ builtin: ["private_key_header"] })
+    const session = new PlaceholderSession({ prefix: "__VG_" })
+    const result = redactText("-----BEGIN RSA PRIVATE KEY-----\nfoo", patterns, session)
+    assert.ok(!result.text.includes("-----BEGIN RSA PRIVATE KEY-----"))
+    assert.ok(result.text.includes("__VG_PRIVATE_KEY_"))
+  })
+
+  it("redacts AWS access keys", () => {
+    const patterns = buildPatternSet({ builtin: ["aws_access_key"] })
+    const session = new PlaceholderSession({ prefix: "__VG_" })
+    const result = redactText("key: AKIAIOSFODNN7EXAMPLE", patterns, session)
+    assert.ok(!result.text.includes("AKIAIOSFODNN7EXAMPLE"))
+    assert.ok(result.text.includes("__VG_AWS_ACCESS_KEY_"))
+  })
+
+  it("redacts bearer tokens", () => {
+    const patterns = buildPatternSet({ builtin: ["generic_bearer"] })
+    const session = new PlaceholderSession({ prefix: "__VG_" })
+    const result = redactText("Authorization: Bearer eyJhbGciOiJIUzI1NiJ9.abc.xyz", patterns, session)
+    assert.ok(!result.text.includes("eyJhbGciOiJIUzI1NiJ9"))
+    assert.ok(result.text.includes("__VG_BEARER_TOKEN_"))
+  })
+})
+
+describe("redactTextWithAI (graceful fallback)", () => {
+  it("falls back to regex-only when AI is unavailable", async () => {
+    const patterns = buildPatternSet({ builtin: ["email"] })
+    const session = new PlaceholderSession({ prefix: "__VG_" })
+
+    // AI enabled but transformers not installed in test env = silent fallback
+    const aiConfig = {
+      enabled: true,
+      model: "openai/privacy-filter",
+      dtype: "q4",
+      device: "cpu",
+      categories: [],
+      silentFallback: true,
+    }
+
+    const result = await redactTextWithAI(
+      "Email alice@corp.io from Alice Smith",
+      patterns,
+      session,
+      aiConfig,
+      false
+    )
+
+    // Email should be redacted by regex even when AI is unavailable
+    assert.ok(!result.text.includes("alice@corp.io"))
+    assert.ok(result.text.includes("__VG_EMAIL_"))
+    // "Alice Smith" won't be redacted without AI, which is expected fallback
+    assert.ok(result.text.includes("Alice Smith"))
+  })
+
+  it("handles empty text", async () => {
+    const patterns = buildPatternSet({ builtin: ["email"] })
+    const session = new PlaceholderSession({ prefix: "__VG_" })
+    const aiConfig = { enabled: true, model: "openai/privacy-filter", dtype: "q4", device: "cpu", categories: [], silentFallback: true }
+
+    const result = await redactTextWithAI("", patterns, session, aiConfig, false)
+    assert.equal(result.text, "")
+    assert.equal(result.matches.length, 0)
+  })
+})
+
+describe("PlaceholderSession", () => {
+  it("produces stable placeholders for same input", () => {
+    const session = new PlaceholderSession({ prefix: "__VG_" })
+    const ph1 = session.getOrCreatePlaceholder("secret123", "API_KEY")
+    const ph2 = session.getOrCreatePlaceholder("secret123", "API_KEY")
+    assert.equal(ph1, ph2)
+  })
+
+  it("supports bidirectional lookup", () => {
+    const session = new PlaceholderSession({ prefix: "__VG_" })
+    const ph = session.getOrCreatePlaceholder("myvalue", "TEXT")
+    assert.equal(session.lookup(ph), "myvalue")
+    assert.equal(session.lookupReverse("myvalue"), ph)
+  })
+})
+
+describe("restoreText", () => {
+  it("restores placeholders to original values", () => {
+    const session = new PlaceholderSession({ prefix: "__VG_" })
+    const ph = session.getOrCreatePlaceholder("alice@corp.io", "EMAIL")
+    const restored = restoreText(`Contact ${ph} for details`, session)
+    assert.equal(restored, "Contact alice@corp.io for details")
+  })
+
+  it("leaves unknown placeholders unchanged", () => {
+    const session = new PlaceholderSession({ prefix: "__VG_" })
+    const text = "Some __VG_UNKNOWN_abcdef012345__ here"
+    const restored = restoreText(text, session)
+    assert.equal(restored, text)
+  })
+})
+
+describe("config normalizeAiConfig", async () => {
+  // Import config module to test normalization
+  const { loadConfig } = await import("./config.js")
+
+  it("defaults ai to disabled", async () => {
+    // loadConfig with a non-existent dir returns enabled=false
+    const cfg = await loadConfig("/nonexistent-dir-for-test-" + Date.now())
+    assert.equal(cfg.ai.enabled, false)
+    assert.equal(cfg.ai.model, "openai/privacy-filter")
+    assert.equal(cfg.ai.dtype, "q4")
+    assert.equal(cfg.ai.device, "cpu")
+    assert.equal(cfg.ai.silentFallback, true)
+    assert.deepEqual(cfg.ai.categories, [])
+  })
+})
diff --git a/src/index.js b/src/index.js
index 9509927..0fd35f4 100644
--- a/src/index.js
+++ b/src/index.js
@@ -1,30 +1,59 @@
 import { loadConfig } from "./config.js"
 import { buildPatternSet } from "./patterns.js"
 import { PlaceholderSession } from "./session.js"
-import { redactText } from "./engine.js"
+import { redactText, redactTextWithAI } from "./engine.js"
 import { redactDeep, restoreDeep } from "./deep.js"
 import { restoreText } from "./restore.js"
+import { isAIAvailable } from "./ai-detect.js"
 
 /**
- * OpenCode 插件入口：
- * - `experimental.chat.messages.transform`：LLM 请求前对全部消息做脱敏（保证 provider 永远看不到真实值）
- * - `tool.execute.before`：工具执行前还原占位符（保证本机执行拿到真实值）
+ * OpenCode plugin entry point:
+ * - `experimental.chat.messages.transform`: redact all messages before sending to LLM
+ * - `tool.execute.before`: restore placeholders before local tool execution
+ * - `experimental.text.complete`: restore placeholders in completed model output
  *
- * 说明：为了降低误用风险，本插件在“找不到配置文件或 enabled=false”时为 no-op。
+ * AI detection is opt-in via the `ai` config section. When AI is enabled but
+ * @huggingface/transformers is not installed, falls back to regex/keyword only.
  */
 export const VibeGuardPrivacy = async (ctx) => {
   const config = await loadConfig(ctx.directory)
   const debug = Boolean(process.env.OPENCODE_VIBEGUARD_DEBUG) || Boolean(config.debug)
 
   if (debug) {
-    const from = config.loadedFrom ? config.loadedFrom : "未找到（插件将 no-op）"
-    console.log(`[opencode-vibeguard] 配置：${from} enabled=${config.enabled}`)
+    const from = config.loadedFrom ? config.loadedFrom : "not found (plugin will no-op)"
+    console.log(`[vibeguard] Config: ${from} enabled=${config.enabled}`)
   }
 
   if (!config.enabled) return {}
 
   const patterns = buildPatternSet(config.patterns)
   const sessions = new Map()
+  const aiConfig = config.ai
+  const useAI = aiConfig.enabled
+
+  // Check AI availability at startup (non-blocking info)
+  if (useAI) {
+    const available = await isAIAvailable()
+    if (available) {
+      console.log(
+        `[vibeguard] AI detection enabled (model: ${aiConfig.model}, dtype: ${aiConfig.dtype}). ` +
+          `Model will be downloaded on first use if not cached.`
+      )
+    } else {
+      console.log(
+        `[vibeguard] AI detection enabled in config but @huggingface/transformers is not installed. ` +
+          `Install with: npm i @huggingface/transformers\n` +
+          `Falling back to regex/keyword detection only.`
+      )
+    }
+  }
+
+  if (debug) {
+    console.log(`[vibeguard] AI detection: ${useAI ? "enabled" : "disabled (opt-in via config)"}`)
+    console.log(
+      `[vibeguard] Regex patterns: ${patterns.keywords.length} keywords, ${patterns.regex.length} regex rules`
+    )
+  }
 
   const getSession = (sessionID) => {
     const key = String(sessionID ?? "")
@@ -53,60 +82,71 @@ export const VibeGuardPrivacy = async (ctx) => {
 
       let changedTextParts = 0
 
+      // Choose redaction function based on AI config
+      const redactStr = useAI
+        ? async (text) => {
+            const result = await redactTextWithAI(text, patterns, session, aiConfig, debug)
+            return result.text
+          }
+        : (text) => {
+            return Promise.resolve(redactText(text, patterns, session).text)
+          }
+
       for (const msg of msgs) {
         const parts = Array.isArray(msg?.parts) ? msg.parts : []
         for (const part of parts) {
           if (!part) continue
 
-          // 普通文本（用户/助手）
+          // Plain text (user/assistant)
           if (part.type === "text") {
             if (part.ignored) continue
             if (!part.text || typeof part.text !== "string") continue
             const before = part.text
-            const after = redactText(before, patterns, session).text
+            const after = await redactStr(before)
             if (after !== before) changedTextParts++
             part.text = after
             continue
           }
 
-          // 推理文本（部分模型/配置会进入 prompt）
+          // Reasoning text
           if (part.type === "reasoning") {
             if (!part.text || typeof part.text !== "string") continue
             const before = part.text
-            const after = redactText(before, patterns, session).text
+            const after = await redactStr(before)
             if (after !== before) changedTextParts++
             part.text = after
             continue
           }
 
-          // 工具调用/输出：最常见的泄漏来源（例如读取 .env）
+          // Tool calls/outputs: most common leak source (e.g., reading .env)
           if (part.type === "tool") {
             const state = part.state
             if (!state || typeof state !== "object") continue
 
-            // 统一把工具输入也做深度脱敏：真实执行的 args 会包含明文（由 tool.execute.before 还原），
-            // 如果不在这里再脱敏一次，后续回合会把明文 args 带给 LLM。
+            // Deep-redact tool inputs (args) so they don't leak in later turns.
+            // Uses sync regex-only for deep object traversal; AI layer covers
+            // text parts and tool output strings.
             if (state.input && typeof state.input === "object") {
               redactDeep(state.input, patterns, session)
             }
 
             if (state.status === "completed" && typeof state.output === "string") {
               const before = state.output
-              const after = redactText(before, patterns, session).text
+              const after = await redactStr(before)
               if (after !== before) changedTextParts++
               state.output = after
               continue
             }
             if (state.status === "error" && typeof state.error === "string") {
               const before = state.error
-              const after = redactText(before, patterns, session).text
+              const after = await redactStr(before)
               if (after !== before) changedTextParts++
               state.error = after
               continue
             }
             if (state.status === "pending" && typeof state.raw === "string") {
               const before = state.raw
-              const after = redactText(before, patterns, session).text
+              const after = await redactStr(before)
               if (after !== before) changedTextParts++
               state.raw = after
               continue
@@ -116,7 +156,9 @@ export const VibeGuardPrivacy = async (ctx) => {
       }
 
       if (debug && changedTextParts > 0) {
-        console.log(`[opencode-vibeguard] 本次请求前脱敏：已修改 ${changedTextParts} 处文本片段`)
+        console.log(
+          `[vibeguard] Pre-request redaction: modified ${changedTextParts} text segment(s)`
+        )
       }
     },
 
@@ -130,7 +172,7 @@ export const VibeGuardPrivacy = async (ctx) => {
       const after = restoreText(before, session)
       output.text = after
       if (debug && after !== before) {
-        console.log("[opencode-vibeguard] 本次响应完成后还原：已修改 1 处文本片段")
+        console.log("[vibeguard] Post-response restore: modified 1 text segment")
       }
     },
 
diff --git a/src/patterns.js b/src/patterns.js
index 5238cac..437ddd4 100644
--- a/src/patterns.js
+++ b/src/patterns.js
@@ -34,8 +34,8 @@ function peelInlineFlags(pattern, flags) {
 }
 
 /**
- * 内置规则：从 VibeGuard 的 builtin 规则移植（做了 JS 兼容调整）。
- * 目标是“低配置成本 + 尽量覆盖”，不追求 100% 精准。
+ * Builtin detection rules (ported from VibeGuard with JS compatibility).
+ * Goal: low config cost + broad coverage, not 100% precision.
  */
 const BUILTIN = new Map([
   [
@@ -49,7 +49,6 @@ const BUILTIN = new Map([
   [
     "china_phone",
     {
-      // 直接匹配手机号本体（用 lookaround 替代 Go 里的捕获组边界保留写法）
       pattern: String.raw`(?<!\d)1[3-9]\d{9}(?!\d)`,
       flags: "",
       category: "CHINA_PHONE",
@@ -74,7 +73,6 @@ const BUILTIN = new Map([
   [
     "ipv4",
     {
-      // 不校验每段 0-255；目标是覆盖常见情况
       pattern: String.raw`(?:\d{1,3}\.){3}\d{1,3}`,
       flags: "",
       category: "IPV4",
@@ -88,6 +86,93 @@ const BUILTIN = new Map([
       category: "MAC",
     },
   ],
+  // --- Additional builtins ---
+  [
+    "phone_us",
+    {
+      // US phone: (555) 123-4567, 555-123-4567, +1-555-123-4567, etc.
+      pattern: String.raw`(?<!\d)(?:\+?1[-.\s]?)?(?:\(\d{3}\)|\d{3})[-.\s]?\d{3}[-.\s]?\d{4}(?!\d)`,
+      flags: "",
+      category: "PHONE_US",
+    },
+  ],
+  [
+    "phone_intl",
+    {
+      // International E.164-style: +44 20 7946 0958, +49-30-1234567, etc.
+      pattern: String.raw`(?<!\d)\+[1-9]\d{1,2}[-.\s]?\d[\d\-.\s]{6,14}\d(?!\d)`,
+      flags: "",
+      category: "PHONE_INTL",
+    },
+  ],
+  [
+    "ssn",
+    {
+      // US Social Security Number: 123-45-6789
+      pattern: String.raw`(?<!\d)\d{3}-\d{2}-\d{4}(?!\d)`,
+      flags: "",
+      category: "SSN",
+    },
+  ],
+  [
+    "credit_card",
+    {
+      // Major credit card patterns (Visa, MC, Amex, Discover) with optional separators
+      pattern: String.raw`(?<!\d)(?:4\d{3}|5[1-5]\d{2}|3[47]\d{2}|6(?:011|5\d{2}))[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}(?!\d)`,
+      flags: "",
+      category: "CREDIT_CARD",
+    },
+  ],
+  [
+    "openai_key",
+    {
+      pattern: String.raw`sk-[A-Za-z0-9]{20,}T3BlbkFJ[A-Za-z0-9]{20,}`,
+      flags: "",
+      category: "OPENAI_KEY",
+    },
+  ],
+  [
+    "github_token",
+    {
+      pattern: String.raw`(?:ghp|gho|ghu|ghs|ghr)_[A-Za-z0-9]{36,}`,
+      flags: "",
+      category: "GITHUB_TOKEN",
+    },
+  ],
+  [
+    "aws_access_key",
+    {
+      pattern: String.raw`AKIA[0-9A-Z]{16}`,
+      flags: "",
+      category: "AWS_ACCESS_KEY",
+    },
+  ],
+  [
+    "vault_token",
+    {
+      // HashiCorp Vault tokens: hvs.xxxxx or s.xxxxx
+      pattern: String.raw`(?:hvs|s)\.[A-Za-z0-9]{24,}`,
+      flags: "",
+      category: "VAULT_TOKEN",
+    },
+  ],
+  [
+    "private_key_header",
+    {
+      pattern: String.raw`-----BEGIN (?:RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----`,
+      flags: "",
+      category: "PRIVATE_KEY",
+    },
+  ],
+  [
+    "generic_bearer",
+    {
+      // Bearer tokens in Authorization headers
+      pattern: String.raw`Bearer\s+[A-Za-z0-9\-._~+/]+=*`,
+      flags: "i",
+      category: "BEARER_TOKEN",
+    },
+  ],
 ])
 
 export function buildPatternSet(patterns) {
diff --git a/vibeguard.config.json.example b/vibeguard.config.json.example
index 9884acc..e97c0cd 100644
--- a/vibeguard.config.json.example
+++ b/vibeguard.config.json.example
@@ -17,5 +17,13 @@
     ],
     "builtin": ["email", "china_phone", "china_id", "uuid", "ipv4", "mac"],
     "exclude": ["example.com", "localhost", "127.0.0.1", "0.0.0.0"]
+  },
+  "ai": {
+    "enabled": false,
+    "model": "openai/privacy-filter",
+    "dtype": "q4",
+    "device": "cpu",
+    "categories": [],
+    "silentFallback": true
   }
 }

From 64e606aee05dc6c96e99c4932202ca0acdff1807 Mon Sep 17 00:00:00 2001
From: Jeremy John <jeremy.john@rate.com>
Date: Thu, 14 May 2026 16:06:06 -0500
Subject: [PATCH 02/15] =?UTF-8?q?fix:=20address=20code=20review=20findings?=
 =?UTF-8?q?=20=E2=80=94=20robustness,=20regex=20accuracy,=20lazy=20imports?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- ai-detect: add 5min cooldown after pipeline failure to prevent retry storm
- ai-detect: add 2min timeout on model download via Promise.race
- ai-detect: allow retry on transient transformers import failure
- engine: lazy-import ai-detect.js only when AI path is actually called
- engine: add _detectFn param for testability without real model download
- index: lazy-import isAIAvailable/disposeAI only when AI enabled
- index: wire up disposeAI() on process exit (SIGINT/SIGTERM)
- patterns: tighten phone_us regex (require separators or parens)
- patterns: tighten phone_intl regex (require separator after country code)
- patterns: fix credit_card to support Amex 15-digit format (4-6-5)
- patterns: update openai_key to match new sk-proj- format
- tests: fix AI fallback tests to use an injected stub instead of mock.module
- tests: add AI span merge test, Amex test, sk-proj- test, false-positive test
- tests: fix config test to not pick up real global config

All 22 tests pass.
---
 src/ai-detect.js   | 58 ++++++++++++++++++++++++++--
 src/engine.js      | 25 ++++++++++--
 src/engine.test.js | 96 ++++++++++++++++++++++++++++++++++++++++------
 src/index.js       | 11 +++++-
 src/patterns.js    | 19 +++++----
 5 files changed, 183 insertions(+), 26 deletions(-)

diff --git a/src/ai-detect.js b/src/ai-detect.js
index 04ce37a..e9b8804 100644
--- a/src/ai-detect.js
+++ b/src/ai-detect.js
@@ -8,6 +8,12 @@
  * The model (~400MB q4 quantized) is downloaded on first use and cached locally.
  */
 
+/** Default timeout for model download/load (2 minutes). */
+const MODEL_LOAD_TIMEOUT_MS = 120_000
+
+/** Cooldown after a failed model load before retrying (5 minutes). */
+const RETRY_COOLDOWN_MS = 5 * 60_000
+
 /** Map Privacy Filter entity labels to vibeguard categories. */
 const LABEL_TO_CATEGORY = {
   private_person: "PRIVATE_PERSON",
@@ -23,18 +29,26 @@ const LABEL_TO_CATEGORY = {
 let _pipeline = null
 let _loading = null
 let _transformersModule = undefined // undefined = not yet checked
+let _transformersImportFailed = false // true = last import() threw
+let _pipelineFailedAt = 0 // timestamp of last pipeline failure (0 = never)
 
 /**
  * Attempt to import @huggingface/transformers.
  * Returns the module or null if not installed.
+ * Allows retry: a transient failure (e.g. temporary file-system error)
+ * clears the cached null so the next call re-attempts the import.
  */
 async function loadTransformers() {
-  if (_transformersModule !== undefined) return _transformersModule
+  if (_transformersModule !== undefined && _transformersModule !== null) return _transformersModule
+  // If the previous import failed, allow retry (don't cache null permanently)
+  if (_transformersModule === null && !_transformersImportFailed) return null
   try {
     _transformersModule = await import("@huggingface/transformers")
+    _transformersImportFailed = false
     return _transformersModule
   } catch {
     _transformersModule = null
+    _transformersImportFailed = true
     return null
   }
 }
@@ -42,11 +56,33 @@ async function loadTransformers() {
 /**
  * Initialize the Privacy Filter pipeline. Lazy-loads on first call.
  * Returns the pipeline instance or null if unavailable.
+ *
+ * Defenses:
+ * - If the pipeline loaded successfully before, returns cached instance.
+ * - If another caller is already loading, coalesces onto that promise.
+ * - If the previous load failed, enforces a cooldown (RETRY_COOLDOWN_MS)
+ *   before retrying to avoid a retry storm on every redaction call.
+ * - Wraps model download in a timeout (MODEL_LOAD_TIMEOUT_MS) so a
+ *   hanging download doesn't block redaction forever.
  */
 async function getPipeline(aiConfig, debug) {
   if (_pipeline) return _pipeline
   if (_loading) return _loading
 
+  // Cooldown after failure — don't retry on every call
+  if (_pipelineFailedAt > 0) {
+    const elapsed = Date.now() - _pipelineFailedAt
+    if (elapsed < RETRY_COOLDOWN_MS) {
+      if (debug) {
+        const remaining = Math.ceil((RETRY_COOLDOWN_MS - elapsed) / 1000)
+        console.log(`[vibeguard] AI model load on cooldown (${remaining}s remaining), skipping`)
+      }
+      return null
+    }
+    // Cooldown expired — allow retry
+    _pipelineFailedAt = 0
+  }
+
   _loading = (async () => {
     const transformers = await loadTransformers()
     if (!transformers) {
@@ -62,25 +98,41 @@ async function getPipeline(aiConfig, debug) {
     const model = aiConfig.model || "openai/privacy-filter"
     const dtype = aiConfig.dtype || "q4"
     const device = aiConfig.device || "cpu"
+    const timeoutMs = aiConfig.timeoutMs || MODEL_LOAD_TIMEOUT_MS
 
     if (debug) {
-      console.log(`[vibeguard] Loading AI model: ${model} (dtype=${dtype}, device=${device})`)
+      console.log(
+        `[vibeguard] Loading AI model: ${model} (dtype=${dtype}, device=${device}, timeout=${timeoutMs}ms)`
+      )
     }
 
     try {
-      _pipeline = await transformers.pipeline("token-classification", model, {
+      const loadPromise = transformers.pipeline("token-classification", model, {
         dtype,
         device,
       })
+
+      // Race the model load against a timeout
+      const timeoutPromise = new Promise((_, reject) => {
+        setTimeout(
+          () => reject(new Error(`Model load timed out after ${timeoutMs}ms`)),
+          timeoutMs
+        )
+      })
+
+      _pipeline = await Promise.race([loadPromise, timeoutPromise])
+
       if (debug) {
         console.log("[vibeguard] AI model loaded successfully")
       }
+      _pipelineFailedAt = 0
       return _pipeline
     } catch (err) {
       if (debug || !aiConfig.silentFallback) {
         console.log(`[vibeguard] Failed to load AI model: ${err.message}`)
       }
       _pipeline = null
+      _pipelineFailedAt = Date.now()
       return null
     }
   })()
diff --git a/src/engine.js b/src/engine.js
index 7ee0213..642fb9c 100644
--- a/src/engine.js
+++ b/src/engine.js
@@ -1,4 +1,6 @@
-import { detectWithAI } from "./ai-detect.js"
+// ai-detect.js is imported lazily in redactTextWithAI() to avoid pulling in
+// Transformers.js infrastructure when AI detection is disabled.
+let _detectWithAI = null
 
 function subtractCovered(start, end, covered) {
   if (start >= end) return []
@@ -140,8 +142,15 @@ export function redactText(input, patterns, session) {
  * Redact text using both regex/keyword patterns AND the AI Privacy Filter.
  * Async because the AI inference is async. The hook awaits this before
  * proceeding, so redaction is guaranteed complete before the LLM sees the text.
+ *
+ * @param {string} input
+ * @param {object} patterns
+ * @param {object} session
+ * @param {object} aiConfig
+ * @param {boolean} debug
+ * @param {Function} [_detectFn] - Optional override for detectWithAI (testing only)
  */
-export async function redactTextWithAI(input, patterns, session, aiConfig, debug) {
+export async function redactTextWithAI(input, patterns, session, aiConfig, debug, _detectFn) {
   const text = String(input ?? "")
   if (!text) return { text, matches: [] }
 
@@ -149,7 +158,8 @@ export async function redactTextWithAI(input, patterns, session, aiConfig, debug
   const found = findRegexSpans(text, patterns)
 
   // 2. AI-based detection (async, local model inference)
-  const aiSpans = await detectWithAI(text, aiConfig, debug)
+  const detect = _detectFn ?? await getDetectWithAI()
+  const aiSpans = await detect(text, aiConfig, debug)
   for (const span of aiSpans) {
     if (patterns.exclude.has(span.original)) continue
     found.push(span)
@@ -157,3 +167,12 @@ export async function redactTextWithAI(input, patterns, session, aiConfig, debug
 
   return applySpans(text, found, session)
 }
+
+/** Lazily resolve the real detectWithAI function from ai-detect.js */
+async function getDetectWithAI() {
+  if (!_detectWithAI) {
+    const mod = await import("./ai-detect.js")
+    _detectWithAI = mod.detectWithAI
+  }
+  return _detectWithAI
+}
diff --git a/src/engine.test.js b/src/engine.test.js
index 64ae332..938d7c6 100644
--- a/src/engine.test.js
+++ b/src/engine.test.js
@@ -83,6 +83,14 @@ describe("new builtin patterns", () => {
     assert.ok(result.text.includes("__VG_CREDIT_CARD_"))
   })
 
+  it("redacts Amex credit card numbers (15 digits)", () => {
+    const patterns = buildPatternSet({ builtin: ["credit_card"] })
+    const session = new PlaceholderSession({ prefix: "__VG_" })
+    const result = redactText("Amex: 3782 822463 10005", patterns, session)
+    assert.ok(!result.text.includes("3782 822463 10005"))
+    assert.ok(result.text.includes("__VG_CREDIT_CARD_"))
+  })
+
   it("redacts private key headers", () => {
     const patterns = buildPatternSet({ builtin: ["private_key_header"] })
     const session = new PlaceholderSession({ prefix: "__VG_" })
@@ -106,14 +114,32 @@ describe("new builtin patterns", () => {
     assert.ok(!result.text.includes("eyJhbGciOiJIUzI1NiJ9"))
     assert.ok(result.text.includes("__VG_BEARER_TOKEN_"))
   })
+
+  it("redacts new-format OpenAI keys (sk-proj-)", () => {
+    const patterns = buildPatternSet({ builtin: ["openai_key"] })
+    const session = new PlaceholderSession({ prefix: "__VG_" })
+    const result = redactText("key: sk-proj-abcdefghij1234567890klmnopqr", patterns, session)
+    assert.ok(!result.text.includes("sk-proj-abcdefghij1234567890klmnopqr"))
+    assert.ok(result.text.includes("__VG_OPENAI_KEY_"))
+  })
+
+  it("does not false-positive phone_us on bare digit sequences", () => {
+    const patterns = buildPatternSet({ builtin: ["phone_us"] })
+    const session = new PlaceholderSession({ prefix: "__VG_" })
+    // Timestamp-like and ID-like digit strings should NOT match
+    const result = redactText("timestamp: 1715000000000 and id: 9876543", patterns, session)
+    assert.equal(result.matches.length, 0, "bare digits should not match phone_us")
+  })
 })
 
 describe("redactTextWithAI (graceful fallback)", () => {
-  it("falls back to regex-only when AI is unavailable", async () => {
+  // Stub that simulates AI being unavailable (returns no spans)
+  const noopDetect = async () => []
+
+  it("falls back to regex-only when AI returns no spans", async () => {
     const patterns = buildPatternSet({ builtin: ["email"] })
     const session = new PlaceholderSession({ prefix: "__VG_" })
 
-    // AI enabled but transformers not installed in test env = silent fallback
     const aiConfig = {
       enabled: true,
       model: "openai/privacy-filter",
@@ -128,7 +154,8 @@ describe("redactTextWithAI (graceful fallback)", () => {
       patterns,
       session,
       aiConfig,
-      false
+      false,
+      noopDetect // inject stub — avoids real model download
     )
 
     // Email should be redacted by regex even when AI is unavailable
@@ -138,12 +165,40 @@ describe("redactTextWithAI (graceful fallback)", () => {
     assert.ok(result.text.includes("Alice Smith"))
   })
 
+  it("merges AI spans with regex spans", async () => {
+    const patterns = buildPatternSet({ builtin: ["email"] })
+    const session = new PlaceholderSession({ prefix: "__VG_" })
+
+    const aiConfig = {
+      enabled: true,
+      model: "openai/privacy-filter",
+      dtype: "q4",
+      device: "cpu",
+      categories: [],
+      silentFallback: true,
+    }
+
+    // Simulate AI detecting "Alice Smith" as a person name
+    const input = "Email alice@corp.io from Alice Smith"
+    const nameStart = input.indexOf("Alice Smith")
+    const fakeDetect = async () => [
+      { start: nameStart, end: nameStart + 11, original: "Alice Smith", category: "PRIVATE_PERSON" },
+    ]
+
+    const result = await redactTextWithAI(input, patterns, session, aiConfig, false, fakeDetect)
+
+    assert.ok(!result.text.includes("alice@corp.io"), "email should be redacted by regex")
+    assert.ok(!result.text.includes("Alice Smith"), "name should be redacted by AI")
+    assert.ok(result.text.includes("__VG_EMAIL_"))
+    assert.ok(result.text.includes("__VG_PRIVATE_PERSON_"))
+  })
+
   it("handles empty text", async () => {
     const patterns = buildPatternSet({ builtin: ["email"] })
     const session = new PlaceholderSession({ prefix: "__VG_" })
     const aiConfig = { enabled: true, model: "openai/privacy-filter", dtype: "q4", device: "cpu", categories: [], silentFallback: true }
 
-    const result = await redactTextWithAI("", patterns, session, aiConfig, false)
+    const result = await redactTextWithAI("", patterns, session, aiConfig, false, noopDetect)
     assert.equal(result.text, "")
     assert.equal(result.matches.length, 0)
   })
@@ -184,15 +239,32 @@ describe("restoreText", () => {
 describe("config normalizeAiConfig", async () => {
   // Import config module to test normalization
   const { loadConfig } = await import("./config.js")
+  const { writeFileSync, unlinkSync, mkdirSync } = await import("node:fs")
+  const { join } = await import("node:path")
 
   it("defaults ai to disabled", async () => {
-    // loadConfig with a non-existent dir returns enabled=false
-    const cfg = await loadConfig("/nonexistent-dir-for-test-" + Date.now())
-    assert.equal(cfg.ai.enabled, false)
-    assert.equal(cfg.ai.model, "openai/privacy-filter")
-    assert.equal(cfg.ai.dtype, "q4")
-    assert.equal(cfg.ai.device, "cpu")
-    assert.equal(cfg.ai.silentFallback, true)
-    assert.deepEqual(cfg.ai.categories, [])
+    // Create a minimal config with enabled=false to override the real one.
+    // The env override is added to the candidate list but global config still
+    // gets checked too, so we provide a real file that wins first.
+    const tmpDir = `/tmp/vibeguard-test-${Date.now()}`
+    mkdirSync(tmpDir, { recursive: true })
+    const tmpCfg = join(tmpDir, "vibeguard.config.json")
+    writeFileSync(tmpCfg, JSON.stringify({ enabled: false }), "utf8")
+
+    const prev = process.env.OPENCODE_VIBEGUARD_CONFIG
+    process.env.OPENCODE_VIBEGUARD_CONFIG = tmpCfg
+    try {
+      const cfg = await loadConfig(tmpDir)
+      assert.equal(cfg.ai.enabled, false)
+      assert.equal(cfg.ai.model, "openai/privacy-filter")
+      assert.equal(cfg.ai.dtype, "q4")
+      assert.equal(cfg.ai.device, "cpu")
+      assert.equal(cfg.ai.silentFallback, true)
+      assert.deepEqual(cfg.ai.categories, [])
+    } finally {
+      if (prev === undefined) delete process.env.OPENCODE_VIBEGUARD_CONFIG
+      else process.env.OPENCODE_VIBEGUARD_CONFIG = prev
+      try { unlinkSync(tmpCfg) } catch {}
+    }
   })
 })
diff --git a/src/index.js b/src/index.js
index 0fd35f4..3f27b6f 100644
--- a/src/index.js
+++ b/src/index.js
@@ -4,7 +4,6 @@ import { PlaceholderSession } from "./session.js"
 import { redactText, redactTextWithAI } from "./engine.js"
 import { redactDeep, restoreDeep } from "./deep.js"
 import { restoreText } from "./restore.js"
-import { isAIAvailable } from "./ai-detect.js"
 
 /**
  * OpenCode plugin entry point:
@@ -32,7 +31,9 @@ export const VibeGuardPrivacy = async (ctx) => {
   const useAI = aiConfig.enabled
 
   // Check AI availability at startup (non-blocking info)
+  // Import ai-detect lazily to avoid pulling in Transformers.js when AI disabled
   if (useAI) {
+    const { isAIAvailable, disposeAI } = await import("./ai-detect.js")
     const available = await isAIAvailable()
     if (available) {
       console.log(
@@ -46,6 +47,14 @@ export const VibeGuardPrivacy = async (ctx) => {
           `Falling back to regex/keyword detection only.`
       )
     }
+
+    // Clean up model pipeline on process exit to free memory
+    const onExit = () => {
+      disposeAI().catch(() => {})
+    }
+    process.on("exit", onExit)
+    process.on("SIGINT", onExit)
+    process.on("SIGTERM", onExit)
   }
 
   if (debug) {
diff --git a/src/patterns.js b/src/patterns.js
index 437ddd4..32e1503 100644
--- a/src/patterns.js
+++ b/src/patterns.js
@@ -90,8 +90,9 @@ const BUILTIN = new Map([
   [
     "phone_us",
     {
-      // US phone: (555) 123-4567, 555-123-4567, +1-555-123-4567, etc.
-      pattern: String.raw`(?<!\d)(?:\+?1[-.\s]?)?(?:\(\d{3}\)|\d{3})[-.\s]?\d{3}[-.\s]?\d{4}(?!\d)`,
+      // US phone: (555) 123-4567, 555-123-4567, +1-555-123-4567
+      // Requires at least one separator or parenthesized area code to avoid matching bare digit sequences
+      pattern: String.raw`(?<!\d)(?:\+?1[-.\s])?\(\d{3}\)[-.\s]?\d{3}[-.\s]?\d{4}(?!\d)|(?<!\d)(?:\+?1[-.\s])?\d{3}[-.\s]\d{3}[-.\s]?\d{4}(?!\d)`,
       flags: "",
       category: "PHONE_US",
     },
@@ -99,8 +100,10 @@ const BUILTIN = new Map([
   [
     "phone_intl",
     {
-      // International E.164-style: +44 20 7946 0958, +49-30-1234567, etc.
-      pattern: String.raw`(?<!\d)\+[1-9]\d{1,2}[-.\s]?\d[\d\-.\s]{6,14}\d(?!\d)`,
+      // International: +44 20 7946 0958, +49-30-1234567, +33 1 23 45 67 89
+      // Requires + prefix, country code 1-3 digits, then 7-14 additional digits with separators
+      // Must contain at least one separator to avoid matching arbitrary digit strings
+      pattern: String.raw`(?<!\d)\+[1-9]\d{0,2}[-.\s]\d(?:[-.\s]?\d){6,13}(?!\d)`,
       flags: "",
       category: "PHONE_INTL",
     },
@@ -117,8 +120,9 @@ const BUILTIN = new Map([
   [
     "credit_card",
     {
-      // Major credit card patterns (Visa, MC, Amex, Discover) with optional separators
-      pattern: String.raw`(?<!\d)(?:4\d{3}|5[1-5]\d{2}|3[47]\d{2}|6(?:011|5\d{2}))[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}(?!\d)`,
+      // Visa (4xxx), MC (51-55xx), Discover (6011/65xx): 16 digits with optional separators
+      // Amex (34xx/37xx): 15 digits (4-6-5 grouping)
+      pattern: String.raw`(?<!\d)(?:(?:4\d{3}|5[1-5]\d{2}|6(?:011|5\d{2}))[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}|3[47]\d{2}[-\s]?\d{6}[-\s]?\d{5})(?!\d)`,
       flags: "",
       category: "CREDIT_CARD",
     },
@@ -126,7 +130,8 @@ const BUILTIN = new Map([
   [
     "openai_key",
     {
-      pattern: String.raw`sk-[A-Za-z0-9]{20,}T3BlbkFJ[A-Za-z0-9]{20,}`,
+      // OpenAI API keys: legacy sk-...T3BlbkFJ... and new sk-proj-... formats
+      pattern: String.raw`sk-(?:proj-)?[A-Za-z0-9_-]{20,}`,
       flags: "",
       category: "OPENAI_KEY",
     },

From c180b58f54078df5c94745686a0d98e394d8dd3c Mon Sep 17 00:00:00 2001
From: Jeremy John <jeremy.john@rate.com>
Date: Thu, 14 May 2026 16:34:24 -0500
Subject: [PATCH 03/15] fix: use client.app.log() for TUI-safe logging, add
 postinstall model download

- Replace all console.log/process.stderr.write with OpenCode's
  client.app.log() API to stop corrupting the TUI
- Add setLogger() to ai-detect.js so the structured logger propagates
  from index.js without threading ctx through every function
- Cooldown log fires once per failure, not per redaction call
- Add scripts/download-model.js postinstall to pre-download the
  Privacy Filter model during npm install (~400MB, one-time)
- Add explicit redaction disclaimer to LICENSE (no guarantee all
  sensitive data will be detected or redacted)

All 22 tests pass.
---
 LICENSE                   | 22 +++++++++++++++++++++
 package.json              |  5 ++++-
 scripts/download-model.js | 41 +++++++++++++++++++++++++++++++++++++++
 src/ai-detect.js          | 38 ++++++++++++++++++++++--------------
 src/index.js              | 40 ++++++++++++++++++++------------------
 5 files changed, 111 insertions(+), 35 deletions(-)
 create mode 100644 scripts/download-model.js

diff --git a/LICENSE b/LICENSE
index 9467b88..dfcb589 100644
--- a/LICENSE
+++ b/LICENSE
@@ -19,3 +19,25 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
+
+---
+
+SUPPLEMENTARY NOTICE — REDACTION DISCLAIMER
+
+This software attempts to detect and redact personally identifiable information
+(PII), secrets, and other sensitive data before it is sent to a language model.
+However, NO REDACTION SYSTEM IS PERFECT. The authors and contributors make NO
+guarantee, representation, or warranty — express or implied — that all
+sensitive data will be detected or redacted.
+
+The authors and contributors SHALL NOT BE LIABLE for any sensitive data,
+secrets, credentials, personally identifiable information, or other
+confidential material that passes through this software unredacted, whether due
+to limitations of regex patterns, AI model accuracy, misconfiguration, software
+defects, or any other cause.
+
+YOU ARE SOLELY RESPONSIBLE for reviewing your configuration, testing redaction
+coverage against your specific data, and implementing additional safeguards as
+appropriate for your use case. This software is provided as one layer in a
+defense-in-depth strategy and must not be relied upon as the sole mechanism for
+preventing data exposure.
diff --git a/package.json b/package.json
index b88df48..00eea98 100644
--- a/package.json
+++ b/package.json
@@ -7,12 +7,15 @@
   "exports": "./src/index.js",
   "files": [
     "src",
+    "scripts",
     "README-zh.md",
     "vibeguard.config.json.example"
   ],
   "scripts": {
     "test": "node --test",
-    "prepack": "npm test"
+    "prepack": "npm test",
+    "postinstall": "node scripts/download-model.js",
+    "download-model": "node scripts/download-model.js"
   },
   "optionalDependencies": {
     "@huggingface/transformers": "^4.2.0"
diff --git a/scripts/download-model.js b/scripts/download-model.js
new file mode 100644
index 0000000..14fe68c
--- /dev/null
+++ b/scripts/download-model.js
@@ -0,0 +1,41 @@
+#!/usr/bin/env node
+/**
+ * Pre-download the Privacy Filter model so it's cached locally before runtime.
+ * Runs automatically via `npm install` (postinstall).
+ *
+ * If @huggingface/transformers isn't installed or the download fails,
+ * this script exits 0 (success) so it never breaks `npm install`.
+ */
+
+const MODEL = process.env.VIBEGUARD_AI_MODEL || "openai/privacy-filter"
+const DTYPE = process.env.VIBEGUARD_AI_DTYPE || "q4"
+
+async function main() {
+  let transformers
+  try {
+    transformers = await import("@huggingface/transformers")
+  } catch {
+    // transformers not installed (optionalDependency) — nothing to download
+    return
+  }
+
+  console.log(`[vibeguard] Downloading AI model: ${MODEL} (dtype=${DTYPE})...`)
+  console.log("[vibeguard] This is a one-time download (~400MB for q4). Please wait.")
+
+  const start = Date.now()
+  try {
+    const pipe = await transformers.pipeline("token-classification", MODEL, {
+      dtype: DTYPE,
+      device: "cpu",
+    })
+    // Dispose immediately — we only needed to trigger the download/cache
+    if (typeof pipe.dispose === "function") await pipe.dispose()
+    const elapsed = ((Date.now() - start) / 1000).toFixed(1)
+    console.log(`[vibeguard] Model downloaded and cached successfully (${elapsed}s).`)
+  } catch (err) {
+    console.log(`[vibeguard] Model download failed: ${err.message}`)
+    console.log("[vibeguard] The model will be downloaded on first use instead.")
+  }
+}
+
+main().catch(() => {})
diff --git a/src/ai-detect.js b/src/ai-detect.js
index e9b8804..f686f9d 100644
--- a/src/ai-detect.js
+++ b/src/ai-detect.js
@@ -31,6 +31,17 @@ let _loading = null
 let _transformersModule = undefined // undefined = not yet checked
 let _transformersImportFailed = false // true = last import() threw
 let _pipelineFailedAt = 0 // timestamp of last pipeline failure (0 = never)
+let _cooldownLogged = false // true = already logged "on cooldown" once
+let _log = () => {} // no-op default; index.js sets this via setLogger()
+
+/**
+ * Set the logger function for ai-detect.
+ * Called once from index.js with the client.app.log wrapper.
+ * @param {Function} logFn - (level, message) => void
+ */
+export function setLogger(logFn) {
+  if (typeof logFn === "function") _log = logFn
+}
 
 /**
  * Attempt to import @huggingface/transformers.
@@ -73,24 +84,25 @@ async function getPipeline(aiConfig, debug) {
   if (_pipelineFailedAt > 0) {
     const elapsed = Date.now() - _pipelineFailedAt
     if (elapsed < RETRY_COOLDOWN_MS) {
-      if (debug) {
+      // Log once, not on every call — avoids flooding the terminal
+      if (debug && !_cooldownLogged) {
         const remaining = Math.ceil((RETRY_COOLDOWN_MS - elapsed) / 1000)
-        console.log(`[vibeguard] AI model load on cooldown (${remaining}s remaining), skipping`)
+        _log("debug", `AI model failed to load, cooldown ${remaining}s. Using regex-only.`)
+        _cooldownLogged = true
       }
       return null
     }
     // Cooldown expired — allow retry
     _pipelineFailedAt = 0
+    _cooldownLogged = false
   }
 
   _loading = (async () => {
     const transformers = await loadTransformers()
     if (!transformers) {
+      // Only log during initial load, not on cooldown retries
       if (debug) {
-        console.log(
-          "[vibeguard] AI detection unavailable: @huggingface/transformers not installed. " +
-            "Install with: npm i @huggingface/transformers"
-        )
+        _log("warn", "AI detection unavailable: @huggingface/transformers not installed.")
       }
       return null
     }
@@ -101,9 +113,7 @@ async function getPipeline(aiConfig, debug) {
     const timeoutMs = aiConfig.timeoutMs || MODEL_LOAD_TIMEOUT_MS
 
     if (debug) {
-      console.log(
-        `[vibeguard] Loading AI model: ${model} (dtype=${dtype}, device=${device}, timeout=${timeoutMs}ms)`
-      )
+      _log("info", `Loading AI model: ${model} (dtype=${dtype}, device=${device})`)
     }
 
     try {
@@ -123,13 +133,13 @@ async function getPipeline(aiConfig, debug) {
       _pipeline = await Promise.race([loadPromise, timeoutPromise])
 
       if (debug) {
-        console.log("[vibeguard] AI model loaded successfully")
+        _log("info", "AI model loaded successfully")
       }
       _pipelineFailedAt = 0
       return _pipeline
     } catch (err) {
       if (debug || !aiConfig.silentFallback) {
-        console.log(`[vibeguard] Failed to load AI model: ${err.message}`)
+        _log("error", `Failed to load AI model: ${err.message}`)
       }
       _pipeline = null
       _pipelineFailedAt = Date.now()
@@ -191,15 +201,13 @@ export async function detectWithAI(text, aiConfig, debug) {
     }
 
     if (debug && spans.length > 0) {
-      console.log(
-        `[vibeguard] AI detected ${spans.length} span(s): ${spans.map((s) => s.category).join(", ")}`
-      )
+      _log("debug", `AI detected ${spans.length} span(s): ${spans.map((s) => s.category).join(", ")}`)
     }
 
     return spans
   } catch (err) {
     if (!aiConfig.silentFallback || debug) {
-      console.log(`[vibeguard] AI inference error: ${err.message}, falling back to regex-only`)
+      _log("error", `AI inference error: ${err.message}, falling back to regex-only`)
     }
     return []
   }
diff --git a/src/index.js b/src/index.js
index 3f27b6f..a6f47cd 100644
--- a/src/index.js
+++ b/src/index.js
@@ -18,9 +18,21 @@ export const VibeGuardPrivacy = async (ctx) => {
   const config = await loadConfig(ctx.directory)
   const debug = Boolean(process.env.OPENCODE_VIBEGUARD_DEBUG) || Boolean(config.debug)
 
+  // Use OpenCode's structured logging instead of console.log to avoid
+  // corrupting the TUI. Falls back to no-op if client.app.log is unavailable.
+  const log = (level, message, extra) => {
+    try {
+      ctx.client?.app?.log({
+        body: { service: "vibeguard", level, message, ...(extra ? { extra } : {}) },
+      })
+    } catch {
+      /* swallow — never crash the plugin over logging */
+    }
+  }
+
   if (debug) {
     const from = config.loadedFrom ? config.loadedFrom : "not found (plugin will no-op)"
-    console.log(`[vibeguard] Config: ${from} enabled=${config.enabled}`)
+    log("debug", `Config: ${from} enabled=${config.enabled}`)
   }
 
   if (!config.enabled) return {}
@@ -33,19 +45,13 @@ export const VibeGuardPrivacy = async (ctx) => {
   // Check AI availability at startup (non-blocking info)
   // Import ai-detect lazily to avoid pulling in Transformers.js when AI disabled
   if (useAI) {
-    const { isAIAvailable, disposeAI } = await import("./ai-detect.js")
+    const { isAIAvailable, disposeAI, setLogger } = await import("./ai-detect.js")
+    setLogger(log)
     const available = await isAIAvailable()
     if (available) {
-      console.log(
-        `[vibeguard] AI detection enabled (model: ${aiConfig.model}, dtype: ${aiConfig.dtype}). ` +
-          `Model will be downloaded on first use if not cached.`
-      )
+      log("info", `AI detection enabled (model: ${aiConfig.model}, dtype: ${aiConfig.dtype}). Model will be downloaded on first use if not cached.`)
     } else {
-      console.log(
-        `[vibeguard] AI detection enabled in config but @huggingface/transformers is not installed. ` +
-          `Install with: npm i @huggingface/transformers\n` +
-          `Falling back to regex/keyword detection only.`
-      )
+      log("warn", "AI detection enabled in config but @huggingface/transformers is not installed. Falling back to regex/keyword detection only.")
     }
 
     // Clean up model pipeline on process exit to free memory
@@ -58,10 +64,8 @@ export const VibeGuardPrivacy = async (ctx) => {
   }
 
   if (debug) {
-    console.log(`[vibeguard] AI detection: ${useAI ? "enabled" : "disabled (opt-in via config)"}`)
-    console.log(
-      `[vibeguard] Regex patterns: ${patterns.keywords.length} keywords, ${patterns.regex.length} regex rules`
-    )
+    log("debug", `AI detection: ${useAI ? "enabled" : "disabled (opt-in via config)"}`)
+    log("debug", `Regex patterns: ${patterns.keywords.length} keywords, ${patterns.regex.length} regex rules`)
   }
 
   const getSession = (sessionID) => {
@@ -165,9 +169,7 @@ export const VibeGuardPrivacy = async (ctx) => {
       }
 
       if (debug && changedTextParts > 0) {
-        console.log(
-          `[vibeguard] Pre-request redaction: modified ${changedTextParts} text segment(s)`
-        )
+        log("debug", `Pre-request redaction: modified ${changedTextParts} text segment(s)`)
       }
     },
 
@@ -181,7 +183,7 @@ export const VibeGuardPrivacy = async (ctx) => {
       const after = restoreText(before, session)
       output.text = after
       if (debug && after !== before) {
-        console.log("[vibeguard] Post-response restore: modified 1 text segment")
+        log("debug", "Post-response restore: modified 1 text segment")
       }
     },
 

From 7b0f0e695caffc3854ea84d66150e25bd8f04480 Mon Sep 17 00:00:00 2001
From: Jeremy John <jeremy.john@rate.com>
Date: Thu, 14 May 2026 16:38:57 -0500
Subject: [PATCH 04/15] fix: remove process exit handlers to prevent ONNX
 native crash on shutdown
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ONNX runtime (N-API native addon in Transformers.js) crashes with
'NAPI FATAL ERROR' when disposeAI() is called during process teardown —
the native module is already partially torn down. The OS reclaims all
memory on exit anyway, so explicit disposal is unnecessary.

Peak RSS was 6.57GB, which suggests the model may load unquantized in
some scenarios — this will be investigated separately.
---
 src/index.js | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/src/index.js b/src/index.js
index a6f47cd..3e2608d 100644
--- a/src/index.js
+++ b/src/index.js
@@ -45,7 +45,7 @@ export const VibeGuardPrivacy = async (ctx) => {
   // Check AI availability at startup (non-blocking info)
   // Import ai-detect lazily to avoid pulling in Transformers.js when AI disabled
   if (useAI) {
-    const { isAIAvailable, disposeAI, setLogger } = await import("./ai-detect.js")
+    const { isAIAvailable, setLogger } = await import("./ai-detect.js")
     setLogger(log)
     const available = await isAIAvailable()
     if (available) {
@@ -53,14 +53,9 @@ export const VibeGuardPrivacy = async (ctx) => {
     } else {
       log("warn", "AI detection enabled in config but @huggingface/transformers is not installed. Falling back to regex/keyword detection only.")
     }
-
-    // Clean up model pipeline on process exit to free memory
-    const onExit = () => {
-      disposeAI().catch(() => {})
-    }
-    process.on("exit", onExit)
-    process.on("SIGINT", onExit)
-    process.on("SIGTERM", onExit)
+    // NOTE: We intentionally do NOT register process exit handlers for disposeAI().
+    // The ONNX runtime (native N-API addon) crashes if dispose() is called during
+    // process teardown. The OS reclaims all memory on exit anyway.
   }
 
   if (debug) {

From 7fd7b5e6662a4e19a40abec462acc3921650ab33 Mon Sep 17 00:00:00 2001
From: Jeremy John <jeremy.john@rate.com>
Date: Thu, 14 May 2026 17:16:08 -0500
Subject: [PATCH 05/15] feat: shared model server + AI integration tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- model-server.js: shared inference daemon over Unix socket with idle timeout,
  request queue (ONNX not thread-safe), PID-after-bind, stale socket/PID cleanup
- ai-detect.js: O_EXCL atomic lock for spawn serialization, PID validation,
  health polling, model mismatch cooldown, close leaked log fd
- model-server.test.js: 23 integration tests covering subtle secrets (JDBC,
  Mongo, GitHub PAT, Stripe, JWT, AWS, private key), subtle PII (names, emails,
  phones, addresses, DOB), false-positive resistance, category filtering
- generate-test-secrets.js: build realistic secrets at test time from random
  components — no literal secrets in version-controlled source
- package.json: bin entry for vibeguard-server, pretest script
---
 package.json                     |   6 +-
 scripts/generate-test-secrets.js | 135 ++++++++
 src/ai-detect.js                 | 555 ++++++++++++++++++++++---------
 src/model-server.js              | 447 +++++++++++++++++++++++++
 src/model-server.test.js         | 444 +++++++++++++++++++++++++
 5 files changed, 1430 insertions(+), 157 deletions(-)
 create mode 100644 scripts/generate-test-secrets.js
 create mode 100755 src/model-server.js
 create mode 100644 src/model-server.test.js

diff --git a/package.json b/package.json
index 00eea98..dc4411f 100644
--- a/package.json
+++ b/package.json
@@ -12,6 +12,7 @@
     "vibeguard.config.json.example"
   ],
   "scripts": {
+    "pretest": "node scripts/generate-test-secrets.js",
     "test": "node --test",
     "prepack": "npm test",
     "postinstall": "node scripts/download-model.js",
@@ -39,5 +40,8 @@
     "url": "https://github.com/inkdust2021/opencode-vibeguard/issues"
   },
   "homepage": "https://github.com/inkdust2021/opencode-vibeguard#readme",
-  "license": "MIT"
+  "license": "MIT",
+  "bin": {
+    "vibeguard-server": "src/model-server.js"
+  }
 }
diff --git a/scripts/generate-test-secrets.js b/scripts/generate-test-secrets.js
new file mode 100644
index 0000000..e1cc83d
--- /dev/null
+++ b/scripts/generate-test-secrets.js
@@ -0,0 +1,135 @@
+#!/usr/bin/env node
+/**
+ * Generate realistic test secrets for AI model integration tests.
+ *
+ * These secrets are assembled at test time (npm "pretest") from random components
+ * so they never appear as literal strings in version-controlled source.
+ * GitHub push protection scans source code, not runtime output.
+ *
+ * Output: test/.secrets.json (gitignored)
+ *
+ * The generated values are structurally valid (correct prefixes, lengths,
+ * character sets) so the AI model recognizes them as real secrets — which
+ * is the whole point of the integration tests.
+ *
+ * Usage:
+ *   node scripts/generate-test-secrets.js
+ *   # produces test/.secrets.json
+ */
+
+import fs from "node:fs"
+import path from "node:path"
+import crypto from "node:crypto"
+import { fileURLToPath } from "node:url"
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url))
+const ROOT = path.resolve(__dirname, "..")
+const OUT_DIR = path.join(ROOT, "test")
+const OUT_FILE = path.join(OUT_DIR, ".secrets.json")
+
+// ---------------------------------------------------------------------------
+// Generators — each produces a structurally valid but fake secret
+// ---------------------------------------------------------------------------
+
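+/** Random string of `len` chars picked from `charset` via random bytes (byte % length: slightly biased unless 256 is a multiple of the charset length; fine for fixtures). */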
+function rand(len, charset = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789") {
+  const bytes = crypto.randomBytes(len)
+  return Array.from(bytes, (b) => charset[b % charset.length]).join("")
+}
+
+/** GitHub Personal Access Token: ghp_ + 36 alphanum */
+function makeGithubPAT() {
+  return "ghp_" + rand(36)
+}
+
+/**
+ * Stripe-style live secret key: "sk_live_" followed by random alphanumerics.
+ * Format: sk_live_<8 alphanum><42 alphanum> (two chunks, no separator).
+ * Total length after the prefix is 50 chars so the value looks realistic.
+ */
+function makeStripeKey() {
+  return "sk_live_" + rand(8) + rand(42)
+}
+
+/**
+ * Slack incoming webhook URL.
+ * Format: https://hooks.slack.com/services/T<9 alphanum>/B<9 alphanum>/<24 alphanum>
+ */
+function makeSlackWebhook() {
+  return `https://hooks.slack.com/services/T${rand(9, "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ")}/B${rand(9, "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ")}/${rand(24)}`
+}
+
+/**
+ * AWS secret access key: 40-char base64-ish string.
+ * Uses the format from AWS docs but with random content.
+ */
+function makeAWSSecretKey() {
+  // AWS secret keys are 40 chars, base64-alphabet + /
+  return rand(40, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789/+")
+}
+
+/**
+ * JWT (HS256 signed).
+ * We build a real JWT structure so the AI recognizes the eyJ... pattern.
+ */
+function makeJWT() {
+  const header = { alg: "HS256", typ: "JWT" }
+  const payload = { sub: rand(10, "0123456789"), name: "Test User", iat: Math.floor(Date.now() / 1000) }
+  const b64url = (obj) => Buffer.from(JSON.stringify(obj)).toString("base64url")
+  const unsigned = b64url(header) + "." + b64url(payload)
+  // Fake signature — 32 random bytes, base64url-encoded
+  const sig = crypto.randomBytes(32).toString("base64url")
+  return unsigned + "." + sig
+}
+
+/**
+ * JDBC password: mix of printable ASCII (special chars make it harder for regex).
+ */
+function makeJDBCPassword() {
+  return rand(16, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*")
+}
+
+/**
+ * MongoDB password: alphanumeric with mixed case.
+ */
+function makeMongoPassword() {
+  return rand(12)
+}
+
+// ---------------------------------------------------------------------------
+// Build fixture
+// ---------------------------------------------------------------------------
+
+const secrets = {
+  github_pat: makeGithubPAT(),
+  stripe_key: makeStripeKey(),
+  slack_webhook: makeSlackWebhook(),
+  aws_secret_key: makeAWSSecretKey(),
+  jwt: makeJWT(),
+  jdbc_password: makeJDBCPassword(),
+  mongo_password: makeMongoPassword(),
+}
+
+// Build the full text snippets the tests will use
+const fixtures = {
+  ...secrets,
+  // Full text strings ready for detect() calls
+  jdbc_text: `app.datasource.url=jdbc:postgresql://db.internal:5432/mydb?user=svc_account&password=${secrets.jdbc_password}`,
+  mongo_text: `const client = new MongoClient("mongodb://admin:${secrets.mongo_password}@cluster0.abc123.mongodb.net/prod?retryWrites=true")`,
+  github_text: `const config = { token: "${secrets.github_pat}" }`,
+  slack_text: `Post deploy notifications to ${secrets.slack_webhook}`,
+  aws_text: `export AWS_SECRET_ACCESS_KEY="${secrets.aws_secret_key}"`,
+  jwt_text: `headers: { "Authorization": "Bearer ${secrets.jwt}" }`,
+  stripe_text: JSON.stringify({
+    payment: {
+      provider: "stripe",
+      secret: secrets.stripe_key,
+    },
+  }, null, 2),
+}
+
+// Write output
+fs.mkdirSync(OUT_DIR, { recursive: true })
+fs.writeFileSync(OUT_FILE, JSON.stringify(fixtures, null, 2) + "\n", "utf8")
+
+console.log(`Generated ${Object.keys(secrets).length} test secrets → ${OUT_FILE}`)
diff --git a/src/ai-detect.js b/src/ai-detect.js
index f686f9d..de869fe 100644
--- a/src/ai-detect.js
+++ b/src/ai-detect.js
@@ -1,160 +1,402 @@
 /**
- * AI-based PII/secret detection using OpenAI's Privacy Filter model
- * via Transformers.js (runs locally, no external API calls).
+ * AI-based PII/secret detection — thin HTTP client.
  *
- * This module is opt-in: it only activates when `ai.enabled` is set in config
- * AND `@huggingface/transformers` is installed.
+ * Instead of loading the ~400MB ONNX model in-process, this module
+ * talks to a shared model-server daemon over a Unix domain socket.
+ * Multiple OpenCode instances share one model copy in RAM.
  *
- * The model (~400MB q4 quantized) is downloaded on first use and cached locally.
+ * On first call, if the server isn't running, this module spawns it
+ * as a detached background process and waits for it to become ready.
+ *
+ * The detectWithAI() signature is unchanged — engine.js and index.js
+ * don't need to know about the server.
  */
 
-/** Default timeout for model download/load (2 minutes). */
-const MODEL_LOAD_TIMEOUT_MS = 120_000
+import http from "node:http"
+import fs from "node:fs"
+import path from "node:path"
+import os from "node:os"
+import { fileURLToPath } from "node:url"
+import { spawn } from "node:child_process"
 
-/** Cooldown after a failed model load before retrying (5 minutes). */
-const RETRY_COOLDOWN_MS = 5 * 60_000
+// ---------------------------------------------------------------------------
+// Constants
+// ---------------------------------------------------------------------------
+/** How long to wait for the server to become ready (model download + load). */
+const SERVER_READY_TIMEOUT_MS = 180_000 // 3 min (model may need downloading)
 
-/** Map Privacy Filter entity labels to vibeguard categories. */
-const LABEL_TO_CATEGORY = {
-  private_person: "PRIVATE_PERSON",
-  private_address: "PRIVATE_ADDRESS",
-  private_email: "PRIVATE_EMAIL",
-  private_phone: "PRIVATE_PHONE",
-  private_url: "PRIVATE_URL",
-  private_date: "PRIVATE_DATE",
-  account_number: "ACCOUNT_NUMBER",
-  secret: "SECRET",
-}
+/** Interval between health polls when waiting for server readiness. */
+const HEALTH_POLL_MS = 500
+
+/** Per-request timeout for /detect calls. */
+const REQUEST_TIMEOUT_MS = 30_000
+
+/** Cooldown after server spawn failure before retrying. */
+const SPAWN_COOLDOWN_MS = 60_000
 
-let _pipeline = null
-let _loading = null
-let _transformersModule = undefined // undefined = not yet checked
-let _transformersImportFailed = false // true = last import() threw
-let _pipelineFailedAt = 0 // timestamp of last pipeline failure (0 = never)
-let _cooldownLogged = false // true = already logged "on cooldown" once
-let _log = () => {} // no-op default; index.js sets this via setLogger()
+/** Idle timeout passed to the server (20 min). */
+const IDLE_TIMEOUT_MS = 20 * 60_000
+
+// ---------------------------------------------------------------------------
+// State
+// ---------------------------------------------------------------------------
+let _log = () => {} // no-op; set via setLogger()
+let _serverReady = false // true once health check confirmed ready
+let _spawnFailedAt = 0 // timestamp of last spawn failure
+let _socketPath = null // resolved lazily
 
 /**
- * Set the logger function for ai-detect.
- * Called once from index.js with the client.app.log wrapper.
+ * Set the logger function. Called from index.js.
  * @param {Function} logFn - (level, message) => void
  */
 export function setLogger(logFn) {
   if (typeof logFn === "function") _log = logFn
 }
 
+// ---------------------------------------------------------------------------
+// Socket path
+// ---------------------------------------------------------------------------
+function getSocketPath() {
+  if (_socketPath) return _socketPath
+  const dir = process.env.TMPDIR || os.tmpdir() || "/tmp"
+  const uid = process.getuid?.() ?? process.pid
+  _socketPath = path.join(dir, `vibeguard-${uid}.sock`)
+  return _socketPath
+}
+
+function getLockPath() {
+  return getSocketPath().replace(/\.sock$/, ".lock")
+}
+
+function getPidPath() {
+  return getSocketPath().replace(/\.sock$/, ".pid")
+}
+
+function getLogPath() {
+  return getSocketPath().replace(/\.sock$/, ".log")
+}
+
+// ---------------------------------------------------------------------------
+// HTTP helpers (over Unix socket)
+// ---------------------------------------------------------------------------
+
+/**
+ * Make an HTTP request to the model server over Unix socket.
+ * Returns parsed JSON body or null on failure.
+ */
+function serverRequest(method, urlPath, body, timeoutMs) {
+  return new Promise((resolve) => {
+    const socketPath = getSocketPath()
+    const opts = {
+      socketPath,
+      path: urlPath,
+      method,
+      headers: {},
+      timeout: timeoutMs,
+    }
+
+    let payload = null
+    if (body) {
+      payload = JSON.stringify(body)
+      opts.headers["Content-Type"] = "application/json"
+      opts.headers["Content-Length"] = Buffer.byteLength(payload)
+    }
+
+    const req = http.request(opts, (res) => {
+      const chunks = []
+      res.on("data", (c) => chunks.push(c))
+      res.on("end", () => {
+        try {
+          resolve(JSON.parse(Buffer.concat(chunks).toString("utf8")))
+        } catch {
+          resolve(null)
+        }
+      })
+    })
+
+    req.on("error", () => resolve(null))
+    req.on("timeout", () => {
+      req.destroy()
+      resolve(null)
+    })
+
+    if (payload) req.write(payload)
+    req.end()
+  })
+}
+
+// ---------------------------------------------------------------------------
+// Server spawn lock (O_EXCL atomic create + PID validation)
+// ---------------------------------------------------------------------------
+
+/**
+ * Acquire a spawn lock using atomic file creation (O_EXCL).
+ * Returns true if lock was acquired, false if another process holds it.
+ *
+ * The lockfile contains the PID of the holder. If the holder is dead
+ * (stale lock), we remove it and retry once.
+ */
+function tryLock() {
+  const lockPath = getLockPath()
+  for (let attempt = 0; attempt < 2; attempt++) {
+    try {
+      // O_CREAT | O_EXCL | O_WRONLY — atomic create-or-fail
+      const fd = fs.openSync(lockPath, fs.constants.O_CREAT | fs.constants.O_EXCL | fs.constants.O_WRONLY)
+      // Write our PID so others can check liveness
+      fs.writeSync(fd, String(process.pid))
+      fs.closeSync(fd)
+      return true
+    } catch (err) {
+      if (err.code !== "EEXIST") return false
+      // Lock file exists — check if holder is alive
+      try {
+        const holderPid = Number(fs.readFileSync(lockPath, "utf8").trim())
+        if (Number.isFinite(holderPid) && holderPid > 0 && isProcessAlive(holderPid)) {
+          return false // holder is alive, lock is valid
+        }
+      } catch {
+        /* can't read — try to remove */
+      }
+      // Holder is dead or file unreadable — remove stale lock and retry
+      try { fs.unlinkSync(lockPath) } catch { /* ok */ }
+    }
+  }
+  return false
+}
+
+function releaseLock() {
+  try {
+    fs.unlinkSync(getLockPath())
+  } catch {
+    /* ignore */
+  }
+}
+
 /**
- * Attempt to import @huggingface/transformers.
- * Returns the module or null if not installed.
- * Allows retry: a transient failure (e.g. temporary file-system error)
- * clears the cached null so the next call re-attempts the import.
+ * Check if a process with the given PID is alive.
  */
-async function loadTransformers() {
-  if (_transformersModule !== undefined && _transformersModule !== null) return _transformersModule
-  // If the previous import failed, allow retry (don't cache null permanently)
-  if (_transformersModule === null && !_transformersImportFailed) return null
+function isProcessAlive(pid) {
   try {
-    _transformersModule = await import("@huggingface/transformers")
-    _transformersImportFailed = false
-    return _transformersModule
+    process.kill(pid, 0) // signal 0 = existence check
+    return true
   } catch {
-    _transformersModule = null
-    _transformersImportFailed = true
-    return null
+    return false
   }
 }
 
 /**
- * Initialize the Privacy Filter pipeline. Lazy-loads on first call.
- * Returns the pipeline instance or null if unavailable.
- *
- * Defenses:
- * - If the pipeline loaded successfully before, returns cached instance.
- * - If another caller is already loading, coalesces onto that promise.
- * - If the previous load failed, enforces a cooldown (RETRY_COOLDOWN_MS)
- *   before retrying to avoid a retry storm on every redaction call.
- * - Wraps model download in a timeout (MODEL_LOAD_TIMEOUT_MS) so a
- *   hanging download doesn't block redaction forever.
+ * Check if the server is running by reading PID file + socket existence.
  */
-async function getPipeline(aiConfig, debug) {
-  if (_pipeline) return _pipeline
-  if (_loading) return _loading
-
-  // Cooldown after failure — don't retry on every call
-  if (_pipelineFailedAt > 0) {
-    const elapsed = Date.now() - _pipelineFailedAt
-    if (elapsed < RETRY_COOLDOWN_MS) {
-      // Log once, not on every call — avoids flooding the terminal
-      if (debug && !_cooldownLogged) {
-        const remaining = Math.ceil((RETRY_COOLDOWN_MS - elapsed) / 1000)
-        _log("debug", `AI model failed to load, cooldown ${remaining}s. Using regex-only.`)
-        _cooldownLogged = true
-      }
-      return null
+function isServerRunning() {
+  const pidPath = getPidPath()
+  const socketPath = getSocketPath()
+  try {
+    if (!fs.existsSync(socketPath)) return false
+    const pidStr = fs.readFileSync(pidPath, "utf8").trim()
+    const pid = Number(pidStr)
+    if (!Number.isFinite(pid) || pid <= 0) return false
+    return isProcessAlive(pid)
+  } catch {
+    return false
+  }
+}
+
+/**
+ * Remove the socket/pid files left by a crashed server. The spawn lock is NOT removed here: ensureServer() calls this while holding it, and unlinking it would let another process spawn a duplicate server.
+ */
+function cleanupStaleFiles() {
+  for (const f of [getSocketPath(), getPidPath()]) {
+    try { fs.unlinkSync(f) } catch { /* ok */ }
+  }
+}
+
+/**
+ * Spawn the model server as a detached background process; returns the child's pid (undefined when the spawn failed).
+ */
+function spawnServer(aiConfig) {
+  const serverScript = path.join(
+    path.dirname(fileURLToPath(import.meta.url)),
+    "model-server.js"
+  )
+
+  const model = aiConfig.model || "openai/privacy-filter"
+  const dtype = aiConfig.dtype || "q4"
+  const device = aiConfig.device || "cpu"
+
+  // Use the same Node binary that's running the current process
+  const nodeBin = process.execPath
+
+  const logPath = getLogPath()
+  let logFd = null
+  let stdout, stderr
+  try {
+    logFd = fs.openSync(logPath, "a")
+    stdout = logFd
+    stderr = logFd
+  } catch {
+    stdout = "ignore"
+    stderr = "ignore"
+  }
+
+  const child = spawn(
+    nodeBin,
+    [
+      serverScript,
+      "--model",
+      model,
+      "--dtype",
+      dtype,
+      "--device",
+      device,
+      "--socket",
+      getSocketPath(),
+      "--idle-timeout",
+      String(IDLE_TIMEOUT_MS),
+    ],
+    {
+      detached: true,
+      stdio: ["ignore", stdout, stderr],
+      env: { ...process.env },
     }
-    // Cooldown expired — allow retry
-    _pipelineFailedAt = 0
-    _cooldownLogged = false
+  )
+
+  child.on("error", (err) => _log("error", `Model server spawn error: ${err.message}`)) // an unhandled "error" event would crash the host process
+  child.unref() // the parent's copy of the log fd is closed below; the child inherited a dup
+  if (logFd !== null) {
+    try { fs.closeSync(logFd) } catch { /* ok */ }
   }
+  _log("info", `Spawned model server (pid=${child.pid}, model=${model}, dtype=${dtype})`)
+  return child.pid
+}
 
-  _loading = (async () => {
-    const transformers = await loadTransformers()
-    if (!transformers) {
-      // Only log during initial load, not on cooldown retries
-      if (debug) {
-        _log("warn", "AI detection unavailable: @huggingface/transformers not installed.")
-      }
-      return null
+/**
+ * Ensure the server is running. Spawn if needed, wait for readiness.
+ * Uses a lockfile to prevent multiple simultaneous spawns.
+ * Returns true if server is ready, false if unavailable.
+ */
+async function ensureServer(aiConfig, debug) {
+  // Fast path: already confirmed ready
+  if (_serverReady) {
+    // Quick health check to confirm it's still alive
+    const h = await serverRequest("GET", "/health", null, 2000)
+    if (h?.status === "ready") return true
+    // Server died — reset and try to respawn
+    _serverReady = false
+  }
+
+  // Cooldown after spawn failure
+  if (_spawnFailedAt > 0) {
+    const elapsed = Date.now() - _spawnFailedAt
+    if (elapsed < SPAWN_COOLDOWN_MS) return false
+    _spawnFailedAt = 0
+  }
+
+  // Check if server is already running (maybe another instance spawned it)
+  if (isServerRunning()) {
+    return await waitForReady(aiConfig, debug)
+  }
+
+  // Need to spawn — acquire lock to prevent races
+  const gotLock = tryLock()
+  if (!gotLock) {
+    // Couldn't acquire lock — another instance is spawning. Just wait.
+    return await waitForReady(aiConfig, debug)
+  }
+
+  try {
+    // Double-check after acquiring lock (another instance may have won)
+    if (isServerRunning()) {
+      return await waitForReady(aiConfig, debug)
     }
 
-    const model = aiConfig.model || "openai/privacy-filter"
-    const dtype = aiConfig.dtype || "q4"
-    const device = aiConfig.device || "cpu"
-    const timeoutMs = aiConfig.timeoutMs || MODEL_LOAD_TIMEOUT_MS
+    // Clean up any stale files from a crashed server
+    cleanupStaleFiles()
 
-    if (debug) {
-      _log("info", `Loading AI model: ${model} (dtype=${dtype}, device=${device})`)
+    // Spawn the server
+    const pid = spawnServer(aiConfig)
+    if (!pid) {
+      _spawnFailedAt = Date.now()
+      _log("error", "Failed to spawn model server")
+      return false
     }
 
-    try {
-      const loadPromise = transformers.pipeline("token-classification", model, {
-        dtype,
-        device,
-      })
+    // Wait for server to become ready
+    return await waitForReady(aiConfig, debug)
+  } finally {
+    releaseLock()
+  }
+}
 
-      // Race the model load against a timeout
-      const timeoutPromise = new Promise((_, reject) => {
-        setTimeout(
-          () => reject(new Error(`Model load timed out after ${timeoutMs}ms`)),
-          timeoutMs
-        )
-      })
+/**
+ * Poll /health until the server reports "ready" or we time out.
+ * Bails early on server error or consecutive connection failures.
+ */
+async function waitForReady(aiConfig, debug) {
+  const deadline = Date.now() + SERVER_READY_TIMEOUT_MS
+  let lastStatus = ""
+  let consecutiveFailures = 0
 
-      _pipeline = await Promise.race([loadPromise, timeoutPromise])
+  while (Date.now() < deadline) {
+    const h = await serverRequest("GET", "/health", null, 5000)
 
-      if (debug) {
-        _log("info", "AI model loaded successfully")
+    if (h?.status === "ready") {
+      // Verify model matches
+      const expected = aiConfig.model || "openai/privacy-filter"
+      if (h.model && h.model !== expected) {
+        _log("warn", `Server loaded model "${h.model}" but config expects "${expected}". Using regex-only.`)
+        // Set cooldown so we don't hot-loop re-checking on every call
+        _spawnFailedAt = Date.now()
+        return false
       }
-      _pipelineFailedAt = 0
-      return _pipeline
-    } catch (err) {
-      if (debug || !aiConfig.silentFallback) {
-        _log("error", `Failed to load AI model: ${err.message}`)
+      _serverReady = true
+      if (debug) _log("info", "Model server ready")
+      return true
+    }
+
+    if (h?.status === "error") {
+      _log("error", `Model server error: ${h.error || "unknown"}`)
+      _spawnFailedAt = Date.now()
+      return false
+    }
+
+    if (h?.status === "loading") {
+      consecutiveFailures = 0
+      if (lastStatus !== "loading" && debug) {
+        _log("info", "Model server is loading the model, waiting...")
+      }
+      lastStatus = "loading"
+    } else {
+      // null response = connection refused / server not up yet
+      consecutiveFailures++
+      // If we get 10+ consecutive connection failures after the server
+      // should have started, it's probably dead — bail early
+      if (consecutiveFailures >= 10) {
+        _log("warn", "Model server not responding after multiple attempts. Using regex-only.")
+        _spawnFailedAt = Date.now()
+        return false
       }
-      _pipeline = null
-      _pipelineFailedAt = Date.now()
-      return null
     }
-  })()
 
-  const result = await _loading
-  _loading = null
-  return result
+    // Wait before next poll
+    await new Promise((r) => setTimeout(r, HEALTH_POLL_MS))
+  }
+
+  _log("warn", "Timed out waiting for model server. Using regex-only.")
+  _spawnFailedAt = Date.now()
+  return false
 }
 
+// ---------------------------------------------------------------------------
+// Public API (signature unchanged from original)
+// ---------------------------------------------------------------------------
+
 /**
  * Detect PII/secrets in text using the Privacy Filter model.
  *
+ * Sends the text to the shared model server for inference.
+ * If the server isn't running, spawns it and waits for readiness.
+ * If anything fails, returns [] (regex-only fallback).
+ *
  * @param {string} text - Input text to scan
  * @param {object} aiConfig - AI configuration from vibeguard config
  * @param {boolean} debug - Enable debug logging
@@ -163,43 +405,32 @@ async function getPipeline(aiConfig, debug) {
 export async function detectWithAI(text, aiConfig, debug) {
   if (!text || typeof text !== "string" || text.length === 0) return []
 
-  const pipe = await getPipeline(aiConfig, debug)
-  if (!pipe) return []
+  const ready = await ensureServer(aiConfig, debug)
+  if (!ready) return []
 
   try {
-    // Run token classification with entity aggregation
-    const entities = await pipe(text, { aggregation_strategy: "simple" })
-    if (!Array.isArray(entities) || entities.length === 0) return []
-
-    const allowedCategories =
-      Array.isArray(aiConfig.categories) && aiConfig.categories.length > 0
-        ? new Set(aiConfig.categories.map((c) => c.toLowerCase()))
-        : null
-
-    const spans = []
-
-    for (const entity of entities) {
-      if (!entity || typeof entity !== "object") continue
-
-      // entity_group is the label without B-/I- prefix (from aggregation)
-      const rawLabel = String(entity.entity_group ?? entity.entity ?? "").toLowerCase()
-      // Strip B-/I- prefix if aggregation didn't remove it
-      const label = rawLabel.replace(/^[bi]-/, "")
+    const result = await serverRequest("POST", "/detect", {
+      text,
+      categories: aiConfig.categories,
+      requestedModel: aiConfig.model || "openai/privacy-filter",
+    }, REQUEST_TIMEOUT_MS)
 
-      if (!label || label === "o") continue
-      if (allowedCategories && !allowedCategories.has(label)) continue
-
-      const start = Number(entity.start)
-      const end = Number(entity.end)
-      if (!Number.isFinite(start) || !Number.isFinite(end)) continue
-      if (start < 0 || end <= start || end > text.length) continue
-
-      const original = text.slice(start, end)
-      const category = LABEL_TO_CATEGORY[label] ?? label.toUpperCase()
+    if (!result) {
+      // Connection failed — server may have died
+      _serverReady = false
+      return []
+    }
 
-      spans.push({ start, end, original, category })
+    if (result.error && !result.spans) {
+      // Server-side error (503, 409, etc.)
+      if (debug || !aiConfig.silentFallback) {
+        _log("warn", `Server error: ${result.error}`)
+      }
+      return []
     }
 
+    const spans = Array.isArray(result.spans) ? result.spans : []
+
     if (debug && spans.length > 0) {
       _log("debug", `AI detected ${spans.length} span(s): ${spans.map((s) => s.category).join(", ")}`)
     }
@@ -207,30 +438,42 @@ export async function detectWithAI(text, aiConfig, debug) {
     return spans
   } catch (err) {
     if (!aiConfig.silentFallback || debug) {
-      _log("error", `AI inference error: ${err.message}, falling back to regex-only`)
+      _log("error", `AI detection error: ${err.message}, falling back to regex-only`)
     }
+    _serverReady = false
     return []
   }
 }
 
 /**
- * Dispose the loaded model pipeline to free memory.
+ * Dispose — no-op. The server manages its own lifecycle (idle timeout).
  */
 export async function disposeAI() {
-  if (_pipeline) {
-    try {
-      if (typeof _pipeline.dispose === "function") await _pipeline.dispose()
-    } catch {
-      /* ignore */
-    }
-    _pipeline = null
-  }
+  // Server exits on its own after 20min of inactivity.
+  // Calling dispose from the plugin process would kill the shared
+  // server for all other OpenCode instances. So this is intentionally a no-op.
 }
 
 /**
- * Check if AI detection is available (transformers package installed).
+ * Check if AI detection is potentially available.
+ * Returns true if the server is running OR if we can spawn one
+ * (i.e. @huggingface/transformers is installed).
  */
 export async function isAIAvailable() {
-  const transformers = await loadTransformers()
-  return transformers !== null
+  // Fast check: is the server already running?
+  if (isServerRunning()) return true
+
+  // Can we spawn? Check if transformers is installed (without loading it).
+  try {
+    // import.meta.resolve does path resolution only — doesn't execute the module
+    if (import.meta.resolve) {
+      import.meta.resolve("@huggingface/transformers")
+      return true
+    }
+    // Fallback for runtimes that don't support import.meta.resolve
+    await import("@huggingface/transformers")
+    return true
+  } catch {
+    return false
+  }
 }
diff --git a/src/model-server.js b/src/model-server.js
new file mode 100755
index 0000000..87e7080
--- /dev/null
+++ b/src/model-server.js
@@ -0,0 +1,447 @@
+#!/usr/bin/env node
+/**
+ * vibeguard-model-server — shared Privacy Filter inference daemon.
+ *
+ * Runs as a standalone process, serves token-classification inference
+ * over a Unix domain socket so multiple OpenCode instances share one
+ * model copy in RAM (~400MB q4).
+ *
+ * Lifecycle:
+ *   - Spawned automatically by ai-detect.js on first detectWithAI() call.
+ *   - Exits cleanly after IDLE_TIMEOUT_MS of no requests (default 20min).
+ *   - Signal handlers (SIGINT/SIGTERM) clean up socket + pid file.
+ *
+ * Usage (manual):
+ *   node src/model-server.js --model openai/privacy-filter --dtype q4 --device cpu
+ *
+ * Protocol (HTTP over Unix socket):
+ *   POST /detect   {text, categories?, requestedModel?} → {spans: [{start,end,original,category}], error?}
+ *   GET  /health   → {status,pid,uptime,model,dtype,device,error?,queueLength}
+ */
+
+import http from "node:http"
+import fs from "node:fs"
+import path from "node:path"
+import os from "node:os"
+
+// ---------------------------------------------------------------------------
+// CLI args / env
+// ---------------------------------------------------------------------------
+const args = process.argv.slice(2)
+function getArg(name, fallback) {
+  const idx = args.indexOf(`--${name}`)
+  return idx >= 0 && idx + 1 < args.length ? args[idx + 1] : fallback
+}
+
+const MODEL = getArg("model", process.env.VIBEGUARD_MODEL || "openai/privacy-filter")
+const DTYPE = getArg("dtype", process.env.VIBEGUARD_DTYPE || "q4")
+const DEVICE = getArg("device", process.env.VIBEGUARD_DEVICE || "cpu")
+const IDLE_TIMEOUT_MS = Number(
+  getArg("idle-timeout", process.env.VIBEGUARD_IDLE_TIMEOUT_MS || "1200000")
+) // 20 min
+
+if (!Number.isFinite(IDLE_TIMEOUT_MS) || IDLE_TIMEOUT_MS <= 0) {
+  process.stderr.write(`Invalid idle-timeout: ${IDLE_TIMEOUT_MS}. Must be a positive number (ms).\n`)
+  process.exit(1)
+}
+const SOCKET_PATH = getArg(
+  "socket",
+  process.env.VIBEGUARD_SOCKET || defaultSocketPath()
+)
+const PID_PATH = SOCKET_PATH.endsWith(".sock")
+  ? SOCKET_PATH.replace(/\.sock$/, ".pid")
+  : SOCKET_PATH + ".pid"
+const LOG_PATH = SOCKET_PATH.endsWith(".sock")
+  ? SOCKET_PATH.replace(/\.sock$/, ".log")
+  : SOCKET_PATH + ".log"
+const MAX_BODY_BYTES = 1_048_576 // 1 MB
+const MAX_QUEUE = 50
+const REQUEST_TIMEOUT_MS = 30_000
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+function defaultSocketPath() {
+  const dir = process.env.TMPDIR || os.tmpdir() || "/tmp"
+  const uid = process.getuid?.() ?? process.pid
+  return path.join(dir, `vibeguard-${uid}.sock`)
+}
+
+function log(msg) {
+  const ts = new Date().toISOString()
+  const line = `[${ts}] ${msg}\n`
+  process.stderr.write(line)
+  // Also append to log file for diagnostics when stdio is redirected
+  try {
+    fs.appendFileSync(LOG_PATH, line)
+  } catch {
+    /* best-effort */
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Model loading (moved from ai-detect.js)
+// ---------------------------------------------------------------------------
+const LABEL_TO_CATEGORY = {
+  private_person: "PRIVATE_PERSON",
+  private_address: "PRIVATE_ADDRESS",
+  private_email: "PRIVATE_EMAIL",
+  private_phone: "PRIVATE_PHONE",
+  private_url: "PRIVATE_URL",
+  private_date: "PRIVATE_DATE",
+  account_number: "ACCOUNT_NUMBER",
+  secret: "SECRET",
+}
+
+let _pipeline = null
+let _loadError = null
+let _loading = null
+
+async function loadPipeline() {
+  if (_pipeline) return _pipeline
+  if (_loading) return _loading
+  _loading = (async () => {
+    log(`Loading model: ${MODEL} (dtype=${DTYPE}, device=${DEVICE})`)
+    const start = Date.now()
+    try {
+      const transformers = await import("@huggingface/transformers")
+      _pipeline = await transformers.pipeline("token-classification", MODEL, {
+        dtype: DTYPE,
+        device: DEVICE,
+      })
+      const elapsed = ((Date.now() - start) / 1000).toFixed(1)
+      log(`Model loaded successfully (${elapsed}s)`)
+      _loadError = null
+      return _pipeline
+    } catch (err) {
+      _loadError = err.message
+      log(`Model load failed: ${err.message}`)
+      _pipeline = null
+      return null
+    } finally {
+      _loading = null
+    }
+  })()
+  return _loading
+}
+
+// ---------------------------------------------------------------------------
+// Inference (serialized queue)
+// ---------------------------------------------------------------------------
+let _inferring = false
+const _queue = []
+
+function enqueueInference(text, categories) {
+  return new Promise((resolve, reject) => {
+    if (_queue.length >= MAX_QUEUE) {
+      reject(new Error("Queue full"))
+      return
+    }
+    _queue.push({ text, categories, resolve, reject })
+    drainQueue()
+  })
+}
+
+async function drainQueue() {
+  if (_inferring || _queue.length === 0) return
+  _inferring = true
+  const job = _queue.shift()
+  let settled = false
+
+  // Per-request timeout
+  const timer = setTimeout(() => {
+    if (!settled) {
+      settled = true
+      job.reject(new Error("Inference timed out"))
+    }
+  }, REQUEST_TIMEOUT_MS)
+
+  try {
+    const spans = await runInference(job.text, job.categories)
+    clearTimeout(timer)
+    if (!settled) {
+      settled = true
+      job.resolve(spans)
+    }
+  } catch (err) {
+    clearTimeout(timer)
+    if (!settled) {
+      settled = true
+      job.reject(err)
+    }
+  } finally {
+    _inferring = false
+    // Process next in queue
+    if (_queue.length > 0) drainQueue()
+  }
+}
+
+async function runInference(text, categories) {
+  const pipe = _pipeline
+  if (!pipe) return []
+
+  const entities = await pipe(text, { aggregation_strategy: "simple" })
+  if (!Array.isArray(entities) || entities.length === 0) return []
+
+  const allowedCategories =
+    Array.isArray(categories) && categories.length > 0
+      ? new Set(categories.map((c) => c.toLowerCase()))
+      : null
+
+  const spans = []
+  // cursor tracks search position in text to handle repeated words correctly
+  let cursor = 0
+
+  for (const entity of entities) {
+    if (!entity || typeof entity !== "object") continue
+    const rawLabel = String(entity.entity_group ?? entity.entity ?? "").toLowerCase()
+    const label = rawLabel.replace(/^[bi]-/, "")
+    if (!label || label === "o") continue
+    if (allowedCategories && !allowedCategories.has(label)) continue
+
+    let start = Number(entity.start)
+    let end = Number(entity.end)
+
+    // Transformers.js may not return start/end character offsets (unlike Python).
+    // If missing, locate the span by matching the word field against the source text.
+    if (!Number.isFinite(start) || !Number.isFinite(end) || start < 0 || end <= start) {
+      const word = String(entity.word ?? "").trim()
+      if (!word) continue
+      const idx = text.indexOf(word, cursor)
+      if (idx === -1) continue // word not found — skip
+      start = idx
+      end = idx + word.length
+    }
+
+    if (end > text.length) continue
+
+    const original = text.slice(start, end)
+    const category = LABEL_TO_CATEGORY[label] ?? label.toUpperCase()
+    spans.push({ start, end, original, category })
+    cursor = end // advance cursor past this span
+  }
+  return spans
+}
+
+// ---------------------------------------------------------------------------
+// Idle timeout
+// ---------------------------------------------------------------------------
+let _idleTimer = null
+const startedAt = Date.now()
+
+function resetIdleTimer() {
+  if (_idleTimer) clearTimeout(_idleTimer)
+  _idleTimer = setTimeout(() => {
+    log(`Idle for ${IDLE_TIMEOUT_MS / 60_000}min, shutting down.`)
+    shutdown()
+  }, IDLE_TIMEOUT_MS)
+  // Don't let the timer keep the process alive if everything else is done
+  if (_idleTimer.unref) _idleTimer.unref()
+}
+
+// ---------------------------------------------------------------------------
+// HTTP server
+// ---------------------------------------------------------------------------
+const server = http.createServer(async (req, res) => {
+  resetIdleTimer()
+
+  // Health check
+  if (req.method === "GET" && req.url === "/health") {
+    const status = _pipeline ? "ready" : _loading ? "loading" : _loadError ? "error" : "loading"
+    res.writeHead(200, { "Content-Type": "application/json" })
+    res.end(
+      JSON.stringify({
+        status,
+        pid: process.pid,
+        uptime: Math.floor((Date.now() - startedAt) / 1000),
+        model: MODEL,
+        dtype: DTYPE,
+        device: DEVICE,
+        error: _loadError || undefined,
+        queueLength: _queue.length,
+      })
+    )
+    return
+  }
+
+  // Detect endpoint
+  if (req.method === "POST" && req.url === "/detect") {
+    // Read body with size limit
+    const chunks = []
+    let bodySize = 0
+    let aborted = false
+
+    req.on("data", (chunk) => {
+      if (aborted) return
+      bodySize += chunk.length
+      if (bodySize > MAX_BODY_BYTES) {
+        aborted = true
+        res.writeHead(413, { "Content-Type": "application/json" })
+        res.end(JSON.stringify({ error: "Request body too large" }))
+        req.destroy()
+      } else {
+        chunks.push(chunk)
+      }
+    })
+
+    req.on("error", () => {
+      aborted = true
+    })
+
+    req.on("end", async () => {
+      if (aborted) return
+
+      let body
+      try {
+        body = JSON.parse(Buffer.concat(chunks).toString("utf8"))
+      } catch {
+        res.writeHead(400, { "Content-Type": "application/json" })
+        res.end(JSON.stringify({ error: "Invalid JSON" }))
+        return
+      }
+      // JSON.parse may return null (body "null"); fall back to {} so destructuring cannot throw.
+      const { text, categories, requestedModel } = body ?? {}
+
+      // Model mismatch check
+      if (requestedModel && requestedModel !== MODEL) {
+        res.writeHead(409, { "Content-Type": "application/json" })
+        res.end(
+          JSON.stringify({
+            error: "model mismatch",
+            loaded: MODEL,
+            requested: requestedModel,
+          })
+        )
+        return
+      }
+
+      if (!text || typeof text !== "string") {
+        res.writeHead(200, { "Content-Type": "application/json" })
+        res.end(JSON.stringify({ spans: [] }))
+        return
+      }
+
+      if (!_pipeline) {
+        res.writeHead(503, { "Content-Type": "application/json" })
+        res.end(
+          JSON.stringify({
+            error: _loadError || "Model still loading",
+            status: _loading ? "loading" : "error",
+          })
+        )
+        return
+      }
+
+      try {
+        // NEVER log text — it contains the sensitive data we're protecting
+        const spans = await enqueueInference(text, categories)
+        res.writeHead(200, { "Content-Type": "application/json" })
+        res.end(JSON.stringify({ spans }))
+      } catch (err) {
+        if (err.message === "Queue full") {
+          res.writeHead(503, { "Content-Type": "application/json" })
+          res.end(JSON.stringify({ error: "Server overloaded", spans: [] }))
+        } else {
+          // Inference error — return empty spans (client falls back to regex)
+          res.writeHead(200, { "Content-Type": "application/json" })
+          res.end(JSON.stringify({ spans: [], error: err.message }))
+        }
+      }
+    })
+    return
+  }
+
+  // Unknown route
+  res.writeHead(404, { "Content-Type": "application/json" })
+  res.end(JSON.stringify({ error: "Not found" }))
+})
+
+// ---------------------------------------------------------------------------
+// Startup & shutdown
+// ---------------------------------------------------------------------------
+function cleanupFiles() {
+  try {
+    fs.unlinkSync(SOCKET_PATH)
+  } catch {
+    /* may not exist */
+  }
+  try {
+    fs.unlinkSync(PID_PATH)
+  } catch {
+    /* may not exist */
+  }
+}
+
+function shutdown() {
+  if (_idleTimer) clearTimeout(_idleTimer)
+  // Reject all queued jobs so their HTTP handlers can respond
+  while (_queue.length > 0) {
+    const job = _queue.shift()
+    job.reject(new Error("Server shutting down"))
+  }
+  server.close(() => {
+    cleanupFiles()
+    log("Shutdown complete.")
+    process.exit(0)
+  })
+  // Force exit if server.close hangs (non-zero = abnormal)
+  setTimeout(() => {
+    cleanupFiles()
+    process.exit(1)
+  }, 3000).unref()
+}
+
+// Clean up stale socket if it exists (only if the owner process is dead)
+try {
+  const staleSocket = fs.existsSync(SOCKET_PATH)
+  if (staleSocket) {
+    let ownerAlive = false
+    try {
+      const pidStr = fs.readFileSync(PID_PATH, "utf8").trim()
+      const pid = Number(pidStr)
+      if (Number.isFinite(pid) && pid > 0) {
+        try {
+          process.kill(pid, 0)
+          ownerAlive = true
+        } catch { /* dead */ }
+      }
+    } catch { /* no pid file */ }
+    if (!ownerAlive) {
+      fs.unlinkSync(SOCKET_PATH)
+      try { fs.unlinkSync(PID_PATH) } catch { /* ok */ }
+    }
+  }
+} catch {
+  /* ok if not found */
+}
+
+// Signal handlers for clean shutdown
+process.on("SIGINT", shutdown)
+process.on("SIGTERM", shutdown)
+process.on("uncaughtException", (err) => {
+  log(`Uncaught exception: ${err.message}`)
+  // After uncaught exception, synchronous cleanup + exit is safest
+  cleanupFiles()
+  process.exit(1)
+})
+
+// Set socket permissions to 0600 after creation, write PID file after bind
+server.listen(SOCKET_PATH, () => {
+  try {
+    fs.chmodSync(SOCKET_PATH, 0o600)
+  } catch {
+    /* best-effort */
+  }
+  // Write PID file only after socket is bound (avoids race with other instances)
+  fs.writeFileSync(PID_PATH, String(process.pid), "utf8")
+
+  log(`Server listening on ${SOCKET_PATH} (pid=${process.pid})`)
+  log(`Model: ${MODEL}, dtype: ${DTYPE}, device: ${DEVICE}`)
+  log(`Idle timeout: ${IDLE_TIMEOUT_MS / 60_000}min`)
+
+  // Start idle timer
+  resetIdleTimer()
+
+  // Begin loading the model (async, non-blocking)
+  loadPipeline()
+})
diff --git a/src/model-server.test.js b/src/model-server.test.js
new file mode 100644
index 0000000..351a9c7
--- /dev/null
+++ b/src/model-server.test.js
@@ -0,0 +1,444 @@
+/**
+ * Integration tests for vibeguard model-server.
+ *
+ * These tests spawn a real model server, send realistic (and tricky)
+ * PII / secret payloads, and assert the AI detects them.
+ *
+ * The model (~400 MB q4) must be pre-downloaded.
+ * Skip with: VIBEGUARD_SKIP_AI_TESTS=1 node --test src/model-server.test.js
+ *
+ * These are intentionally "less obvious" inputs — the kind a regex
+ * would miss but a language model should catch.
+ */
+
+import { describe, it, before, after } from "node:test"
+import assert from "node:assert/strict"
+import http from "node:http"
+import fs from "node:fs"
+import path from "node:path"
+import os from "node:os"
+import { spawn, execSync } from "node:child_process"
+import { fileURLToPath } from "node:url"
+
+// ---------------------------------------------------------------------------
+// Skip gate
+// ---------------------------------------------------------------------------
+const SKIP_AI = process.env.VIBEGUARD_SKIP_AI_TESTS === "1"
+
+// ---------------------------------------------------------------------------
+// Generated test secrets — no literal secrets in source
+// ---------------------------------------------------------------------------
+const __dirname = path.dirname(fileURLToPath(import.meta.url))
+const SECRETS_FILE = path.join(__dirname, "..", "test", ".secrets.json")
+if (!fs.existsSync(SECRETS_FILE)) {
+  // execSync runs the command through a shell, so both paths are quoted:
+  // an unquoted space in the Node binary or repo path would split the command.
+  const generator = path.join(__dirname, "..", "scripts", "generate-test-secrets.js")
+  execSync(`"${process.execPath}" "${generator}"`, { stdio: "pipe" })
+}
+const fixtures = JSON.parse(fs.readFileSync(SECRETS_FILE, "utf8"))
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+const SERVER_SCRIPT = path.join(__dirname, "model-server.js")
+const uid = process.getuid?.() ?? process.pid
+const SOCK = path.join(
+  os.tmpdir(),
+  `vibeguard-test-${uid}-${Date.now()}.sock`
+)
+const PID_FILE = SOCK.replace(/\.sock$/, ".pid")
+const LOG_FILE = SOCK.replace(/\.sock$/, ".log")
+
+let serverProcess = null
+let logFd = null
+
+function request(method, urlPath, body, timeoutMs = 30_000) {
+  return new Promise((resolve, reject) => {
+    let settled = false
+    const opts = {
+      socketPath: SOCK,
+      path: urlPath,
+      method,
+      headers: {},
+      timeout: timeoutMs,
+    }
+    let payload = null
+    if (body) {
+      payload = JSON.stringify(body)
+      opts.headers["Content-Type"] = "application/json"
+      opts.headers["Content-Length"] = Buffer.byteLength(payload)
+    }
+    const req = http.request(opts, (res) => {
+      const chunks = []
+      res.on("data", (c) => chunks.push(c))
+      res.on("end", () => {
+        if (settled) return
+        settled = true
+        try {
+          resolve({
+            status: res.statusCode,
+            body: JSON.parse(Buffer.concat(chunks).toString("utf8")),
+          })
+        } catch {
+          resolve({ status: res.statusCode, body: null })
+        }
+      })
+    })
+    req.on("error", (err) => {
+      if (settled) return
+      settled = true
+      reject(err)
+    })
+    req.on("timeout", () => {
+      if (settled) return
+      settled = true
+      req.destroy()
+      reject(new Error("request timed out"))
+    })
+    if (payload) req.write(payload)
+    req.end()
+  })
+}
+
+function detect(text, categories) {
+  return request("POST", "/detect", { text, categories })
+}
+
+/** Extract spans from a detect response, with assertion that they exist. */
+function spans(detectResult) {
+  assert.ok(detectResult.body, `Expected response body, got null (status=${detectResult.status})`)
+  assert.ok(Array.isArray(detectResult.body.spans), `Expected spans array, got: ${JSON.stringify(detectResult.body)}`)
+  return detectResult.body.spans
+}
+
+/** Wait for /health to report "ready", up to timeoutMs; throws early if the server reports a load error. */
+async function waitReady(timeoutMs = 180_000) {
+  const deadline = Date.now() + timeoutMs
+  while (Date.now() < deadline) {
+    let health = null
+    try {
+      health = (await request("GET", "/health", null, 3000)).body
+    } catch { /* socket not accepting connections yet — keep polling */ }
+    // Checked outside the try so the "error" throw is not swallowed by the catch above.
+    if (health?.status === "ready") return
+    if (health?.status === "error") throw new Error("server error: " + health.error)
+    await new Promise((r) => setTimeout(r, 500))
+  }
+  throw new Error("model-server did not become ready in time")
+}
+
+/** Assert that at least one span covers a substring. */
+function assertDetected(spans, substring, expectedCategory) {
+  assert.ok(Array.isArray(spans), `Expected spans array, got: ${JSON.stringify(spans)}`)
+  const match = spans.find((s) => {
+    // Primary: span original contains the expected substring
+    if (s.original.includes(substring)) return true
+    // Reverse: substring contains the span, but only if the span is
+    // substantial (>= 6 chars) to avoid matching single-char fragments
+    if (s.original.length >= 6 && substring.includes(s.original)) return true
+    return false
+  })
+  assert.ok(
+    match,
+    `Expected AI to detect "${substring}" but got: ${JSON.stringify(spans.map((s) => s.original))}`
+  )
+  if (expectedCategory) {
+    assert.equal(
+      match.category,
+      expectedCategory,
+      `Expected category ${expectedCategory} for "${substring}" but got ${match.category}`
+    )
+  }
+}
+
+/** Assert that NO span overlaps the first occurrence of safeSubstring in text. */
+function assertNotDetected(spansArr, text, safeSubstring) {
+  assert.ok(Array.isArray(spansArr), `Expected spans array, got: ${JSON.stringify(spansArr)}`)
+  const start = text.indexOf(safeSubstring)
+  // indexOf returns -1 when absent, which would silently check the wrong region.
+  assert.ok(start >= 0, `"${safeSubstring}" does not occur in the test text`)
+  const end = start + safeSubstring.length
+  const overlapping = spansArr.filter((s) => s.start < end && s.end > start)
+  assert.equal(
+    overlapping.length,
+    0,
+    `"${safeSubstring}" should NOT be flagged but was: ${JSON.stringify(overlapping)}`
+  )
+}
+
+// ---------------------------------------------------------------------------
+// Lifecycle
+// ---------------------------------------------------------------------------
+describe("model-server (AI integration)", { timeout: 300_000, skip: SKIP_AI }, () => {
+  before(async () => {
+    // Clean up any leftover files
+    for (const f of [SOCK, PID_FILE, LOG_FILE]) {
+      try { fs.unlinkSync(f) } catch { /* ok */ }
+    }
+
+    logFd = fs.openSync(LOG_FILE, "a")
+    serverProcess = spawn(
+      process.execPath,
+      [SERVER_SCRIPT, "--socket", SOCK, "--idle-timeout", "300000"],
+      { detached: true, stdio: ["ignore", logFd, logFd] }
+    )
+    serverProcess.unref()
+
+    // Last-resort cleanup if test process is killed (Ctrl+C, crash)
+    process.on("exit", () => {
+      if (serverProcess?.pid) {
+        try { process.kill(-serverProcess.pid, "SIGKILL") } catch { /* ok */ }
+      }
+    })
+
+    await waitReady()
+  })
+
+  after(() => {
+    // Kill the process group (catches any children)
+    if (serverProcess?.pid) {
+      try { process.kill(-serverProcess.pid) } catch { /* ok */ }
+    }
+    // Close the log fd
+    if (logFd !== null) {
+      try { fs.closeSync(logFd) } catch { /* ok */ }
+      logFd = null
+    }
+    for (const f of [SOCK, PID_FILE, LOG_FILE]) {
+      try { fs.unlinkSync(f) } catch { /* ok */ }
+    }
+  })
+
+  // =========================================================================
+  // Health / protocol
+  // =========================================================================
+
+  it("GET /health returns ready with expected fields", async () => {
+    const r = await request("GET", "/health", null)
+    assert.equal(r.status, 200)
+    assert.equal(r.body.status, "ready")
+    assert.equal(r.body.model, "openai/privacy-filter")
+    assert.ok(Number.isFinite(r.body.pid))
+    assert.ok(Number.isFinite(r.body.uptime))
+  })
+
+  // =========================================================================
+  // Subtle secrets — things regex misses
+  // =========================================================================
+
+  describe("subtle secrets", () => {
+    it("detects a password buried in a JDBC connection string", async () => {
+      const r = await detect(fixtures.jdbc_text)
+      assert.ok(spans(r).length > 0, "should detect at least one span in JDBC URL")
+    })
+
+    it("detects credentials in a MongoDB URI", async () => {
+      const r = await detect(fixtures.mongo_text)
+      assert.ok(spans(r).length > 0, "should detect something in Mongo URI")
+    })
+
+    it("detects an API key assigned to a variable with a generic name", async () => {
+      const r = await detect(fixtures.github_text)
+      assertDetected(spans(r), fixtures.github_pat, "SECRET")
+    })
+
+    it("detects a Slack webhook URL (or defers to regex)", async () => {
+      // NOTE: The Privacy Filter model may not flag webhook URLs as secrets.
+      // This is expected — the regex layer catches slack webhooks via pattern.
+      // We test that the server handles it without error; detection is best-effort.
+      const r = await detect(fixtures.slack_text)
+      assert.equal(r.status, 200)
+      // If the model flags it, great; if not, regex handles it.
+      // Just verify no server errors.
+    })
+
+    it("detects a private key block even when indented in YAML", async () => {
+      const text = `tls:
+  cert: |
+    -----BEGIN RSA PRIVATE KEY-----
+    MIIEpAIBAAKCAQEA0Z3VS5JJcds3xfn/ygWyF8PbnGy5AoC5dNz8mLLMo1mqob
+    -----END RSA PRIVATE KEY-----`
+      const r = await detect(text)
+      assert.ok(spans(r).length > 0, "should detect the private key block")
+    })
+
+    it("detects an AWS secret key in an env export", async () => {
+      const r = await detect(fixtures.aws_text)
+      assertDetected(spans(r), fixtures.aws_secret_key, "SECRET")
+    })
+
+    it("detects a bearer token in an HTTP header literal", async () => {
+      const r = await detect(fixtures.jwt_text)
+      assert.ok(spans(r).length > 0, "should detect the JWT / bearer token")
+    })
+
+    it("detects a Stripe secret key in JSON config", async () => {
+      const r = await detect(fixtures.stripe_text)
+      assertDetected(
+        spans(r),
+        fixtures.stripe_key,
+        "SECRET"
+      )
+    })
+  })
+
+  // =========================================================================
+  // Subtle PII — context-dependent, regex-hard
+  // =========================================================================
+
+  describe("subtle PII", () => {
+    it("detects a person's name in a natural sentence without labels", async () => {
+      const text =
+        "The quarterly report was prepared by Margaret Thatcherton and reviewed by her manager."
+      const r = await detect(text)
+      assertDetected(spans(r), "Margaret Thatcherton", "PRIVATE_PERSON")
+    })
+
+    it("detects an email in a markdown link", async () => {
+      const text =
+        "For questions, reach out to [the team lead](mailto:sarah.connor@skynet.io) or file a ticket."
+      const r = await detect(text)
+      const s = spans(r)
+      // The model detects this but may split it across multiple spans due to tokenization
+      // (e.g., "mailto:s", "arah", ".connor@skynet.io"). We verify that at least one span
+      // overlaps with the email region rather than requiring exact substring match.
+      const emailStart = text.indexOf("sarah.connor@skynet.io")
+      const emailEnd = emailStart + "sarah.connor@skynet.io".length
+      const overlapping = s.filter((sp) => sp.start < emailEnd && sp.end > emailStart)
+      assert.ok(overlapping.length > 0, `Expected detection overlapping email, got: ${JSON.stringify(s)}`)
+    })
+
+    it("detects a phone number written in words-and-digits mix", async () => {
+      const text = "You can reach our office at +1 (312) 555-0198 during business hours."
+      const r = await detect(text)
+      assertDetected(spans(r), "312) 555-0198", "PRIVATE_PHONE")
+    })
+
+    it("detects a street address embedded in prose", async () => {
+      const text =
+        "Ship the replacement to 742 Evergreen Terrace, Springfield, IL 62704 by next Friday."
+      const r = await detect(text)
+      const s = spans(r)
+      assert.ok(
+        s.some((sp) => sp.category === "PRIVATE_ADDRESS" || sp.original.includes("742 Evergreen")),
+        `Expected address detection, got: ${JSON.stringify(s)}`
+      )
+    })
+
+    it("detects a date of birth in a sentence", async () => {
+      const text =
+        "Patient record: DOB is March 15, 1987, admitted on 2024-01-10 for observation."
+      const r = await detect(text)
+      const s = spans(r)
+      assert.ok(
+        s.some((sp) => sp.category === "PRIVATE_DATE"),
+        `Expected at least one PRIVATE_DATE, got: ${JSON.stringify(s)}`
+      )
+    })
+
+    it("detects an internal URL with auth token in query string", async () => {
+      const text =
+        "Dashboard link: https://grafana.internal.corp/d/abc123?orgId=1&auth_token=eyJrIjoiT2tDN2FBNHciLCJuIjoiZGVwbG95IiwiZCI6MX0"
+      const r = await detect(text)
+      assert.ok(spans(r).length > 0, "should detect the URL or embedded token")
+    })
+
+    it("detects an account number formatted with spaces", async () => {
+      const text = "Wire transfer to account 4532 0151 2345 6789, routing 021000021."
+      const r = await detect(text)
+      const s = spans(r)
+      assert.ok(
+        s.some((sp) => sp.category === "ACCOUNT_NUMBER"),
+        `Expected ACCOUNT_NUMBER, got: ${JSON.stringify(s)}`
+      )
+    })
+  })
+
+  // =========================================================================
+  // False-positive resistance
+  // =========================================================================
+
+  describe("false positive resistance", () => {
+    it("flags a UUID as SECRET (known model behavior — document, don't rely on)", async () => {
+      // The Privacy Filter model classifies UUIDs as secrets because they
+      // look like hex tokens. This is a known false-positive. We document
+      // the behavior here so we know if/when model updates fix it.
+      // In practice the regex layer does NOT match UUIDs, so the merged
+      // output only includes this if AI is active.
+      const text = "Request ID: 550e8400-e29b-41d4-a716-446655440000"
+      const r = await detect(text)
+      const s = spans(r)
+      // Current model behavior: flags the UUID.
+      // If a future model stops flagging it, update this test.
+      const hasUuidSpan = s.some(
+        (sp) => sp.original.includes("550e8400") && sp.category === "SECRET"
+      )
+      assert.ok(hasUuidSpan, "Current model flags UUIDs as SECRET (known false positive)")
+    })
+
+    it("does NOT flag a semver version string", async () => {
+      const text = "Upgraded @huggingface/transformers from 4.1.0 to 4.2.0"
+      const r = await detect(text)
+      assert.equal(spans(r).length, 0, `Expected no spans, got: ${JSON.stringify(r.body.spans)}`)
+    })
+
+    it("does NOT flag a localhost URL with port", async () => {
+      const text = "Dev server running at http://localhost:3000/api/v1/health"
+      const r = await detect(text)
+      assertNotDetected(spans(r), text, "http://localhost:3000")
+    })
+  })
+
+  // =========================================================================
+  // Category filtering
+  // =========================================================================
+
+  describe("category filtering", () => {
+    it("only returns spans matching requested categories", async () => {
+      const text = "Contact John Doe at john.doe@acme.com or call (555) 123-4567"
+      const r = await detect(text, ["private_email"])
+      const s = spans(r)
+      // Should only include email, not person or phone
+      for (const sp of s) {
+        assert.equal(
+          sp.category,
+          "PRIVATE_EMAIL",
+          `Unexpected category ${sp.category} when filtering for private_email only`
+        )
+      }
+      assert.ok(s.length > 0, "should detect at least the email")
+    })
+  })
+
+  // =========================================================================
+  // Edge cases
+  // =========================================================================
+
+  describe("edge cases", () => {
+    it("handles empty text gracefully", async () => {
+      const r = await detect("")
+      assert.equal(r.status, 200)
+      assert.deepEqual(r.body.spans, [])
+    })
+
+    it("handles text with no PII", async () => {
+      const text = "The quick brown fox jumps over the lazy dog."
+      const r = await detect(text)
+      assert.equal(r.status, 200)
+      assert.deepEqual(r.body.spans, [])
+    })
+
+    it("returns spans with correct start/end offsets", async () => {
+      const text = "Email me at test.user@example.org please"
+      const r = await detect(text)
+      for (const sp of spans(r)) {
+        assert.equal(
+          text.slice(sp.start, sp.end),
+          sp.original,
+          `Offset mismatch: text[${sp.start}:${sp.end}] = "${text.slice(sp.start, sp.end)}" but original = "${sp.original}"`
+        )
+      }
+    })
+  })
+})

From 51b2211efdfe3d5a5c152e9d4d0accb5310cdac6 Mon Sep 17 00:00:00 2001
From: Jeremy John <jeremy.john@rate.com>
Date: Fri, 15 May 2026 12:55:29 -0500
Subject: [PATCH 06/15] fix: prevent model-server zombie/CPU-spin and doubled
 logs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Remove appendFileSync from log() — stderr is already redirected to log
  file by spawner, so every line was written twice
- Add socket watchdog via fs.watch(): if the socket file is deleted
  externally (by another instance or OS cleanup), exit immediately —
  the client will spawn a fresh server on next detect() call
- Limit ONNX WASM threads to 2 (default uses all cores, causing ~100% CPU
  even when idle between requests)
- Add request counter to detect endpoint and health response for
  diagnosing whether the server is actually receiving requests
---
 src/model-server.js | 47 ++++++++++++++++++++++++++++++++++++---------
 1 file changed, 38 insertions(+), 9 deletions(-)

diff --git a/src/model-server.js b/src/model-server.js
index 87e7080..46bafd2 100755
--- a/src/model-server.js
+++ b/src/model-server.js
@@ -70,13 +70,9 @@ function defaultSocketPath() {
 function log(msg) {
   const ts = new Date().toISOString()
   const line = `[${ts}] ${msg}\n`
+  // Write to stderr only — the spawner redirects stderr to the log file.
+  // appendFileSync was causing doubled lines when stderr was already redirected.
   process.stderr.write(line)
-  // Also append to log file for diagnostics when stdio is redirected
-  try {
-    fs.appendFileSync(LOG_PATH, line)
-  } catch {
-    /* best-effort */
-  }
 }
 
 // ---------------------------------------------------------------------------
@@ -105,6 +101,11 @@ async function loadPipeline() {
     const start = Date.now()
     try {
       const transformers = await import("@huggingface/transformers")
+      // Limit ONNX WASM thread count to prevent CPU-spinning when idle.
+      // Default uses all cores, which causes ~100% CPU even between requests.
+      if (transformers.env?.backends?.onnx?.wasm) {
+        transformers.env.backends.onnx.wasm.numThreads = 2
+      }
       _pipeline = await transformers.pipeline("token-classification", MODEL, {
         dtype: DTYPE,
         device: DEVICE,
@@ -129,6 +130,7 @@ async function loadPipeline() {
 // Inference (serialized queue)
 // ---------------------------------------------------------------------------
 let _inferring = false
+let _requestCount = 0
 const _queue = []
 
 function enqueueInference(text, categories) {
@@ -224,9 +226,10 @@ async function runInference(text, categories) {
 }
 
 // ---------------------------------------------------------------------------
-// Idle timeout
+// Idle timeout + socket watchdog
 // ---------------------------------------------------------------------------
 let _idleTimer = null
+let _socketWatcher = null
 const startedAt = Date.now()
 
 function resetIdleTimer() {
@@ -239,6 +242,27 @@ function resetIdleTimer() {
   if (_idleTimer.unref) _idleTimer.unref()
 }
 
+/**
+ * Watch the socket file for deletion. If it disappears (another instance's
+ * cleanupStaleFiles, OS cleanup, manual rm), this server is orphaned and
+ * can never receive requests again — exit immediately.
+ * The client (ai-detect.js) will spawn a fresh server on next detect() call.
+ */
+function startSocketWatchdog() {
+  try {
+    _socketWatcher = fs.watch(path.dirname(SOCKET_PATH), (eventType, filename) => {
+      if (filename === path.basename(SOCKET_PATH) && !fs.existsSync(SOCKET_PATH)) {
+        log("Socket file deleted externally — exiting orphaned server.")
+        shutdown()
+      }
+    })
+    _socketWatcher.unref()
+  } catch {
+    // fs.watch not supported or dir doesn't exist — not fatal, idle timeout
+    // will still clean up eventually
+  }
+}
+
 // ---------------------------------------------------------------------------
 // HTTP server
 // ---------------------------------------------------------------------------
@@ -259,6 +283,7 @@ const server = http.createServer(async (req, res) => {
         device: DEVICE,
         error: _loadError || undefined,
         queueLength: _queue.length,
+        requestCount: _requestCount,
       })
     )
     return
@@ -334,7 +359,9 @@ const server = http.createServer(async (req, res) => {
 
       try {
         // NEVER log text — it contains the sensitive data we're protecting
+        _requestCount++
         const spans = await enqueueInference(text, categories)
+        log(`detect #${_requestCount}: ${spans.length} span(s) found`)
         res.writeHead(200, { "Content-Type": "application/json" })
         res.end(JSON.stringify({ spans }))
       } catch (err) {
@@ -374,6 +401,7 @@ function cleanupFiles() {
 
 function shutdown() {
   if (_idleTimer) clearTimeout(_idleTimer)
+  if (_socketWatcher) { try { _socketWatcher.close() } catch { /* ok */ } }
   // Reject all queued jobs so their HTTP handlers can respond
   while (_queue.length > 0) {
     const job = _queue.shift()
@@ -381,7 +409,7 @@ function shutdown() {
   }
   server.close(() => {
     cleanupFiles()
-    log("Shutdown complete.")
+    log(`Shutdown complete. Served ${_requestCount} detect request(s).`)
     process.exit(0)
   })
   // Force exit if server.close hangs (non-zero = abnormal)
@@ -439,8 +467,9 @@ server.listen(SOCKET_PATH, () => {
   log(`Model: ${MODEL}, dtype: ${DTYPE}, device: ${DEVICE}`)
   log(`Idle timeout: ${IDLE_TIMEOUT_MS / 60_000}min`)
 
-  // Start idle timer
+  // Start idle timer + socket watchdog
   resetIdleTimer()
+  startSocketWatchdog()
 
   // Begin loading the model (async, non-blocking)
   loadPipeline()

From 1b672e7d71c1780e411c2f6e1f773254278d1ec9 Mon Sep 17 00:00:00 2001
From: Jeremy John <jeremy.john@rate.com>
Date: Fri, 15 May 2026 13:48:52 -0500
Subject: [PATCH 07/15] fix: resolve Node binary via PATH instead of
 process.execPath
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

process.execPath inside OpenCode points to the opencode binary (Bun),
not Node.js. When spawnServer() used it to launch model-server.js,
the command became 'opencode model-server.js --model ...' which
OpenCode interpreted as a project path, printed help, and exited.

This caused the AI server to never start from the plugin context,
silently falling back to regex-only detection (silentFallback: true).

Fix: new findNodeBin() resolves the Node binary via:
  1. $NODE_BIN env var (explicit override)
  2. 'which node' (PATH lookup — works with nvm/fnm/brew)
  3. process.execPath only if its basename starts with 'node' (last resort)

Also logs which Node binary was used for easier debugging.
---
 src/ai-detect.js | 42 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/src/ai-detect.js b/src/ai-detect.js
index de869fe..9eb03c2 100644
--- a/src/ai-detect.js
+++ b/src/ai-detect.js
@@ -17,7 +17,7 @@ import fs from "node:fs"
 import path from "node:path"
 import os from "node:os"
 import { fileURLToPath } from "node:url"
-import { spawn } from "node:child_process"
+import { spawn, execFileSync } from "node:child_process"
 
 // ---------------------------------------------------------------------------
 // Constants
@@ -210,6 +210,37 @@ function cleanupStaleFiles() {
   }
 }
 
+/**
+ * Find a Node.js binary suitable for running the model server.
+ *
+ * process.execPath is NOT reliable — when running inside OpenCode (Bun),
+ * it points to the opencode binary, not Node. So we resolve explicitly:
+ *   1. $NODE_BIN env var (explicit override)
+ *   2. `which node` (PATH lookup)
+ *   3. process.execPath (last resort — only works if host IS Node)
+ */
+function findNodeBin() {
+  // Explicit override
+  if (process.env.NODE_BIN) return process.env.NODE_BIN
+
+  // PATH lookup — works for nvm, fnm, brew, system node
+  try {
+    const resolved = execFileSync("which", ["node"], {
+      encoding: "utf8",
+      timeout: 5000,
+    }).trim()
+    if (resolved && fs.existsSync(resolved)) return resolved
+  } catch { /* which failed — continue */ }
+
+  // Last resort: only valid if the host runtime IS Node (not Bun/Deno/opencode)
+  const execName = path.basename(process.execPath).toLowerCase()
+  if (execName === "node" || execName.startsWith("node")) {
+    return process.execPath
+  }
+
+  return null
+}
+
 /**
  * Spawn the model server as a detached background process.
  */
@@ -223,8 +254,11 @@ function spawnServer(aiConfig) {
   const dtype = aiConfig.dtype || "q4"
   const device = aiConfig.device || "cpu"
 
-  // Use the same Node binary that's running the current process
-  const nodeBin = process.execPath
+  const nodeBin = findNodeBin()
+  if (!nodeBin) {
+    _log("error", "Cannot find Node.js binary. Install Node.js or set NODE_BIN env var.")
+    return null
+  }
 
   const logPath = getLogPath()
   let logFd = null
@@ -265,7 +299,7 @@ function spawnServer(aiConfig) {
   if (logFd !== null) {
     try { fs.closeSync(logFd) } catch { /* ok */ }
   }
-  _log("info", `Spawned model server (pid=${child.pid}, model=${model}, dtype=${dtype})`)
+  _log("info", `Spawned model server (pid=${child.pid}, node=${nodeBin}, model=${model}, dtype=${dtype})`)
   return child.pid
 }
 

From ad971ec639066b362aaa1765bfaadf772097e3cf Mon Sep 17 00:00:00 2001
From: Jeremy John <jeremy.john@rate.com>
Date: Fri, 15 May 2026 14:53:09 -0500
Subject: [PATCH 08/15] fix: limit ONNX native backend threads via
 session_options
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous fix set transformers.env.backends.onnx.wasm.numThreads,
but onnxruntime-node uses native threads, not WASM threads. That
setting had no effect — the server still used all CPU cores (~147%
CPU when idle).

Pass intraOpNumThreads: 2, interOpNumThreads: 1 via session_options
to the pipeline constructor. This controls the native thread pool
that actually executes inference. Measured idle CPU drops from 147%
to 3.3%.
---
 src/model-server.js | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/model-server.js b/src/model-server.js
index 46bafd2..83fe933 100755
--- a/src/model-server.js
+++ b/src/model-server.js
@@ -101,14 +101,20 @@ async function loadPipeline() {
     const start = Date.now()
     try {
       const transformers = await import("@huggingface/transformers")
-      // Limit ONNX WASM thread count to prevent CPU-spinning when idle.
-      // Default uses all cores, which causes ~100% CPU even between requests.
+      // Limit thread count to prevent CPU-spinning when idle.
+      // onnxruntime-node uses native threads (not WASM), controlled via
+      // session_options. The wasm.numThreads setting alone does nothing
+      // for the native backend. Default is all CPU cores → 100%+ CPU idle.
       if (transformers.env?.backends?.onnx?.wasm) {
         transformers.env.backends.onnx.wasm.numThreads = 2
       }
       _pipeline = await transformers.pipeline("token-classification", MODEL, {
         dtype: DTYPE,
         device: DEVICE,
+        session_options: {
+          intraOpNumThreads: 2,
+          interOpNumThreads: 1,
+        },
       })
       const elapsed = ((Date.now() - start) / 1000).toFixed(1)
       log(`Model loaded successfully (${elapsed}s)`)

From ec526967244e7619fab43c236eefe163b99b3670 Mon Sep 17 00:00:00 2001
From: Jeremy John <jeremy.john@rate.com>
Date: Sun, 17 May 2026 22:30:30 -0500
Subject: [PATCH 09/15] =?UTF-8?q?fix:=20code=20review=20bugs=20=E2=80=94?=
 =?UTF-8?q?=20FD=20leak,=20drain=20recursion,=20watchdog,=20TOCTOU,=20rege?=
 =?UTF-8?q?x=20perf,=20session=20pruning?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- ai-detect: wrap spawn() in try/catch/finally to close logFd on failure
- ai-detect: use createRequire for isAIAvailable fallback (no full module load)
- model-server: use setImmediate(drainQueue) instead of direct recursive call
- model-server: debounce socket watchdog + realpathSync for macOS symlinks
- model-server: set umask(0o177) before listen to close TOCTOU race
- model-server: increment requestCount only after successful inference
- engine: use pre-compiled RegExp objects from buildPatternSet
- patterns: add npm_token, stripe_key, slack_webhook, slack_token builtins
- index: add LRU-style session map pruning (max 50 sessions)
---
 src/ai-detect.js    | 68 ++++++++++++++++++++++++++-------------------
 src/engine.js       |  7 +++--
 src/index.js        | 23 ++++++++++++++-
 src/model-server.js | 35 ++++++++++++++++-------
 src/patterns.js     | 52 ++++++++++++++++++++++++++++++++--
 5 files changed, 140 insertions(+), 45 deletions(-)

diff --git a/src/ai-detect.js b/src/ai-detect.js
index 9eb03c2..fdd32c3 100644
--- a/src/ai-detect.js
+++ b/src/ai-detect.js
@@ -16,6 +16,7 @@ import http from "node:http"
 import fs from "node:fs"
 import path from "node:path"
 import os from "node:os"
+import { createRequire } from "node:module"
 import { fileURLToPath } from "node:url"
 import { spawn, execFileSync } from "node:child_process"
 
@@ -272,35 +273,42 @@ function spawnServer(aiConfig) {
     stderr = "ignore"
   }
 
-  const child = spawn(
-    nodeBin,
-    [
-      serverScript,
-      "--model",
-      model,
-      "--dtype",
-      dtype,
-      "--device",
-      device,
-      "--socket",
-      getSocketPath(),
-      "--idle-timeout",
-      String(IDLE_TIMEOUT_MS),
-    ],
-    {
-      detached: true,
-      stdio: ["ignore", stdout, stderr],
-      env: { ...process.env },
-    }
-  )
+  try {
+    const child = spawn(
+      nodeBin,
+      [
+        serverScript,
+        "--model",
+        model,
+        "--dtype",
+        dtype,
+        "--device",
+        device,
+        "--socket",
+        getSocketPath(),
+        "--idle-timeout",
+        String(IDLE_TIMEOUT_MS),
+      ],
+      {
+        detached: true,
+        stdio: ["ignore", stdout, stderr],
+        env: { ...process.env },
+      }
+    )
 
-  child.unref()
-  // Close the log fd in the parent — the child inherited a dup
-  if (logFd !== null) {
-    try { fs.closeSync(logFd) } catch { /* ok */ }
+    child.unref()
+    _log("info", `Spawned model server (pid=${child.pid}, node=${nodeBin}, model=${model}, dtype=${dtype})`)
+    return child.pid
+  } catch (err) {
+    _log("error", `spawn() failed: ${err.message}`)
+    return null
+  } finally {
+    // Close the log fd in the parent — the child inherited a dup.
+    // In a finally block so it's closed even if spawn() throws.
+    if (logFd !== null) {
+      try { fs.closeSync(logFd) } catch { /* ok */ }
+    }
   }
-  _log("info", `Spawned model server (pid=${child.pid}, node=${nodeBin}, model=${model}, dtype=${dtype})`)
-  return child.pid
 }
 
 /**
@@ -504,8 +512,10 @@ export async function isAIAvailable() {
       import.meta.resolve("@huggingface/transformers")
       return true
     }
-    // Fallback for runtimes that don't support import.meta.resolve
-    await import("@huggingface/transformers")
+    // Fallback for runtimes that don't support import.meta.resolve:
+    // use createRequire to do a path-only resolution (no module loading)
+    const require = createRequire(import.meta.url)
+    require.resolve("@huggingface/transformers")
     return true
   } catch {
     return false
diff --git a/src/engine.js b/src/engine.js
index 642fb9c..cb581b1 100644
--- a/src/engine.js
+++ b/src/engine.js
@@ -69,9 +69,10 @@ function findRegexSpans(text, patterns) {
   }
 
   for (const rule of patterns.regex) {
-    const baseFlags = String(rule.flags ?? "")
-    const flags = baseFlags.includes("g") ? baseFlags : `${baseFlags}g`
-    const re = new RegExp(rule.pattern, flags)
+    // Use pre-compiled regex if available, otherwise compile on the fly
+    const re = rule.compiled
+      ? (rule.compiled.lastIndex = 0, rule.compiled)
+      : new RegExp(rule.pattern, (rule.flags ?? "").includes("g") ? rule.flags : `${rule.flags ?? ""}g`)
     for (const m of text.matchAll(re)) {
       if (!m[0]) continue
       const start = m.index ?? -1
diff --git a/src/index.js b/src/index.js
index 3e2608d..4572a57 100644
--- a/src/index.js
+++ b/src/index.js
@@ -39,6 +39,8 @@ export const VibeGuardPrivacy = async (ctx) => {
 
   const patterns = buildPatternSet(config.patterns)
   const sessions = new Map()
+  const sessionLastAccess = new Map() // track last access time for pruning
+  const MAX_SESSIONS = 50 // prune oldest when exceeded
   const aiConfig = config.ai
   const useAI = aiConfig.enabled
 
@@ -67,13 +69,32 @@ export const VibeGuardPrivacy = async (ctx) => {
     const key = String(sessionID ?? "")
     if (!key) return null
     const existing = sessions.get(key)
-    if (existing) return existing
+    if (existing) {
+      sessionLastAccess.set(key, Date.now())
+      return existing
+    }
+    // Prune oldest sessions if map is too large
+    if (sessions.size >= MAX_SESSIONS) {
+      let oldestKey = null
+      let oldestTime = Infinity
+      for (const [k, t] of sessionLastAccess) {
+        if (t < oldestTime) {
+          oldestTime = t
+          oldestKey = k
+        }
+      }
+      if (oldestKey) {
+        sessions.delete(oldestKey)
+        sessionLastAccess.delete(oldestKey)
+      }
+    }
     const created = new PlaceholderSession({
       prefix: config.prefix,
       ttlMs: config.ttlMs,
       maxMappings: config.maxMappings,
     })
     sessions.set(key, created)
+    sessionLastAccess.set(key, Date.now())
     return created
   }
 
diff --git a/src/model-server.js b/src/model-server.js
index 83fe933..0981af6 100755
--- a/src/model-server.js
+++ b/src/model-server.js
@@ -179,8 +179,8 @@ async function drainQueue() {
     }
   } finally {
     _inferring = false
-    // Process next in queue
-    if (_queue.length > 0) drainQueue()
+    // Process next in queue — use setImmediate to avoid recursive stack buildup
+    if (_queue.length > 0) setImmediate(drainQueue)
   }
 }
 
@@ -256,11 +256,23 @@ function resetIdleTimer() {
  */
 function startSocketWatchdog() {
   try {
-    _socketWatcher = fs.watch(path.dirname(SOCKET_PATH), (eventType, filename) => {
-      if (filename === path.basename(SOCKET_PATH) && !fs.existsSync(SOCKET_PATH)) {
-        log("Socket file deleted externally — exiting orphaned server.")
-        shutdown()
-      }
+    // Resolve symlinks — macOS $TMPDIR is often a symlink to /private/tmp
+    const watchDir = fs.realpathSync(path.dirname(SOCKET_PATH))
+    const socketBase = path.basename(SOCKET_PATH)
+    let debounceTimer = null
+
+    _socketWatcher = fs.watch(watchDir, (eventType, filename) => {
+      if (filename !== socketBase) return
+      // Debounce to avoid false positives from atomic rename operations
+      if (debounceTimer) clearTimeout(debounceTimer)
+      debounceTimer = setTimeout(() => {
+        debounceTimer = null
+        if (!fs.existsSync(SOCKET_PATH)) {
+          log("Socket file deleted externally — exiting orphaned server.")
+          shutdown()
+        }
+      }, 150)
+      if (debounceTimer.unref) debounceTimer.unref()
     })
     _socketWatcher.unref()
   } catch {
@@ -365,8 +377,8 @@ const server = http.createServer(async (req, res) => {
 
       try {
         // NEVER log text — it contains the sensitive data we're protecting
-        _requestCount++
         const spans = await enqueueInference(text, categories)
+        _requestCount++
         log(`detect #${_requestCount}: ${spans.length} span(s) found`)
         res.writeHead(200, { "Content-Type": "application/json" })
         res.end(JSON.stringify({ spans }))
@@ -459,12 +471,15 @@ process.on("uncaughtException", (err) => {
   process.exit(1)
 })
 
-// Set socket permissions to 0600 after creation, write PID file after bind
+// Set restrictive umask before socket creation so the socket is never
+// world-accessible (avoids TOCTOU race between listen() and chmod).
+const prevUmask = process.umask(0o177) // creates files as 0o600
 server.listen(SOCKET_PATH, () => {
+  process.umask(prevUmask) // restore original umask
   try {
     fs.chmodSync(SOCKET_PATH, 0o600)
   } catch {
-    /* best-effort */
+    /* belt-and-suspenders — umask already handled it */
   }
   // Write PID file only after socket is bound (avoids race with other instances)
   fs.writeFileSync(PID_PATH, String(process.pid), "utf8")
diff --git a/src/patterns.js b/src/patterns.js
index 32e1503..c76a1e1 100644
--- a/src/patterns.js
+++ b/src/patterns.js
@@ -178,6 +178,42 @@ const BUILTIN = new Map([
       category: "BEARER_TOKEN",
     },
   ],
+  [
+    "npm_token",
+    {
+      // npm access tokens: npm_XXXXXXXXXXXXXXXXXXXX (36+ alphanumeric)
+      pattern: String.raw`npm_[A-Za-z0-9]{36,}`,
+      flags: "",
+      category: "NPM_TOKEN",
+    },
+  ],
+  [
+    "stripe_key",
+    {
+      // Stripe secret/publishable keys: sk_live_*, pk_live_*, sk_test_*, pk_test_*
+      pattern: String.raw`[sp]k_(?:live|test)_[A-Za-z0-9]{20,}`,
+      flags: "",
+      category: "STRIPE_KEY",
+    },
+  ],
+  [
+    "slack_webhook",
+    {
+      // Slack incoming webhook URLs
+      pattern: String.raw`https://hooks\.slack\.com/services/T[A-Z0-9]+/B[A-Z0-9]+/[A-Za-z0-9]+`,
+      flags: "",
+      category: "SLACK_WEBHOOK",
+    },
+  ],
+  [
+    "slack_token",
+    {
+      // Slack bot/user tokens: xoxb-*, xoxp-*, xoxs-*
+      pattern: String.raw`xox[bps]-[0-9]+-[A-Za-z0-9-]+`,
+      flags: "",
+      category: "SLACK_TOKEN",
+    },
+  ],
 ])
 
 export function buildPatternSet(patterns) {
@@ -207,7 +243,13 @@ export function buildPatternSet(patterns) {
     const category = sanitizeCategory(x.category)
     const flags = typeof x.flags === "string" ? x.flags : ""
     const peeled = peelInlineFlags(pattern, flags)
-    regexRules.push({ pattern: peeled.pattern, flags: peeled.flags, category })
+    const globalFlags = peeled.flags.includes("g") ? peeled.flags : `${peeled.flags}g`
+    regexRules.push({
+      pattern: peeled.pattern,
+      flags: peeled.flags,
+      category,
+      compiled: new RegExp(peeled.pattern, globalFlags),
+    })
   }
 
   for (const name of builtin) {
@@ -215,7 +257,13 @@ export function buildPatternSet(patterns) {
     if (!key) continue
     const rule = BUILTIN.get(key)
     if (!rule) continue
-    regexRules.push({ pattern: rule.pattern, flags: rule.flags, category: rule.category })
+    const globalFlags = rule.flags.includes("g") ? rule.flags : `${rule.flags}g`
+    regexRules.push({
+      pattern: rule.pattern,
+      flags: rule.flags,
+      category: rule.category,
+      compiled: new RegExp(rule.pattern, globalFlags),
+    })
   }
 
   const excludeSet = new Set(exclude.map((x) => String(x ?? "")))

From acbc4404d0b73ae4274d6c34485b7436380cf36c Mon Sep 17 00:00:00 2001
From: Jeremy John <jeremy.john@rate.com>
Date: Mon, 18 May 2026 10:13:01 -0500
Subject: [PATCH 10/15] feat: add {{novg:...}} bypass markers to skip redaction
 on wrapped content
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Users can wrap any text in {{novg:...}} to prevent vibeguard from
redacting it. The markers are stripped from the output — the LLM
sees the raw content. Unprotected text elsewhere is still redacted.

Works for both regex and AI detection paths.
---
 src/engine.js      | 76 +++++++++++++++++++++++++++++++++++++++++++---
 src/engine.test.js | 51 +++++++++++++++++++++++++++++++
 2 files changed, 123 insertions(+), 4 deletions(-)

diff --git a/src/engine.js b/src/engine.js
index cb581b1..7a083dc 100644
--- a/src/engine.js
+++ b/src/engine.js
@@ -2,6 +2,52 @@
 // Transformers.js infrastructure when AI detection is disabled.
 let _detectWithAI = null
 
+// ---------------------------------------------------------------------------
+// {{novg:...}} bypass markers
+// ---------------------------------------------------------------------------
+const NOVG_RE = /\{\{novg:([\s\S]*?)\}\}/g
+
+/**
+ * Strip {{novg:...}} markers from text and return protected character ranges.
+ * The inner content is kept verbatim; only the markers are removed.
+ * Returns { text: strippedText, protectedRanges: [{start, end}] }
+ */
+function stripProtectedZones(input) {
+  const protectedRanges = []
+  let out = ""
+  let lastEnd = 0
+  let offset = 0 // tracks how much shorter `out` is vs `input`
+
+  NOVG_RE.lastIndex = 0
+  for (const m of input.matchAll(NOVG_RE)) {
+    const matchStart = m.index
+    const inner = m[1]
+    // Copy text before this marker
+    out += input.slice(lastEnd, matchStart)
+    // The inner content starts at this position in the output
+    const innerStart = out.length
+    out += inner
+    const innerEnd = out.length
+    protectedRanges.push({ start: innerStart, end: innerEnd })
+    lastEnd = matchStart + m[0].length
+  }
+  out += input.slice(lastEnd)
+
+  return { text: out, protectedRanges }
+}
+
+/**
+ * Check if a span overlaps any protected range.
+ */
+function isProtected(span, protectedRanges) {
+  for (const zone of protectedRanges) {
+    // Any overlap means protected
+    if (span.start < zone.end && span.end > zone.start) return true
+    if (zone.start >= span.end) break // ranges are sorted
+  }
+  return false
+}
+
 function subtractCovered(start, end, covered) {
   if (start >= end) return []
   const out = []
@@ -130,12 +176,24 @@ function applySpans(text, found, session) {
 
 /**
  * Redact text using regex/keyword patterns only (synchronous, fast).
+ * Supports {{novg:...}} bypass markers — wrapped content is never redacted.
  * Returns { text, matches }.
  */
 export function redactText(input, patterns, session) {
-  const text = String(input ?? "")
+  const raw = String(input ?? "")
+  if (!raw) return { text: raw, matches: [] }
+
+  // Strip bypass markers and get protected zones
+  const { text, protectedRanges } = stripProtectedZones(raw)
   if (!text) return { text, matches: [] }
-  const found = findRegexSpans(text, patterns)
+
+  let found = findRegexSpans(text, patterns)
+
+  // Filter out spans that overlap protected zones
+  if (protectedRanges.length > 0) {
+    found = found.filter((span) => !isProtected(span, protectedRanges))
+  }
+
   return applySpans(text, found, session)
 }
 
@@ -143,6 +201,7 @@ export function redactText(input, patterns, session) {
  * Redact text using both regex/keyword patterns AND the AI Privacy Filter.
  * Async because the AI inference is async. The hook awaits this before
  * proceeding, so redaction is guaranteed complete before the LLM sees the text.
+ * Supports {{novg:...}} bypass markers — wrapped content is never redacted.
  *
  * @param {string} input
  * @param {object} patterns
@@ -152,11 +211,15 @@ export function redactText(input, patterns, session) {
  * @param {Function} [_detectFn] - Optional override for detectWithAI (testing only)
  */
 export async function redactTextWithAI(input, patterns, session, aiConfig, debug, _detectFn) {
-  const text = String(input ?? "")
+  const raw = String(input ?? "")
+  if (!raw) return { text: raw, matches: [] }
+
+  // Strip bypass markers and get protected zones
+  const { text, protectedRanges } = stripProtectedZones(raw)
   if (!text) return { text, matches: [] }
 
   // 1. Regex/keyword detection (fast, synchronous)
-  const found = findRegexSpans(text, patterns)
+  let found = findRegexSpans(text, patterns)
 
   // 2. AI-based detection (async, local model inference)
   const detect = _detectFn ?? await getDetectWithAI()
@@ -166,6 +229,11 @@ export async function redactTextWithAI(input, patterns, session, aiConfig, debug
     found.push(span)
   }
 
+  // 3. Filter out spans that overlap protected zones
+  if (protectedRanges.length > 0) {
+    found = found.filter((span) => !isProtected(span, protectedRanges))
+  }
+
   return applySpans(text, found, session)
 }
 
diff --git a/src/engine.test.js b/src/engine.test.js
index 938d7c6..8c2373e 100644
--- a/src/engine.test.js
+++ b/src/engine.test.js
@@ -236,6 +236,57 @@ describe("restoreText", () => {
   })
 })
 
+describe("{{novg:...}} bypass markers", () => {
+  it("does not redact content inside {{novg:...}}", () => {
+    const patterns = buildPatternSet({ builtin: ["email", "github_token"] })
+    const session = new PlaceholderSession({ prefix: "__VG_" })
+    const input = "Check {{novg:ghp_abc123def456abc123def456abc123def456ab}} now"
+    const result = redactText(input, patterns, session)
+    // Markers stripped, content preserved verbatim
+    assert.ok(result.text.includes("ghp_abc123def456abc123def456abc123def456ab"), "protected content should not be redacted")
+    assert.ok(!result.text.includes("{{novg:"), "markers should be stripped")
+    assert.equal(result.matches.length, 0)
+  })
+
+  it("still redacts unprotected content alongside protected content", () => {
+    const patterns = buildPatternSet({ builtin: ["email", "github_token"] })
+    const session = new PlaceholderSession({ prefix: "__VG_" })
+    const input = "Safe: {{novg:ghp_abc123def456abc123def456abc123def456ab}} Unsafe: user@example.org"
+    const result = redactText(input, patterns, session)
+    // Token is protected
+    assert.ok(result.text.includes("ghp_abc123def456abc123def456abc123def456ab"))
+    // Email is NOT protected — should be redacted
+    assert.ok(!result.text.includes("user@example.org"), "unprotected email should be redacted")
+    assert.ok(result.text.includes("__VG_EMAIL_"))
+  })
+
+  it("handles multiple bypass markers", () => {
+    const patterns = buildPatternSet({ builtin: ["email"] })
+    const session = new PlaceholderSession({ prefix: "__VG_" })
+    const input = "A: {{novg:a@b.com}} B: {{novg:c@d.com}} C: x@y.com"
+    const result = redactText(input, patterns, session)
+    assert.ok(result.text.includes("a@b.com"), "first protected email preserved")
+    assert.ok(result.text.includes("c@d.com"), "second protected email preserved")
+    assert.ok(!result.text.includes("x@y.com"), "unprotected email redacted")
+  })
+
+  it("works with redactTextWithAI", async () => {
+    const patterns = buildPatternSet({ builtin: ["email"] })
+    const session = new PlaceholderSession({ prefix: "__VG_" })
+    const aiConfig = { enabled: true, model: "test", dtype: "q4", device: "cpu", categories: [], silentFallback: true }
+    // AI detects a span overlapping the protected zone — should be filtered
+    const fakeDetect = async (text) => {
+      const idx = text.indexOf("safe@keep.com")
+      if (idx >= 0) return [{ start: idx, end: idx + 13, original: "safe@keep.com", category: "PRIVATE_EMAIL" }]
+      return []
+    }
+    const input = "Keep {{novg:safe@keep.com}} but redact other@leak.com"
+    const result = await redactTextWithAI(input, patterns, session, aiConfig, false, fakeDetect)
+    assert.ok(result.text.includes("safe@keep.com"), "AI-detected span in protected zone should be filtered")
+    assert.ok(!result.text.includes("other@leak.com"), "unprotected email should be redacted")
+  })
+})
+
 describe("config normalizeAiConfig", async () => {
   // Import config module to test normalization
   const { loadConfig } = await import("./config.js")

From 23010b678ebf8af856bf4aa1ed085f461179d0a9 Mon Sep 17 00:00:00 2001
From: Jeremy John <jeremy.john@rate.com>
Date: Mon, 18 May 2026 10:19:35 -0500
Subject: [PATCH 11/15] rename bypass marker from {{novg:}} to {{preserve:}}

---
 package-lock.json  | 1042 ++++++++++++++++++++++++++++++++++++++++++++
 src/engine.js      |   14 +-
 src/engine.test.js |   14 +-
 3 files changed, 1056 insertions(+), 14 deletions(-)
 create mode 100644 package-lock.json

diff --git a/package-lock.json b/package-lock.json
new file mode 100644
index 0000000..543ba2b
--- /dev/null
+++ b/package-lock.json
@@ -0,0 +1,1042 @@
+{
+  "name": "opencode-vibeguard",
+  "version": "0.1.0",
+  "lockfileVersion": 3,
+  "requires": true,
+  "packages": {
+    "": {
+      "name": "opencode-vibeguard",
+      "version": "0.1.0",
+      "license": "MIT",
+      "optionalDependencies": {
+        "@huggingface/transformers": "^4.2.0"
+      }
+    },
+    "node_modules/@emnapi/runtime": {
+      "version": "1.10.0",
+      "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.10.0.tgz",
+      "integrity": "sha512-ewvYlk86xUoGI0zQRNq/mC+16R1QeDlKQy21Ki3oSYXNgLb45GV1P6A0M+/s6nyCuNDqe5VpaY84BzXGwVbwFA==",
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "tslib": "^2.4.0"
+      }
+    },
+    "node_modules/@huggingface/jinja": {
+      "version": "0.5.9",
+      "resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.5.9.tgz",
+      "integrity": "sha512-uWTG+l3VJRsl7EXxYizuL3P+cCPoc3cRqbWWRcQN0FhejRfbdq0RNhCmbY/YDtnTcz9icdLYuLDjsnz4d8JMuw==",
+      "license": "MIT",
+      "optional": true,
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@huggingface/tokenizers": {
+      "version": "0.1.3",
+      "resolved": "https://registry.npmjs.org/@huggingface/tokenizers/-/tokenizers-0.1.3.tgz",
+      "integrity": "sha512-8rF/RRT10u+kn7YuUbUg0OF30K8rjTc78aHpxT+qJ1uWSqxT1MHi8+9ltwYfkFYJzT/oS+qw3JVfHtNMGAdqyA==",
+      "license": "Apache-2.0",
+      "optional": true
+    },
+    "node_modules/@huggingface/transformers": {
+      "version": "4.2.0",
+      "resolved": "https://registry.npmjs.org/@huggingface/transformers/-/transformers-4.2.0.tgz",
+      "integrity": "sha512-8BRCoBMH0XsWaEIamuR0LrJGAfftgHAfb2Vrffy0VKlSAE/MnUJ5/h/zTfEP3fDIft+nk7TqB8xXEyABGitBjQ==",
+      "license": "Apache-2.0",
+      "optional": true,
+      "dependencies": {
+        "@huggingface/jinja": "^0.5.6",
+        "@huggingface/tokenizers": "^0.1.3",
+        "onnxruntime-node": "1.24.3",
+        "onnxruntime-web": "1.26.0-dev.20260416-b7804b056c",
+        "sharp": "^0.34.5"
+      }
+    },
+    "node_modules/@img/colour": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/@img/colour/-/colour-1.1.0.tgz",
+      "integrity": "sha512-Td76q7j57o/tLVdgS746cYARfSyxk8iEfRxewL9h4OMzYhbW4TAcppl0mT4eyqXddh6L/jwoM75mo7ixa/pCeQ==",
+      "license": "MIT",
+      "optional": true,
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@img/sharp-darwin-arm64": {
+      "version": "0.34.5",
+      "resolved": "https://registry.npmjs.org/@img/sharp-darwin-arm64/-/sharp-darwin-arm64-0.34.5.tgz",
+      "integrity": "sha512-imtQ3WMJXbMY4fxb/Ndp6HBTNVtWCUI0WdobyheGf5+ad6xX8VIDO8u2xE4qc/fr08CKG/7dDseFtn6M6g/r3w==",
+      "cpu": [
+        "arm64"
+      ],
+      "license": "Apache-2.0",
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-libvips-darwin-arm64": "1.2.4"
+      }
+    },
+    "node_modules/@img/sharp-darwin-x64": {
+      "version": "0.34.5",
+      "resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.34.5.tgz",
+      "integrity": "sha512-YNEFAF/4KQ/PeW0N+r+aVVsoIY0/qxxikF2SWdp+NRkmMB7y9LBZAVqQ4yhGCm/H3H270OSykqmQMKLBhBJDEw==",
+      "cpu": [
+        "x64"
+      ],
+      "license": "Apache-2.0",
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-libvips-darwin-x64": "1.2.4"
+      }
+    },
+    "node_modules/@img/sharp-libvips-darwin-arm64": {
+      "version": "1.2.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-arm64/-/sharp-libvips-darwin-arm64-1.2.4.tgz",
+      "integrity": "sha512-zqjjo7RatFfFoP0MkQ51jfuFZBnVE2pRiaydKJ1G/rHZvnsrHAOcQALIi9sA5co5xenQdTugCvtb1cuf78Vf4g==",
+      "cpu": [
+        "arm64"
+      ],
+      "license": "LGPL-3.0-or-later",
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-libvips-darwin-x64": {
+      "version": "1.2.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.2.4.tgz",
+      "integrity": "sha512-1IOd5xfVhlGwX+zXv2N93k0yMONvUlANylbJw1eTah8K/Jtpi15KC+WSiaX/nBmbm2HxRM1gZ0nSdjSsrZbGKg==",
+      "cpu": [
+        "x64"
+      ],
+      "license": "LGPL-3.0-or-later",
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-libvips-linux-arm": {
+      "version": "1.2.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.2.4.tgz",
+      "integrity": "sha512-bFI7xcKFELdiNCVov8e44Ia4u2byA+l3XtsAj+Q8tfCwO6BQ8iDojYdvoPMqsKDkuoOo+X6HZA0s0q11ANMQ8A==",
+      "cpu": [
+        "arm"
+      ],
+      "license": "LGPL-3.0-or-later",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-libvips-linux-arm64": {
+      "version": "1.2.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.2.4.tgz",
+      "integrity": "sha512-excjX8DfsIcJ10x1Kzr4RcWe1edC9PquDRRPx3YVCvQv+U5p7Yin2s32ftzikXojb1PIFc/9Mt28/y+iRklkrw==",
+      "cpu": [
+        "arm64"
+      ],
+      "license": "LGPL-3.0-or-later",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-libvips-linux-ppc64": {
+      "version": "1.2.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-ppc64/-/sharp-libvips-linux-ppc64-1.2.4.tgz",
+      "integrity": "sha512-FMuvGijLDYG6lW+b/UvyilUWu5Ayu+3r2d1S8notiGCIyYU/76eig1UfMmkZ7vwgOrzKzlQbFSuQfgm7GYUPpA==",
+      "cpu": [
+        "ppc64"
+      ],
+      "license": "LGPL-3.0-or-later",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-libvips-linux-riscv64": {
+      "version": "1.2.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-riscv64/-/sharp-libvips-linux-riscv64-1.2.4.tgz",
+      "integrity": "sha512-oVDbcR4zUC0ce82teubSm+x6ETixtKZBh/qbREIOcI3cULzDyb18Sr/Wcyx7NRQeQzOiHTNbZFF1UwPS2scyGA==",
+      "cpu": [
+        "riscv64"
+      ],
+      "license": "LGPL-3.0-or-later",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-libvips-linux-s390x": {
+      "version": "1.2.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.2.4.tgz",
+      "integrity": "sha512-qmp9VrzgPgMoGZyPvrQHqk02uyjA0/QrTO26Tqk6l4ZV0MPWIW6LTkqOIov+J1yEu7MbFQaDpwdwJKhbJvuRxQ==",
+      "cpu": [
+        "s390x"
+      ],
+      "license": "LGPL-3.0-or-later",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-libvips-linux-x64": {
+      "version": "1.2.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.2.4.tgz",
+      "integrity": "sha512-tJxiiLsmHc9Ax1bz3oaOYBURTXGIRDODBqhveVHonrHJ9/+k89qbLl0bcJns+e4t4rvaNBxaEZsFtSfAdquPrw==",
+      "cpu": [
+        "x64"
+      ],
+      "license": "LGPL-3.0-or-later",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-libvips-linuxmusl-arm64": {
+      "version": "1.2.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.2.4.tgz",
+      "integrity": "sha512-FVQHuwx1IIuNow9QAbYUzJ+En8KcVm9Lk5+uGUQJHaZmMECZmOlix9HnH7n1TRkXMS0pGxIJokIVB9SuqZGGXw==",
+      "cpu": [
+        "arm64"
+      ],
+      "license": "LGPL-3.0-or-later",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-libvips-linuxmusl-x64": {
+      "version": "1.2.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.2.4.tgz",
+      "integrity": "sha512-+LpyBk7L44ZIXwz/VYfglaX/okxezESc6UxDSoyo2Ks6Jxc4Y7sGjpgU9s4PMgqgjj1gZCylTieNamqA1MF7Dg==",
+      "cpu": [
+        "x64"
+      ],
+      "license": "LGPL-3.0-or-later",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-linux-arm": {
+      "version": "0.34.5",
+      "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.34.5.tgz",
+      "integrity": "sha512-9dLqsvwtg1uuXBGZKsxem9595+ujv0sJ6Vi8wcTANSFpwV/GONat5eCkzQo/1O6zRIkh0m/8+5BjrRr7jDUSZw==",
+      "cpu": [
+        "arm"
+      ],
+      "license": "Apache-2.0",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-libvips-linux-arm": "1.2.4"
+      }
+    },
+    "node_modules/@img/sharp-linux-arm64": {
+      "version": "0.34.5",
+      "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.34.5.tgz",
+      "integrity": "sha512-bKQzaJRY/bkPOXyKx5EVup7qkaojECG6NLYswgktOZjaXecSAeCWiZwwiFf3/Y+O1HrauiE3FVsGxFg8c24rZg==",
+      "cpu": [
+        "arm64"
+      ],
+      "license": "Apache-2.0",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-libvips-linux-arm64": "1.2.4"
+      }
+    },
+    "node_modules/@img/sharp-linux-ppc64": {
+      "version": "0.34.5",
+      "resolved": "https://registry.npmjs.org/@img/sharp-linux-ppc64/-/sharp-linux-ppc64-0.34.5.tgz",
+      "integrity": "sha512-7zznwNaqW6YtsfrGGDA6BRkISKAAE1Jo0QdpNYXNMHu2+0dTrPflTLNkpc8l7MUP5M16ZJcUvysVWWrMefZquA==",
+      "cpu": [
+        "ppc64"
+      ],
+      "license": "Apache-2.0",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-libvips-linux-ppc64": "1.2.4"
+      }
+    },
+    "node_modules/@img/sharp-linux-riscv64": {
+      "version": "0.34.5",
+      "resolved": "https://registry.npmjs.org/@img/sharp-linux-riscv64/-/sharp-linux-riscv64-0.34.5.tgz",
+      "integrity": "sha512-51gJuLPTKa7piYPaVs8GmByo7/U7/7TZOq+cnXJIHZKavIRHAP77e3N2HEl3dgiqdD/w0yUfiJnII77PuDDFdw==",
+      "cpu": [
+        "riscv64"
+      ],
+      "license": "Apache-2.0",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-libvips-linux-riscv64": "1.2.4"
+      }
+    },
+    "node_modules/@img/sharp-linux-s390x": {
+      "version": "0.34.5",
+      "resolved": "https://registry.npmjs.org/@img/sharp-linux-s390x/-/sharp-linux-s390x-0.34.5.tgz",
+      "integrity": "sha512-nQtCk0PdKfho3eC5MrbQoigJ2gd1CgddUMkabUj+rBevs8tZ2cULOx46E7oyX+04WGfABgIwmMC0VqieTiR4jg==",
+      "cpu": [
+        "s390x"
+      ],
+      "license": "Apache-2.0",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-libvips-linux-s390x": "1.2.4"
+      }
+    },
+    "node_modules/@img/sharp-linux-x64": {
+      "version": "0.34.5",
+      "resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.34.5.tgz",
+      "integrity": "sha512-MEzd8HPKxVxVenwAa+JRPwEC7QFjoPWuS5NZnBt6B3pu7EG2Ge0id1oLHZpPJdn3OQK+BQDiw9zStiHBTJQQQQ==",
+      "cpu": [
+        "x64"
+      ],
+      "license": "Apache-2.0",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-libvips-linux-x64": "1.2.4"
+      }
+    },
+    "node_modules/@img/sharp-linuxmusl-arm64": {
+      "version": "0.34.5",
+      "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.34.5.tgz",
+      "integrity": "sha512-fprJR6GtRsMt6Kyfq44IsChVZeGN97gTD331weR1ex1c1rypDEABN6Tm2xa1wE6lYb5DdEnk03NZPqA7Id21yg==",
+      "cpu": [
+        "arm64"
+      ],
+      "license": "Apache-2.0",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-libvips-linuxmusl-arm64": "1.2.4"
+      }
+    },
+    "node_modules/@img/sharp-linuxmusl-x64": {
+      "version": "0.34.5",
+      "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.34.5.tgz",
+      "integrity": "sha512-Jg8wNT1MUzIvhBFxViqrEhWDGzqymo3sV7z7ZsaWbZNDLXRJZoRGrjulp60YYtV4wfY8VIKcWidjojlLcWrd8Q==",
+      "cpu": [
+        "x64"
+      ],
+      "license": "Apache-2.0",
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-libvips-linuxmusl-x64": "1.2.4"
+      }
+    },
+    "node_modules/@img/sharp-wasm32": {
+      "version": "0.34.5",
+      "resolved": "https://registry.npmjs.org/@img/sharp-wasm32/-/sharp-wasm32-0.34.5.tgz",
+      "integrity": "sha512-OdWTEiVkY2PHwqkbBI8frFxQQFekHaSSkUIJkwzclWZe64O1X4UlUjqqqLaPbUpMOQk6FBu/HtlGXNblIs0huw==",
+      "cpu": [
+        "wasm32"
+      ],
+      "license": "Apache-2.0 AND LGPL-3.0-or-later AND MIT",
+      "optional": true,
+      "dependencies": {
+        "@emnapi/runtime": "^1.7.0"
+      },
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-win32-arm64": {
+      "version": "0.34.5",
+      "resolved": "https://registry.npmjs.org/@img/sharp-win32-arm64/-/sharp-win32-arm64-0.34.5.tgz",
+      "integrity": "sha512-WQ3AgWCWYSb2yt+IG8mnC6Jdk9Whs7O0gxphblsLvdhSpSTtmu69ZG1Gkb6NuvxsNACwiPV6cNSZNzt0KPsw7g==",
+      "cpu": [
+        "arm64"
+      ],
+      "license": "Apache-2.0 AND LGPL-3.0-or-later",
+      "optional": true,
+      "os": [
+        "win32"
+      ],
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-win32-ia32": {
+      "version": "0.34.5",
+      "resolved": "https://registry.npmjs.org/@img/sharp-win32-ia32/-/sharp-win32-ia32-0.34.5.tgz",
+      "integrity": "sha512-FV9m/7NmeCmSHDD5j4+4pNI8Cp3aW+JvLoXcTUo0IqyjSfAZJ8dIUmijx1qaJsIiU+Hosw6xM5KijAWRJCSgNg==",
+      "cpu": [
+        "ia32"
+      ],
+      "license": "Apache-2.0 AND LGPL-3.0-or-later",
+      "optional": true,
+      "os": [
+        "win32"
+      ],
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-win32-x64": {
+      "version": "0.34.5",
+      "resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.34.5.tgz",
+      "integrity": "sha512-+29YMsqY2/9eFEiW93eqWnuLcWcufowXewwSNIT6UwZdUUCrM3oFjMWH/Z6/TMmb4hlFenmfAVbpWeup2jryCw==",
+      "cpu": [
+        "x64"
+      ],
+      "license": "Apache-2.0 AND LGPL-3.0-or-later",
+      "optional": true,
+      "os": [
+        "win32"
+      ],
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@protobufjs/aspromise": {
+      "version": "1.1.2",
+      "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
+      "integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==",
+      "license": "BSD-3-Clause",
+      "optional": true
+    },
+    "node_modules/@protobufjs/base64": {
+      "version": "1.1.2",
+      "resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz",
+      "integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==",
+      "license": "BSD-3-Clause",
+      "optional": true
+    },
+    "node_modules/@protobufjs/codegen": {
+      "version": "2.0.5",
+      "resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.5.tgz",
+      "integrity": "sha512-zgXFLzW3Ap33e6d0Wlj4MGIm6Ce8O89n/apUaGNB/jx+hw+ruWEp7EwGUshdLKVRCxZW12fp9r40E1mQrf/34g==",
+      "license": "BSD-3-Clause",
+      "optional": true
+    },
+    "node_modules/@protobufjs/eventemitter": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz",
+      "integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==",
+      "license": "BSD-3-Clause",
+      "optional": true
+    },
+    "node_modules/@protobufjs/fetch": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz",
+      "integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==",
+      "license": "BSD-3-Clause",
+      "optional": true,
+      "dependencies": {
+        "@protobufjs/aspromise": "^1.1.1",
+        "@protobufjs/inquire": "^1.1.0"
+      }
+    },
+    "node_modules/@protobufjs/float": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz",
+      "integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==",
+      "license": "BSD-3-Clause",
+      "optional": true
+    },
+    "node_modules/@protobufjs/inquire": {
+      "version": "1.1.1",
+      "resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.1.tgz",
+      "integrity": "sha512-mnzgDV26ueAvk7rsbt9L7bE0SuAoqyuys/sMMrmVcN5x9VsxpcG3rqAUSgDyLp0UZlmNfIbQ4fHfCtreVBk8Ew==",
+      "license": "BSD-3-Clause",
+      "optional": true
+    },
+    "node_modules/@protobufjs/path": {
+      "version": "1.1.2",
+      "resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz",
+      "integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==",
+      "license": "BSD-3-Clause",
+      "optional": true
+    },
+    "node_modules/@protobufjs/pool": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz",
+      "integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==",
+      "license": "BSD-3-Clause",
+      "optional": true
+    },
+    "node_modules/@protobufjs/utf8": {
+      "version": "1.1.1",
+      "resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.1.tgz",
+      "integrity": "sha512-oOAWABowe8EAbMyWKM0tYDKi8Yaox52D+HWZhAIJqQXbqe0xI/GV7FhLWqlEKreMkfDjshR5FKgi3mnle0h6Eg==",
+      "license": "BSD-3-Clause",
+      "optional": true
+    },
+    "node_modules/@types/node": {
+      "version": "25.8.0",
+      "resolved": "https://registry.npmjs.org/@types/node/-/node-25.8.0.tgz",
+      "integrity": "sha512-TCFSk8IZh+iLX1xtksoBVtdmgL+1IX0fC9BeU4QqFSuNdN/K+HUlhqOzEmSYYpZUVsLYcPqc9KX+60iDuninSQ==",
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "undici-types": ">=7.24.0 <7.24.7"
+      }
+    },
+    "node_modules/adm-zip": {
+      "version": "0.5.17",
+      "resolved": "https://registry.npmjs.org/adm-zip/-/adm-zip-0.5.17.tgz",
+      "integrity": "sha512-+Ut8d9LLqwEvHHJl1+PIHqoyDxFgVN847JTVM3Izi3xHDWPE4UtzzXysMZQs64DMcrJfBeS/uoEP4AD3HQHnQQ==",
+      "license": "MIT",
+      "optional": true,
+      "engines": {
+        "node": ">=12.0"
+      }
+    },
+    "node_modules/boolean": {
+      "version": "3.2.0",
+      "resolved": "https://registry.npmjs.org/boolean/-/boolean-3.2.0.tgz",
+      "integrity": "sha512-d0II/GO9uf9lfUHH2BQsjxzRJZBdsjgsBiW4BvhWk/3qoKwQFjIDVN19PfX8F2D/r9PCMTtLWjYVCFrpeYUzsw==",
+      "deprecated": "Package no longer supported. Contact Support at https://www.npmjs.com/support for more info.",
+      "license": "MIT",
+      "optional": true
+    },
+    "node_modules/define-data-property": {
+      "version": "1.1.4",
+      "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz",
+      "integrity": "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==",
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "es-define-property": "^1.0.0",
+        "es-errors": "^1.3.0",
+        "gopd": "^1.0.1"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
+    "node_modules/define-properties": {
+      "version": "1.2.1",
+      "resolved": "https://registry.npmjs.org/define-properties/-/define-properties-1.2.1.tgz",
+      "integrity": "sha512-8QmQKqEASLd5nx0U1B1okLElbUuuttJ/AnYmRXbbbGDWh6uS208EjD4Xqq/I9wK7u0v6O08XhTWnt5XtEbR6Dg==",
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "define-data-property": "^1.0.1",
+        "has-property-descriptors": "^1.0.0",
+        "object-keys": "^1.1.1"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
+    "node_modules/detect-libc": {
+      "version": "2.1.2",
+      "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz",
+      "integrity": "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==",
+      "license": "Apache-2.0",
+      "optional": true,
+      "engines": {
+        "node": ">=8"
+      }
+    },
+    "node_modules/detect-node": {
+      "version": "2.1.0",
+      "resolved": "https://registry.npmjs.org/detect-node/-/detect-node-2.1.0.tgz",
+      "integrity": "sha512-T0NIuQpnTvFDATNuHN5roPwSBG83rFsuO+MXXH9/3N1eFbn4wcPjttvjMLEPWJ0RGUYgQE7cGgS3tNxbqCGM7g==",
+      "license": "MIT",
+      "optional": true
+    },
+    "node_modules/es-define-property": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz",
+      "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==",
+      "license": "MIT",
+      "optional": true,
+      "engines": {
+        "node": ">= 0.4"
+      }
+    },
+    "node_modules/es-errors": {
+      "version": "1.3.0",
+      "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz",
+      "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==",
+      "license": "MIT",
+      "optional": true,
+      "engines": {
+        "node": ">= 0.4"
+      }
+    },
+    "node_modules/es6-error": {
+      "version": "4.1.1",
+      "resolved": "https://registry.npmjs.org/es6-error/-/es6-error-4.1.1.tgz",
+      "integrity": "sha512-Um/+FxMr9CISWh0bi5Zv0iOD+4cFh5qLeks1qhAopKVAJw3drgKbKySikp7wGhDL0HPeaja0P5ULZrxLkniUVg==",
+      "license": "MIT",
+      "optional": true
+    },
+    "node_modules/escape-string-regexp": {
+      "version": "4.0.0",
+      "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz",
+      "integrity": "sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==",
+      "license": "MIT",
+      "optional": true,
+      "engines": {
+        "node": ">=10"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/sindresorhus"
+      }
+    },
+    "node_modules/flatbuffers": {
+      "version": "25.9.23",
+      "resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-25.9.23.tgz",
+      "integrity": "sha512-MI1qs7Lo4Syw0EOzUl0xjs2lsoeqFku44KpngfIduHBYvzm8h2+7K8YMQh1JtVVVrUvhLpNwqVi4DERegUJhPQ==",
+      "license": "Apache-2.0",
+      "optional": true
+    },
+    "node_modules/global-agent": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/global-agent/-/global-agent-3.0.0.tgz",
+      "integrity": "sha512-PT6XReJ+D07JvGoxQMkT6qji/jVNfX/h364XHZOWeRzy64sSFr+xJ5OX7LI3b4MPQzdL4H8Y8M0xzPpsVMwA8Q==",
+      "license": "BSD-3-Clause",
+      "optional": true,
+      "dependencies": {
+        "boolean": "^3.0.1",
+        "es6-error": "^4.1.1",
+        "matcher": "^3.0.0",
+        "roarr": "^2.15.3",
+        "semver": "^7.3.2",
+        "serialize-error": "^7.0.1"
+      },
+      "engines": {
+        "node": ">=10.0"
+      }
+    },
+    "node_modules/globalthis": {
+      "version": "1.0.4",
+      "resolved": "https://registry.npmjs.org/globalthis/-/globalthis-1.0.4.tgz",
+      "integrity": "sha512-DpLKbNU4WylpxJykQujfCcwYWiV/Jhm50Goo0wrVILAv5jOr9d+H+UR3PhSCD2rCCEIg0uc+G+muBTwD54JhDQ==",
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "define-properties": "^1.2.1",
+        "gopd": "^1.0.1"
+      },
+      "engines": {
+        "node": ">= 0.4"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
+    "node_modules/gopd": {
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz",
+      "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==",
+      "license": "MIT",
+      "optional": true,
+      "engines": {
+        "node": ">= 0.4"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
+    "node_modules/guid-typescript": {
+      "version": "1.0.9",
+      "resolved": "https://registry.npmjs.org/guid-typescript/-/guid-typescript-1.0.9.tgz",
+      "integrity": "sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ==",
+      "license": "ISC",
+      "optional": true
+    },
+    "node_modules/has-property-descriptors": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.2.tgz",
+      "integrity": "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==",
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "es-define-property": "^1.0.0"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
+    "node_modules/json-stringify-safe": {
+      "version": "5.0.1",
+      "resolved": "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz",
+      "integrity": "sha512-ZClg6AaYvamvYEE82d3Iyd3vSSIjQ+odgjaTzRuO3s7toCdFKczob2i0zCh7JE8kWn17yvAWhUVxvqGwUalsRA==",
+      "license": "ISC",
+      "optional": true
+    },
+    "node_modules/long": {
+      "version": "5.3.2",
+      "resolved": "https://registry.npmjs.org/long/-/long-5.3.2.tgz",
+      "integrity": "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==",
+      "license": "Apache-2.0",
+      "optional": true
+    },
+    "node_modules/matcher": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/matcher/-/matcher-3.0.0.tgz",
+      "integrity": "sha512-OkeDaAZ/bQCxeFAozM55PKcKU0yJMPGifLwV4Qgjitu+5MoAfSQN4lsLJeXZ1b8w0x+/Emda6MZgXS1jvsapng==",
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "escape-string-regexp": "^4.0.0"
+      },
+      "engines": {
+        "node": ">=10"
+      }
+    },
+    "node_modules/object-keys": {
+      "version": "1.1.1",
+      "resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.1.1.tgz",
+      "integrity": "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==",
+      "license": "MIT",
+      "optional": true,
+      "engines": {
+        "node": ">= 0.4"
+      }
+    },
+    "node_modules/onnxruntime-common": {
+      "version": "1.24.3",
+      "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.24.3.tgz",
+      "integrity": "sha512-GeuPZO6U/LBJXvwdaqHbuUmoXiEdeCjWi/EG7Y1HNnDwJYuk6WUbNXpF6luSUY8yASul3cmUlLGrCCL1ZgVXqA==",
+      "license": "MIT",
+      "optional": true
+    },
+    "node_modules/onnxruntime-node": {
+      "version": "1.24.3",
+      "resolved": "https://registry.npmjs.org/onnxruntime-node/-/onnxruntime-node-1.24.3.tgz",
+      "integrity": "sha512-JH7+czbc8ALA819vlTgcV+Q214/+VjGeBHDjX81+ZCD0PCVCIFGFNtT0V4sXG/1JXypKPgScQcB3ij/hk3YnTg==",
+      "hasInstallScript": true,
+      "license": "MIT",
+      "optional": true,
+      "os": [
+        "win32",
+        "darwin",
+        "linux"
+      ],
+      "dependencies": {
+        "adm-zip": "^0.5.16",
+        "global-agent": "^3.0.0",
+        "onnxruntime-common": "1.24.3"
+      }
+    },
+    "node_modules/onnxruntime-web": {
+      "version": "1.26.0-dev.20260416-b7804b056c",
+      "resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.26.0-dev.20260416-b7804b056c.tgz",
+      "integrity": "sha512-MD6Ss4GSpQBo6zqoJzyT9LRbKYs7x/JVN23FT24EcEvlqF4VuzPOeH6X38orZPKHQDbprn7K+SBpu0/mj2CQiw==",
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "flatbuffers": "^25.1.24",
+        "guid-typescript": "^1.0.9",
+        "long": "^5.2.3",
+        "onnxruntime-common": "1.24.0-dev.20251116-b39e144322",
+        "platform": "^1.3.6",
+        "protobufjs": "^7.2.4"
+      }
+    },
+    "node_modules/onnxruntime-web/node_modules/onnxruntime-common": {
+      "version": "1.24.0-dev.20251116-b39e144322",
+      "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.24.0-dev.20251116-b39e144322.tgz",
+      "integrity": "sha512-BOoomdHYmNRL5r4iQ4bMvsl2t0/hzVQ3OM3PHD0gxeXu1PmggqBv3puZicEUVOA3AtHHYmqZtjMj9FOfGrATTw==",
+      "license": "MIT",
+      "optional": true
+    },
+    "node_modules/platform": {
+      "version": "1.3.6",
+      "resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz",
+      "integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==",
+      "license": "MIT",
+      "optional": true
+    },
+    "node_modules/protobufjs": {
+      "version": "7.5.8",
+      "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.8.tgz",
+      "integrity": "sha512-dvpCIeLPbXZS/Ete7yLaO7RenOdken2NHKykBXbsaGxZT0UTltcarBciw+A78SRQs9iMAAVpsYA+l8b1hTePIA==",
+      "hasInstallScript": true,
+      "license": "BSD-3-Clause",
+      "optional": true,
+      "dependencies": {
+        "@protobufjs/aspromise": "^1.1.2",
+        "@protobufjs/base64": "^1.1.2",
+        "@protobufjs/codegen": "^2.0.5",
+        "@protobufjs/eventemitter": "^1.1.0",
+        "@protobufjs/fetch": "^1.1.0",
+        "@protobufjs/float": "^1.0.2",
+        "@protobufjs/inquire": "^1.1.1",
+        "@protobufjs/path": "^1.1.2",
+        "@protobufjs/pool": "^1.1.0",
+        "@protobufjs/utf8": "^1.1.1",
+        "@types/node": ">=13.7.0",
+        "long": "^5.0.0"
+      },
+      "engines": {
+        "node": ">=12.0.0"
+      }
+    },
+    "node_modules/roarr": {
+      "version": "2.15.4",
+      "resolved": "https://registry.npmjs.org/roarr/-/roarr-2.15.4.tgz",
+      "integrity": "sha512-CHhPh+UNHD2GTXNYhPWLnU8ONHdI+5DI+4EYIAOaiD63rHeYlZvyh8P+in5999TTSFgUYuKUAjzRI4mdh/p+2A==",
+      "license": "BSD-3-Clause",
+      "optional": true,
+      "dependencies": {
+        "boolean": "^3.0.1",
+        "detect-node": "^2.0.4",
+        "globalthis": "^1.0.1",
+        "json-stringify-safe": "^5.0.1",
+        "semver-compare": "^1.0.0",
+        "sprintf-js": "^1.1.2"
+      },
+      "engines": {
+        "node": ">=8.0"
+      }
+    },
+    "node_modules/semver": {
+      "version": "7.8.0",
+      "resolved": "https://registry.npmjs.org/semver/-/semver-7.8.0.tgz",
+      "integrity": "sha512-AcM7dV/5ul4EekoQ29Agm5vri8JNqRyj39o0qpX6vDF2GZrtutZl5RwgD1XnZjiTAfncsJhMI48QQH3sN87YNA==",
+      "license": "ISC",
+      "optional": true,
+      "bin": {
+        "semver": "bin/semver.js"
+      },
+      "engines": {
+        "node": ">=10"
+      }
+    },
+    "node_modules/semver-compare": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/semver-compare/-/semver-compare-1.0.0.tgz",
+      "integrity": "sha512-YM3/ITh2MJ5MtzaM429anh+x2jiLVjqILF4m4oyQB18W7Ggea7BfqdH/wGMK7dDiMghv/6WG7znWMwUDzJiXow==",
+      "license": "MIT",
+      "optional": true
+    },
+    "node_modules/serialize-error": {
+      "version": "7.0.1",
+      "resolved": "https://registry.npmjs.org/serialize-error/-/serialize-error-7.0.1.tgz",
+      "integrity": "sha512-8I8TjW5KMOKsZQTvoxjuSIa7foAwPWGOts+6o7sgjz41/qMD9VQHEDxi6PBvK2l0MXUmqZyNpUK+T2tQaaElvw==",
+      "license": "MIT",
+      "optional": true,
+      "dependencies": {
+        "type-fest": "^0.13.1"
+      },
+      "engines": {
+        "node": ">=10"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/sindresorhus"
+      }
+    },
+    "node_modules/sharp": {
+      "version": "0.34.5",
+      "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.34.5.tgz",
+      "integrity": "sha512-Ou9I5Ft9WNcCbXrU9cMgPBcCK8LiwLqcbywW3t4oDV37n1pzpuNLsYiAV8eODnjbtQlSDwZ2cUEeQz4E54Hltg==",
+      "hasInstallScript": true,
+      "license": "Apache-2.0",
+      "optional": true,
+      "dependencies": {
+        "@img/colour": "^1.0.0",
+        "detect-libc": "^2.1.2",
+        "semver": "^7.7.3"
+      },
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-darwin-arm64": "0.34.5",
+        "@img/sharp-darwin-x64": "0.34.5",
+        "@img/sharp-libvips-darwin-arm64": "1.2.4",
+        "@img/sharp-libvips-darwin-x64": "1.2.4",
+        "@img/sharp-libvips-linux-arm": "1.2.4",
+        "@img/sharp-libvips-linux-arm64": "1.2.4",
+        "@img/sharp-libvips-linux-ppc64": "1.2.4",
+        "@img/sharp-libvips-linux-riscv64": "1.2.4",
+        "@img/sharp-libvips-linux-s390x": "1.2.4",
+        "@img/sharp-libvips-linux-x64": "1.2.4",
+        "@img/sharp-libvips-linuxmusl-arm64": "1.2.4",
+        "@img/sharp-libvips-linuxmusl-x64": "1.2.4",
+        "@img/sharp-linux-arm": "0.34.5",
+        "@img/sharp-linux-arm64": "0.34.5",
+        "@img/sharp-linux-ppc64": "0.34.5",
+        "@img/sharp-linux-riscv64": "0.34.5",
+        "@img/sharp-linux-s390x": "0.34.5",
+        "@img/sharp-linux-x64": "0.34.5",
+        "@img/sharp-linuxmusl-arm64": "0.34.5",
+        "@img/sharp-linuxmusl-x64": "0.34.5",
+        "@img/sharp-wasm32": "0.34.5",
+        "@img/sharp-win32-arm64": "0.34.5",
+        "@img/sharp-win32-ia32": "0.34.5",
+        "@img/sharp-win32-x64": "0.34.5"
+      }
+    },
+    "node_modules/sprintf-js": {
+      "version": "1.1.3",
+      "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.1.3.tgz",
+      "integrity": "sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA==",
+      "license": "BSD-3-Clause",
+      "optional": true
+    },
+    "node_modules/tslib": {
+      "version": "2.8.1",
+      "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
+      "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
+      "license": "0BSD",
+      "optional": true
+    },
+    "node_modules/type-fest": {
+      "version": "0.13.1",
+      "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.13.1.tgz",
+      "integrity": "sha512-34R7HTnG0XIJcBSn5XhDd7nNFPRcXYRZrBB2O2jdKqYODldSzBAqzsWoZYYvduky73toYS/ESqxPvkDf/F0XMg==",
+      "license": "(MIT OR CC0-1.0)",
+      "optional": true,
+      "engines": {
+        "node": ">=10"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/sindresorhus"
+      }
+    },
+    "node_modules/undici-types": {
+      "version": "7.24.6",
+      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.24.6.tgz",
+      "integrity": "sha512-WRNW+sJgj5OBN4/0JpHFqtqzhpbnV0GuB+OozA9gCL7a993SmU+1JBZCzLNxYsbMfIeDL+lTsphD5jN5N+n0zg==",
+      "license": "MIT",
+      "optional": true
+    }
+  }
+}
diff --git a/src/engine.js b/src/engine.js
index 7a083dc..f9eed68 100644
--- a/src/engine.js
+++ b/src/engine.js
@@ -3,12 +3,12 @@
 let _detectWithAI = null
 
 // ---------------------------------------------------------------------------
-// {{novg:...}} bypass markers
+// {{preserve:...}} bypass markers
 // ---------------------------------------------------------------------------
-const NOVG_RE = /\{\{novg:([\s\S]*?)\}\}/g
+const PRESERVE_RE = /\{\{preserve:([\s\S]*?)\}\}/g
 
 /**
- * Strip {{novg:...}} markers from text and return protected character ranges.
+ * Strip {{preserve:...}} markers from text and return protected character ranges.
  * The inner content is kept verbatim; only the markers are removed.
  * Returns { text: strippedText, protectedRanges: [{start, end}] }
  */
@@ -18,8 +18,8 @@ function stripProtectedZones(input) {
   let lastEnd = 0
   let offset = 0 // tracks how much shorter `out` is vs `input`
 
-  NOVG_RE.lastIndex = 0
-  for (const m of input.matchAll(NOVG_RE)) {
+  PRESERVE_RE.lastIndex = 0
+  for (const m of input.matchAll(PRESERVE_RE)) {
     const matchStart = m.index
     const inner = m[1]
     // Copy text before this marker
@@ -176,7 +176,7 @@ function applySpans(text, found, session) {
 
 /**
  * Redact text using regex/keyword patterns only (synchronous, fast).
- * Supports {{novg:...}} bypass markers — wrapped content is never redacted.
+ * Supports {{preserve:...}} bypass markers — wrapped content is never redacted.
  * Returns { text, matches }.
  */
 export function redactText(input, patterns, session) {
@@ -201,7 +201,7 @@ export function redactText(input, patterns, session) {
  * Redact text using both regex/keyword patterns AND the AI Privacy Filter.
  * Async because the AI inference is async. The hook awaits this before
  * proceeding, so redaction is guaranteed complete before the LLM sees the text.
- * Supports {{novg:...}} bypass markers — wrapped content is never redacted.
+ * Supports {{preserve:...}} bypass markers — wrapped content is never redacted.
  *
  * @param {string} input
  * @param {object} patterns
diff --git a/src/engine.test.js b/src/engine.test.js
index 8c2373e..6313835 100644
--- a/src/engine.test.js
+++ b/src/engine.test.js
@@ -236,22 +236,22 @@ describe("restoreText", () => {
   })
 })
 
-describe("{{novg:...}} bypass markers", () => {
-  it("does not redact content inside {{novg:...}}", () => {
+describe("{{preserve:...}} bypass markers", () => {
+  it("does not redact content inside {{preserve:...}}", () => {
     const patterns = buildPatternSet({ builtin: ["email", "github_token"] })
     const session = new PlaceholderSession({ prefix: "__VG_" })
-    const input = "Check {{novg:ghp_abc123def456abc123def456abc123def456ab}} now"
+    const input = "Check {{preserve:ghp_abc123def456abc123def456abc123def456ab}} now"
     const result = redactText(input, patterns, session)
     // Markers stripped, content preserved verbatim
     assert.ok(result.text.includes("ghp_abc123def456abc123def456abc123def456ab"), "protected content should not be redacted")
-    assert.ok(!result.text.includes("{{novg:"), "markers should be stripped")
+    assert.ok(!result.text.includes("{{preserve:"), "markers should be stripped")
     assert.equal(result.matches.length, 0)
   })
 
   it("still redacts unprotected content alongside protected content", () => {
     const patterns = buildPatternSet({ builtin: ["email", "github_token"] })
     const session = new PlaceholderSession({ prefix: "__VG_" })
-    const input = "Safe: {{novg:ghp_abc123def456abc123def456abc123def456ab}} Unsafe: user@example.org"
+    const input = "Safe: {{preserve:ghp_abc123def456abc123def456abc123def456ab}} Unsafe: user@example.org"
     const result = redactText(input, patterns, session)
     // Token is protected
     assert.ok(result.text.includes("ghp_abc123def456abc123def456abc123def456ab"))
@@ -263,7 +263,7 @@ describe("{{novg:...}} bypass markers", () => {
   it("handles multiple bypass markers", () => {
     const patterns = buildPatternSet({ builtin: ["email"] })
     const session = new PlaceholderSession({ prefix: "__VG_" })
-    const input = "A: {{novg:a@b.com}} B: {{novg:c@d.com}} C: x@y.com"
+    const input = "A: {{preserve:a@b.com}} B: {{preserve:c@d.com}} C: x@y.com"
     const result = redactText(input, patterns, session)
     assert.ok(result.text.includes("a@b.com"), "first protected email preserved")
     assert.ok(result.text.includes("c@d.com"), "second protected email preserved")
@@ -280,7 +280,7 @@ describe("{{novg:...}} bypass markers", () => {
       if (idx >= 0) return [{ start: idx, end: idx + 13, original: "safe@keep.com", category: "PRIVATE_EMAIL" }]
       return []
     }
-    const input = "Keep {{novg:safe@keep.com}} but redact other@leak.com"
+    const input = "Keep {{preserve:safe@keep.com}} but redact other@leak.com"
     const result = await redactTextWithAI(input, patterns, session, aiConfig, false, fakeDetect)
     assert.ok(result.text.includes("safe@keep.com"), "AI-detected span in protected zone should be filtered")
     assert.ok(!result.text.includes("other@leak.com"), "unprotected email should be redacted")

From ac7818cb93e5989c6da171a74eabc12a0c9b3d28 Mon Sep 17 00:00:00 2001
From: Jeremy John <jeremy.john@rate.com>
Date: Mon, 18 May 2026 11:21:46 -0500
Subject: [PATCH 12/15] add config kill-switch: check ai.enabled before
 spawning server
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

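ensureServer() now re-reads vibeguard.config.json before spawning.
If ai.enabled is false, it returns immediately without starting the
model server. The check uses no timers or polling — it is a single
synchronous read at spawn time only. If the config file cannot be
read or parsed, the spawn proceeds as before.

Also adds a memory watchdog to model-server.js: every 30s it compares
the process RSS against VIBEGUARD_MAX_RSS_MB (default 1500) and calls
shutdown() when the cap is exceeded.

Also renames bypass marker from {{novg:}} to {{preserve:}}.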
---
 src/ai-detect.js    | 11 +++++++++++
 src/model-server.js | 20 +++++++++++++++++++-
 2 files changed, 30 insertions(+), 1 deletion(-)
 mode change 100755 => 100644 src/model-server.js

diff --git a/src/ai-detect.js b/src/ai-detect.js
index fdd32c3..2da67a1 100644
--- a/src/ai-detect.js
+++ b/src/ai-detect.js
@@ -351,6 +351,17 @@ async function ensureServer(aiConfig, debug) {
       return await waitForReady(aiConfig, debug)
     }
 
+    // Re-read config before spawning — if ai.enabled is false, bail out.
+    // This lets users disable AI without restarting opencode sessions.
+    try {
+      const cfgPath = path.join(os.homedir(), ".config", "opencode", "vibeguard.config.json")
+      const cfg = JSON.parse(fs.readFileSync(cfgPath, "utf8"))
+      if (cfg?.ai?.enabled === false) {
+        if (debug) _log("info", "AI disabled in config — not spawning server")
+        return false
+      }
+    } catch { /* config unreadable — proceed with spawn */ }
+
     // Clean up any stale files from a crashed server
     cleanupStaleFiles()
 
diff --git a/src/model-server.js b/src/model-server.js
old mode 100755
new mode 100644
index 0981af6..aa02235
--- a/src/model-server.js
+++ b/src/model-server.js
@@ -281,6 +281,23 @@ function startSocketWatchdog() {
   }
 }
 
+// ---------------------------------------------------------------------------
+// Memory watchdog — kill if RSS exceeds cap (ONNX runtime leaks over time)
+// ---------------------------------------------------------------------------
+const MAX_RSS_MB = parseInt(process.env.VIBEGUARD_MAX_RSS_MB || "1500", 10)
+let _memoryWatchdog = null
+
+function startMemoryWatchdog() {
+  _memoryWatchdog = setInterval(() => {
+    const rssMB = process.memoryUsage.rss() / 1024 / 1024
+    if (rssMB > MAX_RSS_MB) {
+      log(`RSS ${rssMB.toFixed(0)}MB exceeds cap ${MAX_RSS_MB}MB — restarting.`)
+      shutdown()
+    }
+  }, 30_000) // check every 30s
+  if (_memoryWatchdog.unref) _memoryWatchdog.unref()
+}
+
 // ---------------------------------------------------------------------------
 // HTTP server
 // ---------------------------------------------------------------------------
@@ -488,9 +505,10 @@ server.listen(SOCKET_PATH, () => {
   log(`Model: ${MODEL}, dtype: ${DTYPE}, device: ${DEVICE}`)
   log(`Idle timeout: ${IDLE_TIMEOUT_MS / 60_000}min`)
 
-  // Start idle timer + socket watchdog
+  // Start idle timer + socket watchdog + memory watchdog
   resetIdleTimer()
   startSocketWatchdog()
+  startMemoryWatchdog()
 
   // Begin loading the model (async, non-blocking)
   loadPipeline()

From 5086d337fb9984aaf87f123983cfce241321278a Mon Sep 17 00:00:00 2001
From: Jeremy John <jeremy.john@rate.com>
Date: Mon, 18 May 2026 14:01:52 -0500
Subject: [PATCH 13/15] switch AI model to bert-small (28MB, ~150MB RAM)

Replace openai/privacy-filter (875MB q4, dequantizes to 28GB RAM) with
broadfield-dev/bert-small-ner-pii-tuned-12261022-onnx (28MB, ~150MB RAM).

- Extend LABEL_TO_CATEGORY with bert-small's 17 entity labels (the
  existing openai/privacy-filter labels are kept)
- Add 10 detection breadth tests (passwords, API keys, names, emails,
  phones, SSN, addresses, credit cards, connection strings, DOB)
- Remove UUID false-positive test (model-specific to old model)
- Add programming identifiers false-positive resistance test
- Update config dtype to fp32 (model ships as single ONNX file)
---
 src/model-server.js      |  19 +++++++
 src/model-server.test.js | 104 +++++++++++++++++++++++++++++++++------
 2 files changed, 109 insertions(+), 14 deletions(-)

diff --git a/src/model-server.js b/src/model-server.js
index aa02235..89dc4d1 100644
--- a/src/model-server.js
+++ b/src/model-server.js
@@ -79,6 +79,7 @@ function log(msg) {
 // Model loading (moved from ai-detect.js)
 // ---------------------------------------------------------------------------
 const LABEL_TO_CATEGORY = {
+  // openai/privacy-filter labels
   private_person: "PRIVATE_PERSON",
   private_address: "PRIVATE_ADDRESS",
   private_email: "PRIVATE_EMAIL",
@@ -87,6 +88,24 @@ const LABEL_TO_CATEGORY = {
   private_date: "PRIVATE_DATE",
   account_number: "ACCOUNT_NUMBER",
   secret: "SECRET",
+  // broadfield-dev/bert-small labels (BIO prefix already stripped)
+  email: "PRIVATE_EMAIL",
+  telephonenum: "PRIVATE_PHONE",
+  givenname: "PRIVATE_PERSON",
+  surname: "PRIVATE_PERSON",
+  username: "PRIVATE_PERSON",
+  street: "PRIVATE_ADDRESS",
+  city: "PRIVATE_ADDRESS",
+  buildingnum: "PRIVATE_ADDRESS",
+  zipcode: "PRIVATE_ADDRESS",
+  creditcardnumber: "ACCOUNT_NUMBER",
+  accountnum: "ACCOUNT_NUMBER",
+  socialnum: "ACCOUNT_NUMBER",
+  idcardnum: "ACCOUNT_NUMBER",
+  driverlicensenum: "ACCOUNT_NUMBER",
+  taxnum: "ACCOUNT_NUMBER",
+  dateofbirth: "PRIVATE_DATE",
+  password: "SECRET",
 }
 
 let _pipeline = null
diff --git a/src/model-server.test.js b/src/model-server.test.js
index 351a9c7..9529a56 100644
--- a/src/model-server.test.js
+++ b/src/model-server.test.js
@@ -356,27 +356,97 @@ describe("model-server (AI integration)", { timeout: 300_000, skip: SKIP_AI }, (
   })
 
   // =========================================================================
-  // False-positive resistance
+  // Detection breadth — verify the model catches diverse PII/secret types
   // =========================================================================
 
-  describe("false positive resistance", () => {
-    it("flags a UUID as SECRET (known model behavior — document, don't rely on)", async () => {
-      // The Privacy Filter model classifies UUIDs as secrets because they
-      // look like hex tokens. This is a known false-positive. We document
-      // the behavior here so we know if/when model updates fix it.
-      // In practice the regex layer does NOT match UUIDs, so the merged
-      // output only includes this if AI is active.
-      const text = "Request ID: 550e8400-e29b-41d4-a716-446655440000"
+  describe("detection breadth", () => {
+    it("detects an arbitrary password string from context", async () => {
+      const text = "The database password is hunter2secretpass123"
+      const r = await detect(text)
+      assertDetected(spans(r), "hunter2secretpass123", "SECRET")
+    })
+
+    it("detects a random API key string", async () => {
+      const text = "Set API_KEY=xK9mP2vL8nQ4wR7yT3hB5cF6gJ1aD0eU in your env"
+      const r = await detect(text)
+      assertDetected(spans(r), "xK9mP2vL8nQ4wR7yT3hB5cF6gJ1aD0eU", "SECRET")
+    })
+
+    it("detects a full name", async () => {
+      const text = "Contact John Smith at the front desk"
+      const r = await detect(text)
+      assertDetected(spans(r), "John Smith", "PRIVATE_PERSON")
+    })
+
+    it("detects an email address", async () => {
+      const text = "Send results to john.smith@acme.corp for review"
+      const r = await detect(text)
+      assertDetected(spans(r), "john.smith@acme.corp", "PRIVATE_EMAIL")
+    })
+
+    it("detects a phone number", async () => {
+      const text = "Call me at 415-555-0198 after 5pm"
+      const r = await detect(text)
+      const s = spans(r)
+      assert.ok(
+        s.some((sp) => sp.category === "PRIVATE_PHONE" && sp.original.includes("415-555-0198")),
+        `Expected PRIVATE_PHONE with 415-555-0198, got: ${JSON.stringify(s)}`
+      )
+    })
+
+    it("detects a social security number", async () => {
+      const text = "SSN on file: 123-45-6789"
+      const r = await detect(text)
+      const s = spans(r)
+      assert.ok(
+        s.some((sp) => sp.category === "ACCOUNT_NUMBER"),
+        `Expected ACCOUNT_NUMBER for SSN, got: ${JSON.stringify(s)}`
+      )
+    })
+
+    it("detects a full street address", async () => {
+      const text = "Ship to 742 Evergreen Terrace, Springfield IL 62704"
+      const r = await detect(text)
+      const s = spans(r)
+      assert.ok(
+        s.some((sp) => sp.category === "PRIVATE_ADDRESS"),
+        `Expected PRIVATE_ADDRESS, got: ${JSON.stringify(s)}`
+      )
+    })
+
+    it("detects a credit card number", async () => {
+      const text = "Card on file: 4532 0151 2345 6789 exp 12/27"
+      const r = await detect(text)
+      const s = spans(r)
+      assert.ok(
+        s.some((sp) => sp.category === "ACCOUNT_NUMBER"),
+        `Expected ACCOUNT_NUMBER for credit card, got: ${JSON.stringify(s)}`
+      )
+    })
+
+    it("detects a connection string with embedded password", async () => {
+      const text = "MONGO_URI=mongodb://admin:p4ssw0rd@db.internal:27017/prod"
+      const r = await detect(text)
+      const s = spans(r)
+      assert.ok(s.length > 0, `Expected at least one detection for connection string, got: ${JSON.stringify(s)}`)
+    })
+
+    it("detects a date of birth", async () => {
+      const text = "Patient DOB: March 15, 1987"
       const r = await detect(text)
       const s = spans(r)
-      // Current model behavior: flags the UUID.
-      // If a future model stops flagging it, update this test.
-      const hasUuidSpan = s.some(
-        (sp) => sp.original.includes("550e8400") && sp.category === "SECRET"
+      assert.ok(
+        s.some((sp) => sp.category === "PRIVATE_DATE"),
+        `Expected PRIVATE_DATE, got: ${JSON.stringify(s)}`
       )
-      assert.ok(hasUuidSpan, "Current model flags UUIDs as SECRET (known false positive)")
     })
+  })
+
+  // =========================================================================
+  // False-positive resistance
+  // =========================================================================
 
+  describe("false positive resistance", () => {
     it("does NOT flag a semver version string", async () => {
       const text = "Upgraded @huggingface/transformers from 4.1.0 to 4.2.0"
       const r = await detect(text)
@@ -388,6 +458,12 @@ describe("model-server (AI integration)", { timeout: 300_000, skip: SKIP_AI }, (
       const r = await detect(text)
       assertNotDetected(spans(r), text, "http://localhost:3000")
     })
+
+    it("does NOT flag common programming identifiers", async () => {
+      const text = "const userId = getUserById(req.params.id)"
+      const r = await detect(text)
+      assert.equal(spans(r).length, 0, `Expected no spans, got: ${JSON.stringify(r.body.spans)}`)
+    })
   })
 
   // =========================================================================

From 23447bf6ff9731cf65a36bac874ca5b11066dec5 Mon Sep 17 00:00:00 2001
From: Jeremy John <jeremy.john@rate.com>
Date: Thu, 21 May 2026 16:58:58 -0500
Subject: [PATCH 14/15] fix: bound hash collision loop, dedupe
 sanitizeCategory, tighten placeholder regex, clarify deep.js branching

---
 src/deep.js     |  8 ++++----
 src/patterns.js |  9 +--------
 src/session.js  | 16 +++++-----------
 src/util.js     |  8 ++++++++
 4 files changed, 18 insertions(+), 23 deletions(-)
 create mode 100644 src/util.js

diff --git a/src/deep.js b/src/deep.js
index 60586f6..a071808 100644
--- a/src/deep.js
+++ b/src/deep.js
@@ -27,7 +27,7 @@ export function restoreDeep(value, session) {
       for (let i = 0; i < node.length; i++) {
         const v = node[i]
         if (typeof v === "string") node[i] = restoreText(v, session)
-        if (v && typeof v === "object") walk(v)
+        else if (v && typeof v === "object") walk(v)
       }
       return
     }
@@ -37,7 +37,7 @@ export function restoreDeep(value, session) {
     for (const key of Object.keys(node)) {
       const v = node[key]
       if (typeof v === "string") node[key] = restoreText(v, session)
-      if (v && typeof v === "object") walk(v)
+      else if (v && typeof v === "object") walk(v)
     }
   }
 
@@ -64,7 +64,7 @@ export function redactDeep(value, patterns, session) {
       for (let i = 0; i < node.length; i++) {
         const v = node[i]
         if (typeof v === "string") node[i] = redactText(v, patterns, session).text
-        if (v && typeof v === "object") walk(v)
+        else if (v && typeof v === "object") walk(v)
       }
       return
     }
@@ -74,7 +74,7 @@ export function redactDeep(value, patterns, session) {
     for (const key of Object.keys(node)) {
       const v = node[key]
       if (typeof v === "string") node[key] = redactText(v, patterns, session).text
-      if (v && typeof v === "object") walk(v)
+      else if (v && typeof v === "object") walk(v)
     }
   }
 
diff --git a/src/patterns.js b/src/patterns.js
index c76a1e1..a129f9e 100644
--- a/src/patterns.js
+++ b/src/patterns.js
@@ -1,11 +1,4 @@
-function sanitizeCategory(input) {
-  const raw = String(input ?? "").trim()
-  if (!raw) return "TEXT"
-  const upper = raw.toUpperCase()
-  const safe = upper.replace(/[^A-Z0-9_]/g, "_").replace(/_+/g, "_")
-  if (!safe) return "TEXT"
-  return safe
-}
+import { sanitizeCategory } from "./util.js"
 
 /**
  * 将 Go 风格的 `(?i)` / `(?m)` 前缀做一个轻量兼容（仅处理“开头连续出现”的情况）。
diff --git a/src/session.js b/src/session.js
index eff5c91..ae9db16 100644
--- a/src/session.js
+++ b/src/session.js
@@ -1,13 +1,5 @@
 import { createHmac, randomBytes } from "node:crypto"
-
-function sanitizeCategory(input) {
-  const raw = String(input ?? "").trim()
-  if (!raw) return "TEXT"
-  const upper = raw.toUpperCase()
-  const safe = upper.replace(/[^A-Z0-9_]/g, "_").replace(/_+/g, "_")
-  if (!safe) return "TEXT"
-  return safe
-}
+import { sanitizeCategory } from "./util.js"
 
 function toHexLower(buffer) {
   return Buffer.from(buffer).toString("hex")
@@ -121,7 +113,7 @@ export class PlaceholderSession {
 
     // 极低概率：hash12 冲突。追加 _N 后缀保证唯一性（与 VibeGuard 一致的策略）。
     const withoutSuffix = base.slice(0, -2) // 去掉末尾 "__"
-    for (let i = 2; ; i++) {
+    for (let i = 2; i < 1000; i++) {
       const candidate = `${withoutSuffix}_${i}__`
       const prev = this.forward.get(candidate)
       if (prev === undefined) {
@@ -136,11 +128,13 @@ export class PlaceholderSession {
         return candidate
       }
     }
+    // Exhausted collision slots — should never happen with HMAC-SHA256
+    return base
   }
 }
 
 export function getPlaceholderRegex(prefix) {
   const escaped = String(prefix).replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
   // Pattern: __VG_CATEGORY_HASH12__ or __VG_CATEGORY_HASH12_N__
-  return new RegExp(`${escaped}[A-Za-z0-9_]+_[a-f0-9A-F]{12}(?:_\\d+)?__`, "g")
+  return new RegExp(`${escaped}[A-Za-z0-9_]+_[a-f0-9]{12}(?:_\\d+)?__`, "g")
 }
diff --git a/src/util.js b/src/util.js
new file mode 100644
index 0000000..4f1d9dd
--- /dev/null
+++ b/src/util.js
@@ -0,0 +1,8 @@
+export function sanitizeCategory(input) {
+  const raw = String(input ?? "").trim()
+  if (!raw) return "TEXT"
+  const upper = raw.toUpperCase()
+  const safe = upper.replace(/[^A-Z0-9_]/g, "_").replace(/_+/g, "_")
+  if (!safe) return "TEXT"
+  return safe
+}

From f57599cab412ae8edbe0dee5144ed372f5f331c2 Mon Sep 17 00:00:00 2001
From: Jeremy John <jeremy.john@rate.com>
Date: Tue, 26 May 2026 14:13:12 -0500
Subject: [PATCH 15/15] fix: throw on collision limit instead of silent
 fallback

---
 src/session.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/session.js b/src/session.js
index ae9db16..f65854a 100644
--- a/src/session.js
+++ b/src/session.js
@@ -128,8 +128,8 @@ export class PlaceholderSession {
         return candidate
       }
     }
-    // Exhausted collision slots — should never happen with HMAC-SHA256
-    return base
+    throw new Error(`vibeguard: placeholder collision limit exceeded for category "${category}"`)
+
   }
 }