initializ · initializ-mk · Jun 14, 2026 · Jun 14, 2026 · Jun 14, 2026
diff --git a/.claude/skills/forge.md b/.claude/skills/forge.md
@@ -924,7 +924,7 @@ when OTel tracing is enabled (OTel v1 / Phase 4 / #105). Both use
 | `AuditEgressBlocked` | `egress_blocked` | Outbound request blocked |
 | `AuditLLMCall` | `llm_call` | LLM provider call complete; `model`, `provider`, `input_tokens`, `output_tokens`, `duration_ms`, `request_id` |
 | `AuditLLMCallCancelled` | `llm_call_cancelled` | Streaming call aborted mid-flight; partial usage counts |
-| `AuditGuardrail` | `guardrail_check` | Guardrail evaluation result |
+| `AuditGuardrail` | `guardrail_check` | Mask / block / warn decision. Fields: `direction` (`inbound` / `outbound` / `tool_output`), `decision` (`masked` / `warned` / `blocked`), `guardrail`, `category`, `violation_count`. Opt-in `evidence` (redacted + truncated triggering text) via `FORGE_GUARDRAIL_CAPTURE_EVIDENCE=true` |
 | `AuditScheduleFire` | `schedule_fire` | Cron task triggered |
 | `AuditScheduleComplete` | `schedule_complete` | Cron task finished |
 | `AuditScheduleSkip` | `schedule_skip` | Cron task skipped (e.g. agent busy) |

diff --git a/docs/security/audit-logging.md b/docs/security/audit-logging.md
@@ -21,7 +21,7 @@ All runtime security events are emitted as structured NDJSON to stderr with corr
 | `llm_call_cancelled` | Streaming LLM call cancelled mid-flight; carries partial token counts captured up to cancellation. |
 | `invocation_complete` | A2A invocation finished (auth → dispatch → engine → response). Carries `duration_ms` (wall-clock) plus aggregated `input_tokens_total` / `output_tokens_total` / `llm_call_count` / `model` / `provider`. |
 | `invocation_cancelled` | A2A invocation cancelled mid-flight via `tasks/cancel` (or internal cancellation like parent ctx deadline). Carries `fields.reason` (one of `workflow_failure` / `cost_limit_exceeded` / `timeout` / `external_signal`), `duration_ms` up to cancellation, and any partial token totals consumed before the signal. See [Cancellation](#cancellation). |
-| `guardrail_check` | Guardrail evaluation result |
+| `guardrail_check` | Guardrail mask / block / warn decision. Carries `fields.direction` (`inbound` / `outbound` / `tool_output`), `fields.decision` (`masked` / `warned` / `blocked`), `fields.guardrail` + `fields.category` from the triggering violation, and `fields.violation_count`. With `FORGE_GUARDRAIL_CAPTURE_EVIDENCE=true` operators also opt into `fields.evidence` carrying the redacted + truncated triggering text. See [Guardrails — Audit Events](guardrails.md#audit-events). |
 | `auth_verify` | Inbound request authenticated successfully (with `provider`, `user_id`, `org_id`, `token_kind`) |
 | `auth_fail` | Inbound request rejected (with `reason`, `token_kind`) |
 | `agent_card_published` | Agent Card finalized at startup or hot-reload (with `name`, `version`, `protocol_version`, `url`, `skill_count`, `capabilities`, `security_schemes`, `card_size_bytes`, `card_sha256`). See [Agent Card reference](../reference/a2a-agent-card.md). |

diff --git a/docs/security/guardrails.md b/docs/security/guardrails.md
@@ -503,12 +503,95 @@ The `cli_execute` tool blocks arguments containing `file://` URLs (case-insensit
 
 ## Audit Events
 
-Guardrail evaluations are logged as structured audit events:
+Every mask / block / warn decision emits a `guardrail_check` audit
+event through the configured Forge audit sink stack (stderr safety
+net + the optional Unix socket / HTTP sink wired via
+`FORGE_AUDIT_SOCKET` / `FORGE_AUDIT_HTTP_ENDPOINT`). The event
+carries the per-invocation `correlation_id`, `task_id`, sequence
+number, and workflow-correlation tags so consumers can join it to
+the `session_start` / `llm_call` / `invocation_complete` rows for
+the same request.
+
+Default shape (metadata-only):
 
 ```json
-{"ts":"2026-02-28T10:00:00Z","event":"guardrail_check","correlation_id":"a1b2c3d4","fields":{"guardrail":"pii","direction":"inbound","result":"masked"}}
+{
+  "ts": "2026-06-14T10:00:00Z",
+  "event": "guardrail_check",
+  "schema_version": "1.0",
+  "seq": 2,
+  "correlation_id": "a1b2c3d4",
+  "task_id": "slack-...",
+  "fields": {
+    "direction": "inbound",
+    "decision": "masked",
+    "guardrail": "pii",
+    "category": "ssn",
+    "violation_count": 1
+  }
+}
 ```
 
-In DB mode, the guardrails library writes audit records to MongoDB automatically when `EnableAudit` is set.
-
-See [Security Overview](overview.md) for the full security architecture.
+Field reference:
+
+| Field | Values | Meaning |
+|-------|--------|---------|
+| `direction` | `inbound` / `outbound` / `tool_output` | Which gate fired |
+| `decision` | `masked` / `warned` / `blocked` | Library decision after policy resolution |
+| `guardrail` | `pii` / `moderation` / `security` / `none` / … | First violation's `Type` (`none` when violations list is empty) |
+| `category` | `ssn` / `email` / `hate_speech` / … | First violation's `Category`; omitted when empty |
+| `violation_count` | integer ≥ 0 | Length of `result.Violations` |
+| `tool` | string | Tool name; present only when `direction=tool_output` |
+| `evidence` | string | Captured triggering text; present only when opt-in is on (see below) |
+
+### Evidence capture (opt-in)
+
+The default posture is **metadata-only**: the offending text never
+travels through the audit pipeline. Operators who need it
+(false-positive triage, compliance evidence, pattern tuning) opt in
+per-deployment via:
+
+| Env var | Default | Meaning |
+|---------|---------|---------|
+| `FORGE_GUARDRAIL_CAPTURE_EVIDENCE` | `false` | Include `fields.evidence` in the emitted event |
+| `FORGE_GUARDRAIL_REDACT` | `true` | Run a vendor-secret regex scrub over the captured evidence before emission |
+| `FORGE_GUARDRAIL_MAX_BYTES` | `4096` | Per-event soft cap; overage is truncated with a `…[truncated:N]` marker |
+
+`Redact` is on whenever `CaptureEvidence` is on unless you explicitly
+disable it. The scrub matches obvious vendor token shapes (Anthropic
+`sk-ant-…`, OpenAI `sk-…`, GitHub `ghp_/gho_/ghs_/github_pat_…`, AWS
+`AKIA…`, Slack `xox[bp]-…`, private-key PEM headers, Telegram bot
+tokens) and replaces each match with `[REDACTED]`. It is
+defense-in-depth — the guardrail library has usually already masked
+these, but an unmasked input that hit a different rule (e.g.
+moderation) would otherwise carry secrets through verbatim.
+
+The size envelope and `[REDACTED]` marker match the OTel span
+content-capture pipeline (issue #130) so the same string travels
+through both pipelines under one contract.
+
+#### What evidence actually contains
+
+| Decision | Evidence source |
+|----------|-----------------|
+| `masked` | The **post-mask** content (`Result.MaskedContent`) — the same payload the LLM saw downstream. PII the library already masked stays masked in the audit stream. |
+| `warned` | The original triggering content. No mask was produced (the library only generates a masked variant for `mask` decisions). The redact pass still runs. |
+| `blocked` | The original triggering content. Same rationale as `warned`. |
+
+This means a typical PII-mask event emits the masked version of the
+prompt as evidence, not the raw text. Operators auditing for "did our
+agent ever see PII?" should treat `decision=warned` and
+`decision=blocked` rows as the only ones that can carry plain-text PII
+through the stream, and gate their export pipeline accordingly.
+
+### Mode-specific behavior
+
+- **File mode** — every event flows through the Forge audit pipeline.
+- **DB mode** — the guardrails library also writes audit records to
+  MongoDB when `EnableAudit` is set. Forge still emits the
+  `guardrail_check` event on its own audit sinks so SIEM consumers
+  reading the export socket see parity regardless of mode.
+
+See [Security Overview](overview.md) for the full security architecture
+and [Audit Logging](audit-logging.md) for the sink stack and schema
+contract.
diff --git a/forge-cli/runtime/guardrails_audit.go b/forge-cli/runtime/guardrails_audit.go
@@ -0,0 +1,178 @@
+package runtime
+
+import (
+	"context"
+	"os"
+	"regexp"
+	"strconv"
+
+	"github.com/initializ/guardrails"
+
+	coreruntime "github.com/initializ/forge/forge-core/runtime"
+)
+
+// GuardrailAuditConfig controls how the LibraryGuardrailEngine emits
+// guardrail_check audit events. The default zero value preserves the
+// pre-#155 metadata-only posture: an emitted event carries direction,
+// decision, guardrail type, and violation count, but never the raw
+// content that triggered the rule.
+//
+// Operators who need the offending text (to tune patterns, debug
+// false positives, or satisfy compliance evidence requirements) opt
+// in by flipping CaptureEvidence to true. Redact defaults to true only
+// via GuardrailAuditConfigFromEnv; a struct literal leaves it false,
+// so code setting CaptureEvidence directly must also set Redact to
+// keep the secret scrub. MaxBytes bounds the captured evidence per
+// event; zero falls back to DefaultGuardrailEvidenceCapBytes.
+//
+// Same posture as the #130 OTel content-capture work: default off,
+// opt-in per-deployment, redact-then-truncate when on.
+type GuardrailAuditConfig struct {
+	// CaptureEvidence includes the triggering content handed to the
+	// emitter in the event's `fields.evidence`. OFF by default.
+	CaptureEvidence bool
+
+	// Redact runs a known-secret regex pass on the captured evidence
+	// before truncation. The zero value is OFF; GuardrailAuditConfigFromEnv
+	// turns it ON unless the env var disables it. Disable only when the
+	// consumer has its own scrubbing layer (e.g. a SIEM normalizer).
+	Redact bool
+
+	// MaxBytes is the soft cap on the captured evidence string. Zero
+	// or negative uses DefaultGuardrailEvidenceCapBytes (4 KiB).
+	MaxBytes int
+}
+
+// DefaultGuardrailEvidenceCapBytes is the per-event cap for captured
+// evidence when GuardrailAuditConfig.MaxBytes is zero or negative. 4 KiB
+// matches the OTel span attribute soft cap so the same content travels
+// through both pipelines under the same size envelope.
+const DefaultGuardrailEvidenceCapBytes = 4 << 10 // 4096 bytes
+
+// Environment variable names read by GuardrailAuditConfigFromEnv; they
+// mirror the existing audit/export pattern. The CLI surfaces these via
+// run/serve flags or operators can set them directly on the agent process.
+const (
+	EnvGuardrailCaptureEvidence = "FORGE_GUARDRAIL_CAPTURE_EVIDENCE" // bool: sets CaptureEvidence
+	EnvGuardrailRedact          = "FORGE_GUARDRAIL_REDACT"           // bool: sets Redact
+	EnvGuardrailMaxBytes        = "FORGE_GUARDRAIL_MAX_BYTES"        // positive int: sets MaxBytes
+)
+
+// GuardrailAuditConfigFromEnv builds a config from the FORGE_GUARDRAIL_*
+// env vars. Redact starts true, so enabling CaptureEvidence alone keeps
+// the scrub on. Unset, empty, or unparseable values leave the default.
+func GuardrailAuditConfigFromEnv() GuardrailAuditConfig {
+	cfg := GuardrailAuditConfig{Redact: true}
+	if v := os.Getenv(EnvGuardrailCaptureEvidence); v != "" {
+		if b, err := strconv.ParseBool(v); err == nil { // parse errors are ignored: capture stays off
+			cfg.CaptureEvidence = b
+		}
+	}
+	if v := os.Getenv(EnvGuardrailRedact); v != "" {
+		if b, err := strconv.ParseBool(v); err == nil { // parse errors are ignored: redaction stays on
+			cfg.Redact = b
+		}
+	}
+	if v := os.Getenv(EnvGuardrailMaxBytes); v != "" {
+		if n, err := strconv.Atoi(v); err == nil && n > 0 { // zero/negative/malformed: MaxBytes stays 0 (default cap)
+			cfg.MaxBytes = n
+		}
+	}
+	return cfg
+}
+
+// secretRedactPatterns are the vendor token shapes scrubbed when
+// GuardrailAuditConfig.Redact is on. Same set as the OTel content
+// redaction pass (issue #130) so the audit and trace pipelines stay
+// consistent. Defence-in-depth only: the guardrail library may already
+// have masked these, but an unmasked input that hit a different rule
+// (e.g. moderation) would otherwise carry secrets through verbatim.
+var secretRedactPatterns = []*regexp.Regexp{
+	regexp.MustCompile(`sk-ant-[A-Za-z0-9\-]{20,}`),                      // Anthropic API key
+	regexp.MustCompile(`sk-[A-Za-z0-9]{20,}`),                            // OpenAI-style sk- key
+	regexp.MustCompile(`ghp_[A-Za-z0-9]{36}`),                            // GitHub personal access token
+	regexp.MustCompile(`gho_[A-Za-z0-9]{36}`),                            // GitHub OAuth token
+	regexp.MustCompile(`ghs_[A-Za-z0-9]{36}`),                            // GitHub server-to-server token
+	regexp.MustCompile(`github_pat_[A-Za-z0-9_]{22,}`),                   // GitHub fine-grained PAT
+	regexp.MustCompile(`AKIA[0-9A-Z]{16}`),                               // AWS access key ID
+	regexp.MustCompile(`xox[bp]-[0-9]{10,}-[A-Za-z0-9-]+`),               // Slack bot (xoxb) / user (xoxp) token
+	regexp.MustCompile(`-----BEGIN (RSA|EC|OPENSSH|PRIVATE) .*KEY-----`), // PEM private-key BEGIN header
+	regexp.MustCompile(`[0-9]{8,10}:[A-Za-z0-9_-]{35,}`),                 // Telegram bot token
+}
+
+// redactSecrets replaces every match of each secretRedactPatterns entry,
+// applied in slice order, with [REDACTED]. Mirrors the marker used by the
+// FWS-8 capture path so audit consumers see one token across both pipelines.
+func redactSecrets(s string) string {
+	for _, re := range secretRedactPatterns {
+		s = re.ReplaceAllString(s, "[REDACTED]")
+	}
+	return s
+}
+
+// prepareEvidence returns the evidence string to attach to an audit
+// event. s is scrubbed with redactSecrets when cfg.Redact is set, then
+// passed to coreruntime.TruncateForAudit with the effective byte cap:
+// cfg.MaxBytes, or DefaultGuardrailEvidenceCapBytes when that is zero
+// or negative. Returns "" when input is "" so callers can drop the
+// field cleanly.
+func prepareEvidence(s string, cfg GuardrailAuditConfig) string {
+	if s == "" {
+		return ""
+	}
+	if cfg.Redact {
+		// Redact before truncating so the patterns see whole tokens
+		// rather than a fragment cut off at the cap.
+		s = redactSecrets(s)
+	}
+	limit := cfg.MaxBytes
+	if limit <= 0 {
+		limit = DefaultGuardrailEvidenceCapBytes
+	}
+	return coreruntime.TruncateForAudit(s, limit)
+}
+
+// emitGuardrailEvent builds and emits a guardrail_check audit event for
+// one mask/block/warn decision. Routed through EmitFromContext so the
+// per-invocation correlation_id, task_id, sequence number, and workflow
+// tags auto-attach from the request context.
+//
+// Behavior matrix:
+//
+//   - audit logger nil → no-op (DB mode with platform-side audit only,
+//     or unit tests with no logger wired)
+//   - res nil          → no-op (defensive; emit only when we have a
+//     guardrail Result to summarize)
+//   - CaptureEvidence on AND content non-empty → fields.evidence is
+//     set (redacted when cfg.Redact, then truncated per cfg)
+//   - CaptureEvidence off → fields.evidence omitted entirely
+func (e *LibraryGuardrailEngine) emitGuardrailEvent(
+	ctx context.Context,
+	direction, tool, content string,
+	decision string,
+	res *guardrails.Result,
+) {
+	if e.auditLogger == nil || res == nil {
+		return
+	}
+	fields := map[string]any{
+		"direction":       direction,
+		"decision":        decision,
+		"violation_count": len(res.Violations),
+	}
+	if len(res.Violations) > 0 { // only the first violation is summarized; the rest show up in violation_count
+		fields["guardrail"] = res.Violations[0].Type
+		if cat := res.Violations[0].Category; cat != "" {
+			fields["category"] = cat
+		}
+	} else {
+		fields["guardrail"] = "none" // keep the key present even when no violation was reported
+	}
+	if tool != "" { // field is omitted when the caller passes no tool name
+		fields["tool"] = tool
+	}
+	if e.auditCfg.CaptureEvidence {
+		if ev := prepareEvidence(content, e.auditCfg); ev != "" { // empty result: leave the field out
+			fields["evidence"] = ev
+		}
+	}
+	e.auditLogger.EmitFromContext(ctx, coreruntime.AuditEvent{
+		Event:  coreruntime.AuditGuardrail,
+		Fields: fields,
+	})
+}