diff --git a/.env.example b/.env.example
index 6a4e090..7f3ff44 100644
--- a/.env.example
+++ b/.env.example
@@ -269,6 +269,18 @@ AGENTS_DEFAULT_MODEL=haiku
 AGENTS_MAX_STEPS=15
 AGENTS_TIMEOUT=300000
 
+# ==============================================================================
+# Task Decomposition (opt-in; requires AGENTS_ENABLED=true)
+# ==============================================================================
+# Breaks complex, divisible tasks into focused subtasks run with isolated
+# context (parallel where independent), then synthesizes the result. A cost-aware
+# gate decides WHEN to decompose — decomposition can cost MORE than it saves on
+# small/indivisible tasks, so it only triggers on complex, large, divisible work.
+# Exposed as the DecomposeTask tool. All other settings (models, gate thresholds,
+# shadow mode) are hardcoded in src/config/index.js.
+
+TASK_DECOMPOSITION_ENABLED=false
+
 # ==============================================================================
 # MCP Sandbox Configuration
 # ==============================================================================
diff --git a/Dockerfile b/Dockerfile
index 29adbf0..bc2b111 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -23,7 +23,7 @@ FROM node:24-alpine AS runtime
 
 ARG VCS_REF
 ARG BUILD_DATE
-ARG VERSION=9.5.0
+ARG VERSION=9.6.0
 
 LABEL org.opencontainers.image.title="Lynkr" \
       org.opencontainers.image.description="Universal LLM proxy for Claude Code, Cursor, and AI coding tools" \
diff --git a/docker-compose.yml b/docker-compose.yml
index 88a255d..e161cec 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -3,7 +3,7 @@ services:
   lynkr:
     build: .
     container_name: lynkr
-    image: lynkr:9.5.0
+    image: lynkr:9.6.0
     ports:
       - "8081:8081"
     extra_hosts:
@@ -329,7 +329,7 @@ services:
       retries: 3
       start_period: 40s
     labels:
-      - "com.lynkr.version=9.5.0"
+      - "com.lynkr.version=9.6.0"
       - "com.lynkr.description=Claude Code proxy with multi-provider support"
     # Uncomment to set resource limits
     # deploy:
diff --git a/package.json b/package.json
index 3f2df02..a46fee0 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "lynkr",
-  "version": "9.5.0",
+  "version": "9.6.0",
   "description": "Self-hosted LLM gateway and tier-routing proxy for Claude Code, Cursor, and Codex. Routes across Ollama, AWS Bedrock, OpenRouter, Databricks, Azure OpenAI, llama.cpp, and LM Studio with prompt caching, MCP tools, and 60-80% cost savings.",
   "main": "index.js",
   "bin": {
@@ -16,7 +16,7 @@
     "dev": "nodemon index.js",
     "lint": "eslint src index.js",
     "test": "npm run test:unit && npm run test:performance",
-    "test:unit": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/routing.test.js test/hybrid-routing-integration.test.js test/web-tools.test.js test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js test/azure-openai-config.test.js test/azure-openai-format-conversion.test.js test/azure-openai-routing.test.js test/azure-openai-streaming.test.js test/azure-openai-error-resilience.test.js test/azure-openai-integration.test.js test/openai-integration.test.js test/toon-compression.test.js test/llamacpp-integration.test.js test/resilience.test.js test/telemetry-routing.test.js test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js test/distill.test.js test/large-payload.test.js test/code-mode.test.js test/prompt-cache-injection.test.js test/risk-analyzer.test.js test/interaction-block.test.js test/preflight.test.js test/token-reduction.test.js test/session-affinity.test.js test/model-registry-cost.test.js",
+    "test:unit": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/routing.test.js test/hybrid-routing-integration.test.js test/web-tools.test.js test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js test/azure-openai-config.test.js test/azure-openai-format-conversion.test.js test/azure-openai-routing.test.js test/azure-openai-streaming.test.js test/azure-openai-error-resilience.test.js test/azure-openai-integration.test.js test/openai-integration.test.js test/toon-compression.test.js test/llamacpp-integration.test.js test/resilience.test.js test/telemetry-routing.test.js test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js test/distill.test.js test/large-payload.test.js test/code-mode.test.js test/prompt-cache-injection.test.js test/risk-analyzer.test.js test/interaction-block.test.js test/preflight.test.js test/token-reduction.test.js test/session-affinity.test.js test/model-registry-cost.test.js test/task-decomposition.test.js test/output-format-guard.test.js test/tier-fallback.test.js",
     "test:memory": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js",
     "test:new-features": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js",
     "test:performance": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/hybrid-routing-performance.test.js && DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/performance-tests.js",
diff --git a/src/agents/decomposition/dispatcher.js b/src/agents/decomposition/dispatcher.js
new file mode 100644
index 0000000..09b05cc
--- /dev/null
+++ b/src/agents/decomposition/dispatcher.js
@@ -0,0 +1,185 @@
+/**
+ * Subtask dispatcher (Phase 3).
+ *
+ * Executes a validated plan respecting its dependency DAG:
+ *   - subtasks are grouped into topological "levels" (Kahn's algorithm)
+ *   - subtasks in the same level have no dependency on each other → run in
+ *     parallel via the existing ParallelCoordinator (spawnParallel)
+ *   - a subtask receives ONLY its own prompt plus the compressed results of the
+ *     subtasks it depends on (context isolation — the token win)
+ *
+ * The spawn functions are injectable for testing.
+ */
+
+const logger = require("../../logger");
+
+// Cap how much of a dependency's result we forward, to preserve the
+// context-isolation savings (subagents already return summaries; this bounds
+// pathological cases).
+const MAX_CONTEXT_CHARS = 2000;
+
+/**
+ * Group subtasks into dependency levels. Returns array of arrays of ids.
+ * Throws if the graph is unresolvable (should not happen — planner validated).
+ */
+function topologicalLevels(subtasks) {
+  const byId = new Map(subtasks.map((s) => [s.id, s]));
+  const indegree = new Map(subtasks.map((s) => [s.id, 0]));
+  const dependents = new Map(subtasks.map((s) => [s.id, []]));
+
+  for (const s of subtasks) {
+    for (const dep of s.dependsOn) {
+      if (!byId.has(dep)) continue;
+      indegree.set(s.id, indegree.get(s.id) + 1);
+      dependents.get(dep).push(s.id);
+    }
+  }
+
+  const levels = [];
+  let frontier = subtasks.filter((s) => indegree.get(s.id) === 0).map((s) => s.id);
+  const resolved = new Set();
+
+  while (frontier.length > 0) {
+    levels.push(frontier);
+    const next = [];
+    for (const id of frontier) {
+      resolved.add(id);
+      for (const child of dependents.get(id)) {
+        indegree.set(child, indegree.get(child) - 1);
+        if (indegree.get(child) === 0) next.push(child);
+      }
+    }
+    frontier = next;
+  }
+
+  if (resolved.size !== subtasks.length) {
+    throw new Error("Unresolvable subtask graph (cycle or dangling dependency)");
+  }
+  return levels;
+}
+
+function compressResult(text) {
+  if (typeof text !== "string") return String(text ?? "");
+  if (text.length <= MAX_CONTEXT_CHARS) return text;
+  return text.slice(0, MAX_CONTEXT_CHARS) + "\n…[truncated]";
+}
+
+function buildContextForSubtask(subtask, resultsById) {
+  if (!subtask.dependsOn || subtask.dependsOn.length === 0) return null;
+  const parts = [];
+  for (const dep of subtask.dependsOn) {
+    const r = resultsById.get(dep);
+    if (r && r.success && r.result) {
+      parts.push(`Result of subtask ${dep}:\n${compressResult(r.result)}`);
+    } else if (r) {
+      parts.push(`Subtask ${dep} did not complete successfully.`);
+    }
+  }
+  return parts.length > 0 ? parts.join("\n\n") : null;
+}
+
+/**
+ * Dispatch a validated plan.
+ * @param {Object} plan - { subtasks: [...] }
+ * @param {Object} [options]
+ * @param {string} [options.sessionId]
+ * @param {string} [options.cwd]
+ * @param {Function} [options.spawnParallel] - (agentTypes[], prompts[], opts) => results[]
+ * @returns {Promise<{results: Array, levels: Array, stats: Object}>}
+ */
+async function dispatchPlan(plan, options = {}) {
+  const spawnParallel = options.spawnParallel || require("../index").spawnParallel;
+  const subtasks = plan.subtasks;
+  const byId = new Map(subtasks.map((s) => [s.id, s]));
+  const levels = topologicalLevels(subtasks);
+  const resultsById = new Map();
+
+  let totalInputTokens = 0;
+  let totalOutputTokens = 0;
+  let totalSubagents = 0;
+
+  for (let li = 0; li < levels.length; li++) {
+    const levelIds = levels[li];
+    const levelSubtasks = levelIds.map((id) => byId.get(id));
+
+    const agentTypes = levelSubtasks.map((s) => s.agentType);
+    const prompts = levelSubtasks.map((s) => s.prompt);
+    const perTaskContext = levelSubtasks.map((s) => buildContextForSubtask(s, resultsById));
+
+    logger.info(
+      { level: li, count: levelIds.length, ids: levelIds },
+      "[Decomposition] Dispatching subtask level"
+    );
+
+    // spawnParallel shares one options object; pass per-task context by spawning
+    // the level as individual parallel calls when contexts differ.
+    const levelResults = await runLevel(
+      spawnParallel,
+      agentTypes,
+      prompts,
+      perTaskContext,
+      options
+    );
+
+    levelResults.forEach((res, idx) => {
+      const st = levelSubtasks[idx];
+      totalSubagents += 1;
+      totalInputTokens += res?.stats?.inputTokens || 0;
+      totalOutputTokens += res?.stats?.outputTokens || 0;
+      resultsById.set(st.id, {
+        id: st.id,
+        agentType: st.agentType,
+        success: !!res?.success,
+        result: res?.success ? res.result : null,
+        error: res?.success ? null : res?.error || "unknown error",
+        stats: res?.stats || {},
+      });
+    });
+  }
+
+  const results = subtasks.map((s) => resultsById.get(s.id));
+  return {
+    results,
+    levels,
+    stats: {
+      subagents: totalSubagents,
+      inputTokens: totalInputTokens,
+      outputTokens: totalOutputTokens,
+    },
+  };
+}
+
+/**
+ * Run one level. When subtasks in the level have differing injected contexts we
+ * spawn them as separate parallel calls (each with its own mainContext), then
+ * await all. When none need context, a single spawnParallel batch is used.
+ */
+async function runLevel(spawnParallel, agentTypes, prompts, perTaskContext, options) {
+  const anyContext = perTaskContext.some((c) => c);
+
+  if (!anyContext) {
+    return spawnParallel(agentTypes, prompts, {
+      sessionId: options.sessionId,
+      cwd: options.cwd,
+    });
+  }
+
+  // Mixed/with-context: one spawnParallel call per subtask so each gets its own
+  // mainContext, executed concurrently.
+  const calls = agentTypes.map((type, i) =>
+    spawnParallel([type], [prompts[i]], {
+      sessionId: options.sessionId,
+      cwd: options.cwd,
+      mainContext: perTaskContext[i] ? { relevant_context: perTaskContext[i] } : undefined,
+    }).then((arr) => arr[0])
+  );
+  return Promise.all(calls);
+}
+
+module.exports = {
+  dispatchPlan,
+  topologicalLevels,
+  buildContextForSubtask,
+  compressResult,
+  MAX_CONTEXT_CHARS,
+};
diff --git a/src/agents/decomposition/gate.js b/src/agents/decomposition/gate.js
new file mode 100644
index 0000000..0182f14
--- /dev/null
+++ b/src/agents/decomposition/gate.js
@@ -0,0 +1,136 @@
+/**
+ * Decomposition gate (Phase 1).
+ *
+ * Decides whether breaking a task into isolated-context subtasks is actually
+ * worth it. This is the make-or-break of the feature: naive "decompose
+ * everything" loses money, because every subagent carries fixed overhead
+ * (planning + per-agent handoff/summarisation). Decomposition only pays off
+ * when the task is (a) genuinely complex, (b) large enough to amortise that
+ * overhead, and (c) divisible into reasonably independent units.
+ *
+ * Pure and synchronous so it can be unit-tested without a model. The caller
+ * supplies a pre-computed complexity `analysis` (from routing/complexity-analyzer)
+ * and the raw payload.
+ */
+
+const DEFAULTS = {
+  minComplexity: 60, // 0-100; only decompose genuinely complex work
+  minTokens: 3000, // estimated monolithic tokens; below this the overhead wins
+  minIndependentUnits: 2, // need at least 2 separable pieces to bother
+  maxSubtasks: 6,
+};
+
+const ENUMERATION_RE = /^\s*(?:[-*+]|\d+[.)]|step\s+\d+\b)/gim;
+const CONJUNCTION_RE = /\b(?:and then|then|also|additionally|as well as|after that|finally|next,)\b/gi;
+const IMPERATIVE_RE = /\b(?:add|create|build|implement|write|refactor|update|fix|remove|delete|migrate|test|document|configure|set up|wire|integrate|generate)\b/gi;
+const FILE_PATH_RE = /\b[\w./-]+\.(?:js|ts|tsx|jsx|py|go|rs|java|rb|c|cpp|h|json|yaml|yml|md|sql|sh|css|html)\b/gi;
+
+function _uniqueMatches(text, re) {
+  const set = new Set();
+  const matches = text.match(re) || [];
+  for (const m of matches) set.add(m.toLowerCase().trim());
+  return set;
+}
+
+/**
+ * Heuristically estimate how many independent units a task contains.
+ * Conservative: takes the strongest of several weak signals rather than summing
+ * them, so a single rambling sentence doesn't look like five subtasks.
+ * @param {string} text
+ * @returns {number}
+ */
+function estimateIndependentUnits(text) {
+  if (!text || typeof text !== "string") return 1;
+
+  const enumerated = (text.match(ENUMERATION_RE) || []).length;
+  const conjunctions = (text.match(CONJUNCTION_RE) || []).length;
+  const imperatives = _uniqueMatches(text, IMPERATIVE_RE).size;
+  const files = _uniqueMatches(text, FILE_PATH_RE).size;
+
+  // Each signal is an independent lower-bound estimate of separable units.
+  const signals = [
+    enumerated, // explicit list items
+    conjunctions + 1, // "do A and then B" → 2 units
+    imperatives, // distinct action verbs
+    files, // distinct files usually map to distinct work
+  ];
+
+  const estimate = Math.max(...signals, 1);
+  return estimate;
+}
+
+/**
+ * Decide whether to decompose.
+ * @param {Object} analysis - result of analyzeComplexity(payload)
+ * @param {Object} payload - the request payload
+ * @param {Object} [options]
+ * @param {Object} [options.config] - threshold overrides (see DEFAULTS)
+ * @param {string} [options.riskLevel] - 'low'|'medium'|'high'; 'high' disables decomposition
+ * @param {string} [options.taskText] - explicit task text (else derived from analysis/payload)
+ * @returns {{ decompose: boolean, reason: string, signals: Object }}
+ */
+function shouldDecompose(analysis, payload = {}, options = {}) {
+  const cfg = { ...DEFAULTS, ...(options.config || {}) };
+
+  const score = Number(analysis?.score ?? 0);
+  const estimatedTokens = Number(
+    analysis?.breakdown?.tokens?.estimated ?? options.estimatedTokens ?? 0
+  );
+
+  const taskText =
+    options.taskText ||
+    analysis?.content ||
+    _firstUserText(payload) ||
+    "";
+
+  const independentUnits = estimateIndependentUnits(taskText);
+
+  const signals = {
+    score,
+    estimatedTokens,
+    independentUnits,
+    riskLevel: options.riskLevel || "low",
+    thresholds: cfg,
+  };
+
+  // Never decompose high-risk work — keep it in one capable context where the
+  // exempt-from-laziness concerns (validation/security) stay coherent.
+  if (options.riskLevel === "high") {
+    return { decompose: false, reason: "high_risk_skip", signals };
+  }
+
+  if (score < cfg.minComplexity) {
+    return { decompose: false, reason: "below_complexity_threshold", signals };
+  }
+
+  if (estimatedTokens < cfg.minTokens) {
+    return { decompose: false, reason: "too_small_to_amortise_overhead", signals };
+  }
+
+  if (independentUnits < cfg.minIndependentUnits) {
+    return { decompose: false, reason: "not_divisible", signals };
+  }
+
+  return { decompose: true, reason: "decompose_worthwhile", signals };
+}
+
+function _firstUserText(payload) {
+  const messages = payload?.messages;
+  if (!Array.isArray(messages)) return "";
+  const user = [...messages].reverse().find((m) => m.role === "user");
+  if (!user) return "";
+  if (typeof user.content === "string") return user.content;
+  if (Array.isArray(user.content)) {
+    return user.content
+      .filter((b) => b?.type === "text" || typeof b?.text === "string")
+      .map((b) => b.text || "")
+      .join("\n");
+  }
+  return "";
+}
+
+module.exports = {
+  shouldDecompose,
+  estimateIndependentUnits,
+  DEFAULTS,
+};
diff --git a/src/agents/decomposition/index.js b/src/agents/decomposition/index.js
new file mode 100644
index 0000000..df220db
--- /dev/null
+++ b/src/agents/decomposition/index.js
@@ -0,0 +1,183 @@
+/**
+ * Task decomposition — orchestration entry point.
+ *
+ * Ties the phases together:
+ *   1. gate        — decide if decomposing is worth it (cost-aware)
+ *   2. planner     — produce a validated subtask DAG (one model call)
+ *   3. dispatcher  — run subtasks (parallel within dependency levels, isolated context)
+ *   4. synthesizer — combine results into the final answer (one model call)
+ *   5. quality     — confidence-score the synthesis; flag low-confidence output
+ *   6. telemetry   — record decision + estimated net token savings; shadow mode
+ *
+ * Opt-in via TASK_DECOMPOSITION_ENABLED=true. Requires AGENTS_ENABLED=true
+ * (it builds on the subagent machinery). Any failure degrades gracefully to a
+ * non-decomposed result so the caller can solve monolithically.
+ */
+
+const config = require("../../config");
+const logger = require("../../logger");
+const { shouldDecompose } = require("./gate");
+const { generatePlan } = require("./planner");
+const { dispatchPlan } = require("./dispatcher");
+const { synthesize } = require("./synthesizer");
+const telemetry = require("./telemetry");
+const { analyzeComplexity } = require("../../routing/complexity-analyzer");
+const confidenceScorer = require("../../routing/confidence-scorer");
+
+const CODE_HINT_RE = /\b(code|function|implement|refactor|bug|class|api|module|test)\b/i;
+
+function getConfig() {
+  return (
+    config.taskDecomposition || {
+      enabled: false,
+      shadow: false,
+      planModel: "sonnet",
+      synthModel: "sonnet",
+      minConfidence: 0.5,
+      gate: {},
+    }
+  );
+}
+
+/**
+ * @param {string} task - the task text to (maybe) decompose
+ * @param {Object} [options]
+ * @param {string} [options.sessionId]
+ * @param {string} [options.cwd]
+ * @param {string} [options.riskLevel] - 'high' disables decomposition
+ * @param {Object} [options._inject] - test seams { generatePlan, dispatchPlan, synthesize, analyze }
+ * @returns {Promise<Object>} result object (see below)
+ */
+async function runDecomposedTask(task, options = {}) {
+  const cfg = getConfig();
+  const inject = options._inject || {};
+
+  if (!cfg.enabled) {
+    return { decomposed: false, reason: "disabled" };
+  }
+  if (!config.agents?.enabled) {
+    return { decomposed: false, reason: "agents_disabled" };
+  }
+  if (!task || typeof task !== "string") {
+    return { decomposed: false, reason: "empty_task" };
+  }
+
+  const payload = { messages: [{ role: "user", content: task }] };
+
+  let analysis;
+  try {
+    analysis = await (inject.analyze || analyzeComplexity)(payload);
+  } catch (err) {
+    logger.warn({ err: err.message }, "[Decomposition] Complexity analysis failed");
+    return { decomposed: false, reason: "analysis_failed" };
+  }
+
+  const monolithicTokens =
+    analysis?.breakdown?.tokens?.estimated || telemetry.estimateTokens(task);
+
+  const gate = shouldDecompose(analysis, payload, {
+    config: cfg.gate,
+    riskLevel: options.riskLevel,
+    taskText: task,
+  });
+
+  // Shadow mode: record what we WOULD do, but never actually decompose.
+  if (cfg.shadow) {
+    telemetry.record({
+      mode: "shadow",
+      sessionId: options.sessionId,
+      gate,
+      monolithicTokens,
+    });
+    return { decomposed: false, reason: "shadow_mode", gate };
+  }
+
+  if (!gate.decompose) {
+    telemetry.record({ mode: "live", decision: "skip", gate, monolithicTokens });
+    return { decomposed: false, reason: gate.reason, gate };
+  }
+
+  // Phase 2: plan
+  const plan = await (inject.generatePlan || generatePlan)({
+    task,
+    model: cfg.planModel,
+    maxSubtasks: cfg.gate?.maxSubtasks || 6,
+  });
+  if (!plan) {
+    telemetry.record({ mode: "live", decision: "plan_failed", gate, monolithicTokens });
+    return { decomposed: false, reason: "plan_failed", gate };
+  }
+
+  // Phase 3: dispatch
+  const dispatch = await (inject.dispatchPlan || dispatchPlan)(plan, {
+    sessionId: options.sessionId,
+    cwd: options.cwd,
+  });
+
+  // Phase 4: synthesize
+  const synth = await (inject.synthesize || synthesize)({
+    task,
+    subtaskResults: dispatch.results,
+    model: cfg.synthModel,
+  });
+
+  // Phase 5: quality gate
+  const taskType = CODE_HINT_RE.test(task) ? "code" : "reasoning";
+  let confidence = 1;
+  try {
+    confidence = await confidenceScorer.score(
+      { content: [{ type: "text", text: synth.text }] },
+      { taskType }
+    );
+  } catch (err) {
+    logger.debug({ err: err.message }, "[Decomposition] Confidence scoring failed");
+  }
+  const belowThreshold = confidence < (cfg.minConfidence ?? 0.5);
+
+  const savings = telemetry.estimateSavings({
+    monolithicTokens,
+    planUsage: plan.usage,
+    dispatchStats: dispatch.stats,
+    synthUsage: synth.usage,
+  });
+
+  telemetry.record({
+    mode: "live",
+    decision: "decomposed",
+    sessionId: options.sessionId,
+    gate,
+    subtasks: plan.subtasks.length,
+    levels: dispatch.levels.length,
+    strategy: plan.strategy,
+    confidence,
+    belowThreshold,
+    synthesisFallback: synth.fallback,
+    savings,
+  });
+
+  logger.info(
+    {
+      subtasks: plan.subtasks.length,
+      levels: dispatch.levels.length,
+      confidence: confidence.toFixed(2),
+      savedTokens: savings.savedTokens,
+    },
+    "[Decomposition] Task decomposed"
+  );
+
+  return {
+    decomposed: true,
+    result: synth.text,
+    reason: "decomposed",
+    plan,
+    subtaskResults: dispatch.results,
+    quality: { confidence, belowThreshold, taskType },
+    // When confidence is low, the caller should prefer a monolithic re-solve.
+    recommendFallback: belowThreshold,
+    stats: { ...dispatch.stats, levels: dispatch.levels.length },
+    savings,
+    gate,
+  };
+}
+
+module.exports = { runDecomposedTask, getConfig };
diff --git a/src/agents/decomposition/model-call.js b/src/agents/decomposition/model-call.js
new file mode 100644
index 0000000..5e9760f
--- /dev/null
+++ b/src/agents/decomposition/model-call.js
@@ -0,0 +1,75 @@
+/**
+ * Thin model-call helper for the decomposition planner/synthesizer.
+ *
+ * Mirrors the provider-forcing logic in agents/executor.js so planning and
+ * synthesis use the configured MODEL_PROVIDER rather than hard-falling back to
+ * Azure. The actual invoker is injectable (`opts.invoke`) so the planner and
+ * synthesizer can be unit-tested without a live provider.
+ */
+
+const logger = require("../../logger");
+
+function resolveForceProvider(model) {
+  const modelLower = String(model || "").toLowerCase();
+  const isClaudeFamily =
+    modelLower.includes("claude") ||
+    modelLower.includes("sonnet") ||
+    modelLower.includes("haiku") ||
+    modelLower.includes("opus");
+  const isGptFamily = modelLower.includes("gpt");
+
+  if (isClaudeFamily || isGptFamily) {
+    const config = require("../../config");
+    return config.modelProvider?.type || config.modelProvider?.provider || null;
+  }
+  return null;
+}
+
+/**
+ * Call the model and return the Anthropic-format response JSON.
+ * @param {Object} params
+ * @param {Array} params.messages
+ * @param {string} params.model
+ * @param {number} [params.maxTokens=2048]
+ * @param {number} [params.temperature=0.2]
+ * @param {Function} [params.invoke] - injectable invoker (default: clients/databricks.invokeModel)
+ * @returns {Promise<Object>} Anthropic-format response JSON
+ */
+async function callModel({ messages, model, maxTokens = 2048, temperature = 0.2, invoke } = {}) {
+  const invoker = invoke || require("../../clients/databricks").invokeModel;
+  const payload = {
+    model,
+    messages,
+    max_tokens: maxTokens,
+    temperature,
+  };
+  const forceProvider = resolveForceProvider(model);
+
+  const response = await invoker(payload, { forceProvider });
+  if (!response || !response.json) {
+    throw new Error("Invalid model response in decomposition model-call");
+  }
+  return response.json;
+}
+
+/**
+ * Extract concatenated text from an Anthropic-format response.
+ */
+function extractText(responseJson) {
+  const content = responseJson?.content;
+  if (!Array.isArray(content)) return "";
+  return content
+    .filter((b) => b?.type === "text")
+    .map((b) => b.text || "")
+    .join("\n")
+    .trim();
+}
+
+function sumUsage(responseJson) {
+  return {
+    inputTokens: responseJson?.usage?.input_tokens || 0,
+    outputTokens: responseJson?.usage?.output_tokens || 0,
+  };
+}
+
+module.exports = { callModel, extractText, sumUsage, resolveForceProvider, logger };
diff --git a/src/agents/decomposition/planner.js b/src/agents/decomposition/planner.js
new file mode 100644
index 0000000..b18457c
--- /dev/null
+++ b/src/agents/decomposition/planner.js
@@ -0,0 +1,223 @@
+/**
+ * Decomposition planner (Phase 2).
+ *
+ * Turns a complex task into a small subtask DAG using a single model call
+ * (plan-and-solve style — plan generated in one shot). Output is validated:
+ * ids unique, dependencies reference real ids, no cycles, subtask count capped.
+ * If anything fails to parse/validate the planner returns null and the caller
+ * falls back to a monolithic solve.
+ */
+
+const { callModel, extractText } = require("./model-call");
+const logger = require("../../logger");
+
+// Agent types the dispatcher knows how to spawn. Planner is steered toward
+// these; unknown types are coerced to general-purpose at dispatch time.
+const KNOWN_AGENT_TYPES = [
+  "Explore",
+  "Plan",
+  "general-purpose",
+  "Test",
+  "Debug",
+  "Fix",
+  "Refactor",
+  "Documentation",
+];
+
+function buildPlannerPrompt(task, maxSubtasks) {
+  return `You are a task-decomposition planner. Break the task below into the SMALLEST set of focused subtasks that can be solved with isolated context. Fewer is better — do NOT over-split. If the task is not genuinely divisible, return a single subtask.
+
+Rules:
+- At most ${maxSubtasks} subtasks.
+- Each subtask must be independently solvable given only its prompt plus the results of the subtasks it depends on.
+- Mark dependencies via "dependsOn" (array of subtask ids). Independent subtasks (empty dependsOn) will run in parallel.
+- Prefer assigning each subtask an agent type from: ${KNOWN_AGENT_TYPES.join(", ")}.
+- Keep each subtask prompt self-contained and specific.
+
+Respond with ONLY a JSON object, no prose, in exactly this shape:
+{
+  "strategy": "one short sentence on how you split it",
+  "subtasks": [
+    { "id": "s1", "agentType": "Explore", "prompt": "...", "dependsOn": [] }
+  ]
+}
+
+TASK:
+${task}`;
+}
+
+/**
+ * Extract the first balanced JSON object from a string (handles models that
+ * wrap JSON in prose or ```json fences).
+ */
+function extractJsonObject(text) {
+  if (!text) return null;
+  const fenced = text.match(/```(?:json)?\s*([\s\S]*?)```/i);
+  const candidate = fenced ? fenced[1] : text;
+
+  const start = candidate.indexOf("{");
+  if (start === -1) return null;
+
+  let depth = 0;
+  let inString = false;
+  let escape = false;
+  for (let i = start; i < candidate.length; i++) {
+    const ch = candidate[i];
+    if (escape) {
+      escape = false;
+      continue;
+    }
+    if (ch === "\\") {
+      escape = true;
+      continue;
+    }
+    if (ch === '"') {
+      inString = !inString;
+      continue;
+    }
+    if (inString) continue;
+    if (ch === "{") depth++;
+    else if (ch === "}") {
+      depth--;
+      if (depth === 0) {
+        const slice = candidate.slice(start, i + 1);
+        try {
+          return JSON.parse(slice);
+        } catch {
+          return null;
+        }
+      }
+    }
+  }
+  return null;
+}
+
+/**
+ * Validate and normalise a parsed plan. Returns a clean plan or null.
+ */
+function validatePlan(parsed, maxSubtasks) {
+  if (!parsed || !Array.isArray(parsed.subtasks) || parsed.subtasks.length === 0) {
+    return null;
+  }
+
+  const subtasks = [];
+  const seenIds = new Set();
+
+  for (let i = 0; i < parsed.subtasks.length && subtasks.length < maxSubtasks; i++) {
+    const st = parsed.subtasks[i];
+    if (!st || typeof st.prompt !== "string" || st.prompt.trim().length === 0) {
+      return null; // malformed subtask → reject whole plan, fall back
+    }
+    const id = typeof st.id === "string" && st.id.trim() ? st.id.trim() : `s${i + 1}`;
+    if (seenIds.has(id)) return null; // duplicate ids
+    seenIds.add(id);
+
+    const agentType = KNOWN_AGENT_TYPES.includes(st.agentType)
+      ? st.agentType
+      : "general-purpose";
+
+    const dependsOn = Array.isArray(st.dependsOn)
+      ? st.dependsOn.filter((d) => typeof d === "string")
+      : [];
+
+    subtasks.push({ id, agentType, prompt: st.prompt.trim(), dependsOn });
+  }
+
+  // Dependencies must reference real ids and contain no cycles.
+  for (const st of subtasks) {
+    for (const dep of st.dependsOn) {
+      if (!seenIds.has(dep)) return null; // dangling dependency
+    }
+  }
+  if (hasCycle(subtasks)) return null;
+
+  return {
+    strategy: typeof parsed.strategy === "string" ? parsed.strategy : "",
+    subtasks,
+  };
+}
+
+/**
+ * Cycle detection via DFS colouring.
+ */
+function hasCycle(subtasks) {
+  const byId = new Map(subtasks.map((s) => [s.id, s]));
+  const state = new Map(); // id → 0 unvisited, 1 visiting, 2 done
+
+  function visit(id) {
+    const cur = state.get(id) || 0;
+    if (cur === 1) return true; // back-edge → cycle
+    if (cur === 2) return false;
+    state.set(id, 1);
+    const node = byId.get(id);
+    for (const dep of node?.dependsOn || []) {
+      if (visit(dep)) return true;
+    }
+    state.set(id, 2);
+    return false;
+  }
+
+  for (const s of subtasks) {
+    if (visit(s.id)) return true;
+  }
+  return false;
+}
+
+/**
+ * Generate a validated plan for a task.
+ * @param {Object} params
+ * @param {string} params.task
+ * @param {string} [params.model="sonnet"] - planning needs reasoning; use a capable model
+ * @param {number} [params.maxSubtasks=6]
+ * @param {Function} [params.invoke] - injectable model invoker (for tests)
+ * @returns {Promise<{strategy:string, subtasks:Array, usage:Object}|null>}
+ */
+async function generatePlan({ task, model = "sonnet", maxSubtasks = 6, invoke } = {}) {
+  if (!task || typeof task !== "string") return null;
+
+  let responseJson;
+  try {
+    responseJson = await callModel({
+      messages: [{ role: "user", content: buildPlannerPrompt(task, maxSubtasks) }],
+      model,
+      maxTokens: 1500,
+      temperature: 0.1,
+      invoke,
+    });
+  } catch (err) {
+    logger.warn({ err: err.message }, "[Decomposition] Planner model call failed");
+    return null;
+  }
+
+  const text = extractText(responseJson);
+  const parsed = extractJsonObject(text);
+  const plan = validatePlan(parsed, maxSubtasks);
+
+  if (!plan) {
+    logger.warn(
+      { preview: text.slice(0, 200) },
+      "[Decomposition] Plan failed validation — will fall back to monolithic"
+    );
+    return null;
+  }
+
+  plan.usage = {
+    inputTokens: responseJson?.usage?.input_tokens || 0,
+    outputTokens: responseJson?.usage?.output_tokens || 0,
+  };
+
+  logger.info(
+    { subtasks: plan.subtasks.length, strategy: plan.strategy },
+    "[Decomposition] Plan generated"
+  );
+  return plan;
+}
+
+module.exports = {
+  generatePlan,
+  validatePlan,
+  extractJsonObject,
+  hasCycle,
+  buildPlannerPrompt,
+  KNOWN_AGENT_TYPES,
+};
diff --git a/src/agents/decomposition/synthesizer.js b/src/agents/decomposition/synthesizer.js
new file mode 100644
index 0000000..72afa02
--- /dev/null
+++ b/src/agents/decomposition/synthesizer.js
@@ -0,0 +1,89 @@
+/**
+ * Result synthesizer (Phase 4).
+ *
+ * Combines the (compressed) results of all subtasks into one coherent final
+ * answer to the original task. Single model call. If synthesis fails, the
+ * caller falls back to concatenating the subtask results.
+ */
+
+const { callModel, extractText } = require("./model-call");
+const logger = require("../../logger");
+
+const MAX_RESULT_CHARS = 4000;
+
+function buildSynthesisPrompt(task, subtaskResults) {
+  const blocks = subtaskResults
+    .map((r) => {
+      const status = r.success ? "OK" : `FAILED (${r.error})`;
+      const body = r.success
+        ? truncate(r.result, MAX_RESULT_CHARS)
+        : "(no result)";
+      return `### Subtask ${r.id} [${r.agentType}] — ${status}\n${body}`;
+    })
+    .join("\n\n");
+
+  return `You are synthesizing the results of several subtasks into one final answer for the original request. Integrate the findings into a single coherent response. Resolve overlaps, note any subtask that failed, and do not invent results that no subtask produced.
+
+ORIGINAL TASK:
+${task}
+
+SUBTASK RESULTS:
+${blocks}
+
+Write the final answer now.`;
+}
+
+function truncate(text, max) {
+  if (typeof text !== "string") return String(text ?? "");
+  return text.length <= max ? text : text.slice(0, max) + "\n…[truncated]";
+}
+
+/**
+ * Concatenation fallback used when the synthesis model call fails.
+ */
+function concatFallback(subtaskResults) {
+  return subtaskResults
+    .filter((r) => r.success && r.result)
+    .map((r) => `## ${r.id} (${r.agentType})\n${r.result}`)
+    .join("\n\n");
+}
+
+/**
+ * @param {Object} params
+ * @param {string} params.task
+ * @param {Array} params.subtaskResults - from dispatcher
+ * @param {string} [params.model="sonnet"]
+ * @param {Function} [params.invoke]
+ * @returns {Promise<{text:string, fallback:boolean, usage:Object}>}
+ */
+async function synthesize({ task, subtaskResults, model = "sonnet", invoke } = {}) {
+  const anySuccess = subtaskResults.some((r) => r.success);
+  if (!anySuccess) {
+    return { text: "All subtasks failed; no result could be produced.", fallback: true, usage: {} };
+  }
+
+  try {
+    const responseJson = await callModel({
+      messages: [{ role: "user", content: buildSynthesisPrompt(task, subtaskResults) }],
+      model,
+      maxTokens: 4096,
+      temperature: 0.3,
+      invoke,
+    });
+    const text = extractText(responseJson);
+    if (!text) throw new Error("Empty synthesis");
+    return {
+      text,
+      fallback: false,
+      usage: {
+        inputTokens: responseJson?.usage?.input_tokens || 0,
+        outputTokens: responseJson?.usage?.output_tokens || 0,
+      },
+    };
+  } catch (err) {
+    logger.warn({ err: err.message }, "[Decomposition] Synthesis failed — concatenating results");
+    return { text: concatFallback(subtaskResults), fallback: true, usage: {} };
+  }
+}
+
+module.exports = { synthesize, buildSynthesisPrompt, concatFallback };
diff --git a/src/agents/decomposition/telemetry.js b/src/agents/decomposition/telemetry.js
new file mode 100644
index 0000000..6500351
--- /dev/null
+++ b/src/agents/decomposition/telemetry.js
@@ -0,0 +1,55 @@
+/**
+ * Decomposition telemetry + shadow mode (Phase 6).
+ *
+ * Appends one JSON line per decomposition decision to
+ * data/decomposition-decisions.jsonl so the net token effect can be audited.
+ * Because the research is clear that decomposition can COST more than it saves,
+ * a shadow mode (TASK_DECOMPOSITION_SHADOW=true) runs the gate + records what it
+ * WOULD have done without actually decomposing — so savings can be validated on
+ * real traffic before enabling for real.
+ */
+
+const fs = require("fs");
+const path = require("path");
+const logger = require("../../logger");
+
+const LOG_PATH = path.join(__dirname, "../../../data/decomposition-decisions.jsonl");
+
+function estimateTokens(text) {
+  if (typeof text !== "string") return 0;
+  return Math.ceil(text.length / 4);
+}
+
+/**
+ * Rough net-savings estimate.
+ * monolithic ≈ what one big context would have cost (estimated input tokens).
+ * decomposed ≈ planning + Σ(subagent in+out) + synthesis.
+ * Positive `savedTokens` = decomposition was cheaper.
+ */
+function estimateSavings({ monolithicTokens, planUsage, dispatchStats, synthUsage }) {
+  const decomposed =
+    (planUsage?.inputTokens || 0) +
+    (planUsage?.outputTokens || 0) +
+    (dispatchStats?.inputTokens || 0) +
+    (dispatchStats?.outputTokens || 0) +
+    (synthUsage?.inputTokens || 0) +
+    (synthUsage?.outputTokens || 0);
+  return {
+    monolithicTokens: monolithicTokens || 0,
+    decomposedTokens: decomposed,
+    savedTokens: (monolithicTokens || 0) - decomposed,
+  };
+}
+
+function record(entry) {
+  const line = { timestamp: Date.now(), ...entry };
+  try {
+    fs.mkdirSync(path.dirname(LOG_PATH), { recursive: true });
+    fs.appendFileSync(LOG_PATH, JSON.stringify(line) + "\n");
+  } catch (err) {
+    logger.debug({ err: err.message }, "[Decomposition] Telemetry append failed");
+  }
+  return line;
+}
+
+module.exports = { record, estimateSavings, estimateTokens, LOG_PATH };
diff --git a/src/clients/databricks.js b/src/clients/databricks.js
index 92f4ac4..6c2bac2 100644
--- a/src/clients/databricks.js
+++ b/src/clients/databricks.js
@@ -2185,6 +2185,13 @@ async function invokeModel(body, options = {}) {
   const { injectPromptCaching } = require('./prompt-cache-injection');
   injectPromptCaching(body, initialProvider);
 
+  // Always-on markdown formatting guard. Stops formatting-weak backends
+  // (Moonshot/Kimi, Ollama, etc.) from emitting mangled ASCII box-drawing
+  // "diagrams". Keyed off the routing-resolved provider/model; skipped for
+  // Claude-family backends which already format cleanly.
+  const { injectFormatGuard } = require('../context/output-format-guard');
+  injectFormatGuard(body, { provider: initialProvider, model: tierSelectedModel });
+
   // Build routing decision object for response headers
   const routingDecision = {
     provider: initialProvider,
@@ -2384,6 +2391,71 @@ async function invokeModel(body, options = {}) {
     healthTracker.recordFailure(initialProvider, err, err.status);
     getLatencyTracker().record(initialProvider, routingDecision?.model, failLatency);
 
+    // Tier-aware escalate-then-demote fallback (TIER_FALLBACK_ENABLED).
+    // On failure, try a MORE capable tier first (climb toward REASONING); only
+    // if every higher tier is unavailable do we fall downward to SIMPLE/local.
+    // Runs before the flat global fallback below and is never silent.
+    if (config.tierFallback?.enabled && !options._tierFallbackInner && routingDecision.tier) {
+      const { getFallbackChain } = require("../routing/tier-fallback");
+      const chain = getFallbackChain(routingDecision.tier);
+      for (const cand of chain) {
+        try {
+          logger.warn({
+            fromTier: routingDecision.tier,
+            fromProvider: initialProvider,
+            toTier: cand.tier,
+            toProvider: cand.provider,
+            toModel: cand.model,
+            direction: cand.direction,
+          }, "[TierFallback] Primary tier failed — attempting tier fallback");
+
+          const attempt = await invokeModel(
+            { ...body, _tierModel: cand.model },
+            {
+              forceProvider: cand.provider,
+              _tierFallbackInner: true,
+              disableFallback: true,
+              _cascadeInner: true,
+              workspace,
+              tenantPolicy,
+            }
+          );
+
+          metricsCollector.recordFallbackAttempt(initialProvider, cand.provider, "tier_fallback");
+          logger.warn({
+            servedTier: cand.tier,
+            servedProvider: cand.provider,
+            fromTier: routingDecision.tier,
+            direction: cand.direction,
+          }, "[TierFallback] Served by tier fallback");
+
+          return {
+            ...attempt,
+            actualProvider: cand.provider,
+            routingDecision: {
+              ...routingDecision,
+              provider: cand.provider,
+              model: cand.model,
+              servedTier: cand.tier,
+              fromTier: routingDecision.tier,
+              fallback: true,
+              fallbackDirection: cand.direction,
+              method: "tier_fallback",
+            },
+          };
+        } catch (innerErr) {
+          logger.warn(
+            { toProvider: cand.provider, error: innerErr.message },
+            "[TierFallback] Candidate failed, trying next"
+          );
+        }
+      }
+      logger.warn(
+        { fromTier: routingDecision.tier },
+        "[TierFallback] All tier candidates exhausted — falling through"
+      );
+    }
+
     // Check if we should fallback (any provider can fall back, not just ollama)
     const shouldFallback =
       isFallbackEnabled() &&
diff --git a/src/config/index.js b/src/config/index.js
index e4ac410..829be3a 100644
--- a/src/config/index.js
+++ b/src/config/index.js
@@ -518,6 +518,12 @@ const agentsDefaultModel = process.env.AGENTS_DEFAULT_MODEL ?? "haiku";
 const agentsMaxSteps = Number.parseInt(process.env.AGENTS_MAX_STEPS ?? "15", 10);
 const agentsTimeout = Number.parseInt(process.env.AGENTS_TIMEOUT ?? "120000", 10);
 
+// Task decomposition configuration (opt-in; builds on the agents subsystem).
+// Single env toggle; everything else is hardcoded below.
+const taskDecompositionEnabled = process.env.TASK_DECOMPOSITION_ENABLED === "true";
+
+// Tier-aware fallback (escalate-then-demote) is always on (hardcoded).
+
 // LLM Audit logging configuration
 const auditEnabled = process.env.LLM_AUDIT_ENABLED === "true"; // default false
 const auditLogFile = process.env.LLM_AUDIT_LOG_FILE ?? path.join(process.cwd(), "logs", "llm-audit.log");
@@ -775,6 +781,23 @@ var config = {
     maxSteps: Number.isNaN(agentsMaxSteps) ? 15 : agentsMaxSteps,
     timeout: Number.isNaN(agentsTimeout) ? 120000 : agentsTimeout,
   },
+  taskDecomposition: {
+    enabled: taskDecompositionEnabled, // only env-driven knob (TASK_DECOMPOSITION_ENABLED)
+    // Hardcoded defaults — tune here if needed.
+    shadow: false,
+    planModel: "sonnet",
+    synthModel: "sonnet",
+    minConfidence: 0.5,
+    gate: {
+      minComplexity: 60,
+      minTokens: 3000,
+      maxSubtasks: 6,
+      minIndependentUnits: 2,
+    },
+  },
+  tierFallback: {
+    enabled: true, // always on (hardcoded): escalate-then-demote on provider failure; floor = SIMPLE
+  },
   tests: {
     defaultCommand: testDefaultCommand ? testDefaultCommand.trim() : null,
     defaultArgs: testDefaultArgs,
diff --git a/src/context/output-format-guard.js b/src/context/output-format-guard.js
new file mode 100644
index 0000000..d1ee600
--- /dev/null
+++ b/src/context/output-format-guard.js
@@ -0,0 +1,99 @@
+/**
+ * Output Format Guard
+ *
+ * Appends a short formatting instruction to the system prompt so weaker
+ * backends (Moonshot/Kimi, Ollama, etc.) stop emitting mangled ASCII/Unicode
+ * box-drawing "diagrams" that render as garbage in clients. The actual backend
+ * is decided by tier routing — so even when the client asks for "claude-opus",
+ * the request may be served by a model that formats poorly. This normalizes the
+ * presentation without changing which model serves the request.
+ *
+ * Always-on (no env flag). It is skipped only for Claude-family backends, which
+ * already produce clean GitHub-flavored markdown — injecting there would just
+ * waste tokens. The skip biases toward injecting: if we can't tell, we inject
+ * (a false-inject is harmless ~50 tokens; a false-skip leaves the garble).
+ *
+ * Keyed off the ROUTING-RESOLVED provider/model, never the client's requested
+ * body.model (which is just a label once tier routing is on).
+ *
+ * @module context/output-format-guard
+ */
+
+const logger = require("../logger");
+
+const MARKER = "[fmt-guard]";
+
+// Model names that already produce clean markdown.
+const CLEAN_FORMATTER_RE = /\b(claude|sonnet|opus|haiku)\b/i;
+// Providers that are always Claude-backed.
+const CLEAN_PROVIDERS = new Set(["azure-anthropic"]);
+
+const GUARD_TEXT =
+  `${MARKER} Formatting rules for your response: use plain GitHub-flavored markdown only. ` +
+  "Do NOT draw diagrams or boxes with ASCII or Unicode line-drawing characters " +
+  "(such as ┌ ─ │ └ ├ ┤ ┬ ┴ ╔ ═ ║), and do NOT wrap headings or code in decorative borders. " +
+  "Represent structure and relationships with normal markdown headings, nested bullet lists, " +
+  "numbered lists, or tables. Use standard triple-backtick fenced code blocks for code. " +
+  "Keep code, file paths, commands, and URLs exact.";
+
+/**
+ * Whether the resolved backend already formats cleanly (→ skip injection).
+ * @param {string} provider - routing-resolved provider
+ * @param {string} model - routing-resolved model (NOT the client's requested model)
+ * @returns {boolean}
+ */
+function producesCleanMarkdown(provider, model) {
+  if (CLEAN_PROVIDERS.has(String(provider || "").toLowerCase())) return true;
+  if (model && CLEAN_FORMATTER_RE.test(String(model))) return true;
+  return false;
+}
+
+/**
+ * Append the guard text to a system prompt that may be a string or an array of
+ * Anthropic content blocks. Idempotent via MARKER. Pure — returns the new value.
+ */
+function appendToSystem(system, text) {
+  // String (or empty) system prompt.
+  if (system == null || typeof system === "string") {
+    const base = system || "";
+    if (base.includes(MARKER)) return base;
+    return base ? `${base}\n\n${text}` : text;
+  }
+  // Anthropic array-of-blocks system prompt.
+  if (Array.isArray(system)) {
+    const already = system.some(
+      (b) => typeof b?.text === "string" && b.text.includes(MARKER)
+    );
+    if (already) return system;
+    return [...system, { type: "text", text }];
+  }
+  return system;
+}
+
+/**
+ * Inject the formatting guard into body.system in place, unless the resolved
+ * backend already formats cleanly. Always-on; idempotent.
+ *
+ * @param {object} body - request body (mutated in place)
+ * @param {object} [opts]
+ * @param {string} [opts.provider] - routing-resolved provider
+ * @param {string} [opts.model] - routing-resolved model
+ * @returns {object} body
+ */
+function injectFormatGuard(body, opts = {}) {
+  if (!body) return body;
+  const { provider, model } = opts;
+  if (producesCleanMarkdown(provider, model)) return body;
+
+  body.system = appendToSystem(body.system, GUARD_TEXT);
+  logger.debug({ provider, model }, "[FormatGuard] Injected markdown formatting guard");
+  return body;
+}
+
+module.exports = {
+  injectFormatGuard,
+  producesCleanMarkdown,
+  appendToSystem,
+  MARKER,
+  GUARD_TEXT,
+};
diff --git a/src/routing/index.js b/src/routing/index.js
index b760fc3..2d3f5b5 100644
--- a/src/routing/index.js
+++ b/src/routing/index.js
@@ -697,6 +697,14 @@ function getRoutingHeaders(decision) {
     headers['X-Lynkr-Cost-Optimized'] = 'true';
   }
 
+  // Tier-aware fallback surfacing (never silent).
+  if (decision.fallback) {
+    headers['X-Lynkr-Fallback'] = 'true';
+    if (decision.fromTier) headers['X-Lynkr-Fallback-From-Tier'] = decision.fromTier;
+    if (decision.servedTier) headers['X-Lynkr-Served-Tier'] = decision.servedTier;
+    if (decision.fallbackDirection) headers['X-Lynkr-Fallback-Direction'] = decision.fallbackDirection;
+  }
+
   if (decision.risk?.level) {
     headers['X-Lynkr-Risk'] = decision.risk.level;
     const hits = Array.from(new Set([
diff --git a/src/routing/tier-fallback.js b/src/routing/tier-fallback.js
new file mode 100644
index 0000000..f4361f6
--- /dev/null
+++ b/src/routing/tier-fallback.js
@@ -0,0 +1,91 @@
+/**
+ * Tier-aware fallback chain (escalate-then-demote).
+ *
+ * When a tier's provider fails, prefer a MORE capable tier first (climb toward
+ * REASONING), and only if every higher tier is also unavailable do we fall
+ * downward — all the way to the local SIMPLE tier as a last resort. This biases
+ * for correctness/availability over cost, matching a conservative routing policy.
+ *
+ * Example ladder: SIMPLE → MEDIUM → COMPLEX → REASONING
+ *   - COMPLEX fails  → [REASONING, MEDIUM, SIMPLE]
+ *   - REASONING fails → [COMPLEX, MEDIUM, SIMPLE]
+ *   - MEDIUM fails    → [COMPLEX, REASONING, SIMPLE]
+ *
+ * Pure and dependency-injectable so it can be unit-tested without real providers.
+ */
+
+const logger = require("../logger");
+
+const TIER_ORDER = ["SIMPLE", "MEDIUM", "COMPLEX", "REASONING"];
+
+/** Default availability check: a provider is unavailable if its circuit is OPEN. */
+function defaultIsProviderAvailable(provider) {
+  try {
+    const { getCircuitBreakerRegistry } = require("../clients/circuit-breaker");
+    const registry = getCircuitBreakerRegistry();
+    const all = typeof registry.getAll === "function" ? registry.getAll() : [];
+    const entry = Array.isArray(all) ? all.find((b) => b.name === provider) : null;
+    return !(entry && entry.state === "OPEN");
+  } catch {
+    return true; // fail open — never block fallback on a health-check error
+  }
+}
+
+/** Resolve a tier name to { tier, provider, model }, or null if not configured. */
+function resolveTier(tier, selector) {
+  try {
+    const sel = selector || require("./model-tiers").getModelTierSelector();
+    const r = sel.selectModel(tier);
+    if (!r || !r.provider || !r.model) return null;
+    return { tier, provider: r.provider, model: r.model };
+  } catch {
+    return null; // tier not configured (TIER_<X> unset) — skip it
+  }
+}
+
+/**
+ * Build the escalate-then-demote fallback chain for a failed tier.
+ *
+ * @param {string} currentTier - the tier whose provider just failed
+ * @param {Object} [opts]
+ * @param {Object} [opts.selector] - model-tiers selector (injected for tests)
+ * @param {Function} [opts.isProviderAvailable] - (provider) => boolean (injected for tests)
+ * @returns {Array<{tier,provider,model,demotedFrom,direction}>} ordered candidates
+ */
+function getFallbackChain(currentTier, opts = {}) {
+  const isAvailable = opts.isProviderAvailable || defaultIsProviderAvailable;
+  const idx = TIER_ORDER.indexOf(currentTier);
+  if (idx === -1) return [];
+
+  const higher = TIER_ORDER.slice(idx + 1); // ascending toward REASONING
+  const lower = TIER_ORDER.slice(0, idx).reverse(); // descending toward SIMPLE
+  const ordered = [...higher, ...lower];
+
+  const seen = new Set();
+  // Never re-attempt the exact provider:model that just failed.
+  const current = resolveTier(currentTier, opts.selector);
+  if (current) seen.add(`${current.provider}:${current.model}`);
+
+  const chain = [];
+  for (const tier of ordered) {
+    const resolved = resolveTier(tier, opts.selector);
+    if (!resolved) continue;
+    const key = `${resolved.provider}:${resolved.model}`;
+    if (seen.has(key)) continue;
+    seen.add(key);
+    if (!isAvailable(resolved.provider)) continue;
+    chain.push({
+      ...resolved,
+      demotedFrom: currentTier,
+      direction: TIER_ORDER.indexOf(tier) > idx ? "up" : "down",
+    });
+  }
+
+  logger.debug(
+    { currentTier, chain: chain.map((c) => `${c.tier}:${c.provider}`) },
+    "[TierFallback] Built fallback chain"
+  );
+  return chain;
+}
+
+module.exports = { getFallbackChain, resolveTier, TIER_ORDER };
diff --git a/src/server.js b/src/server.js
index 8af4e98..7b99096 100644
--- a/src/server.js
+++ b/src/server.js
@@ -29,6 +29,7 @@ const { registerTaskTools } = require("./tools/tasks");
 const { registerTestTools } = require("./tools/tests");
 const { registerMcpTools } = require("./tools/mcp");
 const { registerAgentTaskTool } = require("./tools/agent-task");
+const { registerDecomposeTool } = require("./tools/decompose");
 const { initConfigWatcher, getConfigWatcher } = require("./config/watcher");
 const { initializeHeadroom, shutdownHeadroom, getHeadroomManager } = require("./headroom");
 const { getWorkerPool, isWorkerPoolReady } = require("./workers/pool");
@@ -62,6 +63,7 @@ if (LAZY_TOOLS_ENABLED) {
   registerTestTools();
   registerMcpTools();
   registerAgentTaskTool();
+  registerDecomposeTool();
   logger.info({ mode: "eager" }, "All tools loaded at startup");
 }
 
diff --git a/src/tools/decompose.js b/src/tools/decompose.js
new file mode 100644
index 0000000..5d30d88
--- /dev/null
+++ b/src/tools/decompose.js
@@ -0,0 +1,91 @@
+const { registerTool } = require(".");
+const { runDecomposedTask } = require("../agents/decomposition");
+const logger = require("../logger");
+
+/**
+ * DecomposeTask tool — breaks a complex task into focused subtasks with isolated
+ * context, runs them (parallel where independent), and synthesizes the result.
+ *
+ * Opt-in: requires TASK_DECOMPOSITION_ENABLED=true and AGENTS_ENABLED=true.
+ * Degrades gracefully — if the gate decides decomposition isn't worth it (or
+ * planning fails), it returns ok:true with decomposed:false and a reason so the
+ * caller can solve the task monolithically.
+ */
+function registerDecomposeTool() {
+  registerTool(
+    "DecomposeTask",
+    async ({ args = {} }, context = {}) => {
+      const task = args.task || args.prompt || args.description;
+
+      if (!task || typeof task !== "string") {
+        return {
+          ok: false,
+          status: 400,
+          content: JSON.stringify({ error: "task is required" }, null, 2),
+        };
+      }
+
+      logger.info(
+        { task: task.slice(0, 100), sessionId: context.sessionId },
+        "DecomposeTask: evaluating task for decomposition"
+      );
+
+      try {
+        const result = await runDecomposedTask(task, {
+          sessionId: context.sessionId,
+          cwd: context.cwd,
+          riskLevel: args.riskLevel || context.riskLevel,
+        });
+
+        if (result.decomposed) {
+          return {
+            ok: true,
+            status: 200,
+            content: result.result,
+            metadata: {
+              decomposed: true,
+              subtasks: result.plan?.subtasks?.length,
+              levels: result.stats?.levels,
+              strategy: result.plan?.strategy,
+              confidence: result.quality?.confidence,
+              recommendFallback: result.recommendFallback,
+              savedTokens: result.savings?.savedTokens,
+            },
+          };
+        }
+
+        // Not decomposed — signal the caller to solve monolithically.
+        return {
+          ok: true,
+          status: 200,
+          content: JSON.stringify(
+            {
+              decomposed: false,
+              reason: result.reason,
+              guidance: "Solve this task directly without decomposition.",
+            },
+            null,
+            2
+          ),
+          metadata: { decomposed: false, reason: result.reason },
+        };
+      } catch (error) {
+        logger.error({ error: error.message }, "DecomposeTask: error");
+        return {
+          ok: false,
+          status: 500,
+          content: JSON.stringify(
+            { error: "Decomposition error", message: error.message },
+            null,
+            2
+          ),
+        };
+      }
+    },
+    { category: "decompose" }
+  );
+
+  logger.info("DecomposeTask tool registered");
+}
+
+module.exports = { registerDecomposeTool };
diff --git a/src/tools/lazy-loader.js b/src/tools/lazy-loader.js
index fcb3e04..c03c9a6 100644
--- a/src/tools/lazy-loader.js
+++ b/src/tools/lazy-loader.js
@@ -77,6 +77,11 @@ const TOOL_CATEGORIES = {
     loader: () => require('./agent-task').registerAgentTaskTool,
     priority: 2,
   },
+  decompose: {
+    keywords: ['decompose', 'subtask', 'break down', 'break into', 'split task', 'plan and execute'],
+    loader: () => require('./decompose').registerDecomposeTool,
+    priority: 2,
+  },
   'code-mode': {
     keywords: ['mcp', 'execute', 'server', 'tool', 'code mode'],
     loader: () => require('./code-mode').registerCodeModeTools,
@@ -296,6 +301,9 @@ function loadCategoryForTool(toolName) {
     // TinyFish (web agent)
     'web_agent': 'tinyfish',
     'agent_task': 'agentTask',
+
+    // Task decomposition
+    'decomposetask': 'decompose',
   };
 
   // Direct mapping
diff --git a/test/output-format-guard.test.js b/test/output-format-guard.test.js
new file mode 100644
index 0000000..74b92c8
--- /dev/null
+++ b/test/output-format-guard.test.js
@@ -0,0 +1,95 @@
+/**
+ * Tests for the output formatting guard (src/context/output-format-guard.js).
+ */
+
+const { describe, it } = require("node:test");
+const assert = require("node:assert/strict");
+
+const {
+  injectFormatGuard,
+  producesCleanMarkdown,
+  appendToSystem,
+  MARKER,
+} = require("../src/context/output-format-guard");
+
+describe("output format guard", () => {
+  describe("producesCleanMarkdown", () => {
+    it("treats Claude-family models as clean", () => {
+      assert.equal(producesCleanMarkdown("openrouter", "anthropic/claude-3.5-sonnet"), true);
+      assert.equal(producesCleanMarkdown("bedrock", "claude-opus-4"), true);
+      assert.equal(producesCleanMarkdown("databricks", "databricks-claude-sonnet-4-5"), true);
+    });
+
+    it("treats azure-anthropic provider as clean regardless of model", () => {
+      assert.equal(producesCleanMarkdown("azure-anthropic", null), true);
+    });
+
+    it("treats non-Claude backends as needing the guard", () => {
+      assert.equal(producesCleanMarkdown("moonshot", "kimi-k2-turbo-preview"), false);
+      assert.equal(producesCleanMarkdown("ollama", "qwen2.5-coder:7b"), false);
+      assert.equal(producesCleanMarkdown("azure-openai", "gpt-4o"), false);
+    });
+
+    it("biases toward injecting when the model is unknown", () => {
+      // Unknown model on a provider that could serve either → inject (not clean).
+      assert.equal(producesCleanMarkdown("openrouter", null), false);
+    });
+  });
+
+  describe("appendToSystem", () => {
+    it("appends to a string system prompt", () => {
+      const out = appendToSystem("You are helpful.", "GUARD");
+      assert.match(out, /You are helpful\./);
+      assert.match(out, /GUARD/);
+    });
+
+    it("becomes the system prompt when empty", () => {
+      assert.equal(appendToSystem("", "GUARD"), "GUARD");
+      assert.equal(appendToSystem(null, "GUARD"), "GUARD");
+    });
+
+    it("appends a block to an array system prompt", () => {
+      const arr = [{ type: "text", text: "base" }];
+      const out = appendToSystem(arr, "GUARD");
+      assert.equal(out.length, 2);
+      assert.equal(out[1].text, "GUARD");
+    });
+  });
+
+  describe("injectFormatGuard", () => {
+    it("injects for a non-Claude backend (string system)", () => {
+      const body = { system: "base prompt", model: "claude-opus-4-5" };
+      injectFormatGuard(body, { provider: "moonshot", model: "kimi-k2-turbo-preview" });
+      assert.match(body.system, new RegExp(MARKER.replace(/[[\]]/g, "\\$&")));
+      assert.match(body.system, /box-drawing|line-drawing/);
+    });
+
+    it("does NOT inject for a Claude backend even if client asked via label", () => {
+      const body = { system: "base prompt", model: "claude-opus-4-5" };
+      injectFormatGuard(body, { provider: "azure-anthropic", model: null });
+      assert.equal(body.system, "base prompt");
+    });
+
+    it("keys off resolved model, not the client's requested body.model", () => {
+      // Client requested claude (label) but Lynkr resolved to kimi → must inject.
+      const body = { system: "base", model: "claude-opus-4-5" };
+      injectFormatGuard(body, { provider: "moonshot", model: "kimi-k2-turbo-preview" });
+      assert.match(body.system, new RegExp(MARKER.replace(/[[\]]/g, "\\$&")));
+    });
+
+    it("is idempotent (no double injection)", () => {
+      const body = { system: "base", model: "kimi" };
+      injectFormatGuard(body, { provider: "moonshot", model: "kimi" });
+      const once = body.system;
+      injectFormatGuard(body, { provider: "moonshot", model: "kimi" });
+      assert.equal(body.system, once);
+    });
+
+    it("handles array-format system prompts", () => {
+      const body = { system: [{ type: "text", text: "base" }] };
+      injectFormatGuard(body, { provider: "ollama", model: "qwen2.5-coder" });
+      assert.equal(body.system.length, 2);
+      assert.match(body.system[1].text, new RegExp(MARKER.replace(/[[\]]/g, "\\$&")));
+    });
+  });
+});
diff --git a/test/task-decomposition.test.js b/test/task-decomposition.test.js
new file mode 100644
index 0000000..ca5ea05
--- /dev/null
+++ b/test/task-decomposition.test.js
@@ -0,0 +1,315 @@
+/**
+ * Tests for the task-decomposition feature (src/agents/decomposition/*).
+ *
+ * Everything here is deterministic and offline — model calls are injected.
+ */
+
+// Must be set before requiring config-dependent modules.
+process.env.TASK_DECOMPOSITION_ENABLED = "true";
+process.env.AGENTS_ENABLED = "true";
+process.env.DATABRICKS_API_KEY = process.env.DATABRICKS_API_KEY || "test-key";
+process.env.DATABRICKS_API_BASE = process.env.DATABRICKS_API_BASE || "http://test.com";
+
+const { describe, it } = require("node:test");
+const assert = require("node:assert/strict");
+
+const gate = require("../src/agents/decomposition/gate");
+const planner = require("../src/agents/decomposition/planner");
+const dispatcher = require("../src/agents/decomposition/dispatcher");
+const synthesizer = require("../src/agents/decomposition/synthesizer");
+const telemetry = require("../src/agents/decomposition/telemetry");
+const config = require("../src/config");
+const { runDecomposedTask } = require("../src/agents/decomposition");
+
+// ── Phase 1: gate ───────────────────────────────────────────────────
+
+describe("decomposition gate", () => {
+  const bigAnalysis = (score, tokens) => ({
+    score,
+    breakdown: { tokens: { estimated: tokens } },
+  });
+
+  it("decomposes a complex, large, divisible task", () => {
+    const task =
+      "Add a login endpoint in auth.js, then write tests in auth.test.js, and update the README.md docs.";
+    const res = gate.shouldDecompose(bigAnalysis(75, 5000), {}, { taskText: task });
+    assert.equal(res.decompose, true);
+    assert.equal(res.reason, "decompose_worthwhile");
+  });
+
+  it("skips low-complexity tasks", () => {
+    const res = gate.shouldDecompose(bigAnalysis(20, 5000), {}, {
+      taskText: "fix typo in readme and update docs",
+    });
+    assert.equal(res.decompose, false);
+    assert.equal(res.reason, "below_complexity_threshold");
+  });
+
+  it("skips tasks too small to amortise overhead", () => {
+    const res = gate.shouldDecompose(bigAnalysis(80, 500), {}, {
+      taskText: "implement A and then implement B and update C.js",
+    });
+    assert.equal(res.decompose, false);
+    assert.equal(res.reason, "too_small_to_amortise_overhead");
+  });
+
+  it("skips non-divisible tasks", () => {
+    const res = gate.shouldDecompose(bigAnalysis(80, 8000), {}, {
+      taskText: "explain how this system works",
+    });
+    assert.equal(res.decompose, false);
+    assert.equal(res.reason, "not_divisible");
+  });
+
+  it("never decomposes high-risk tasks", () => {
+    const task = "implement A and then implement B and update C.js";
+    const res = gate.shouldDecompose(bigAnalysis(90, 9000), {}, {
+      taskText: task,
+      riskLevel: "high",
+    });
+    assert.equal(res.decompose, false);
+    assert.equal(res.reason, "high_risk_skip");
+  });
+
+  it("estimateIndependentUnits counts enumerated and file signals", () => {
+    assert.ok(gate.estimateIndependentUnits("1. do x\n2. do y\n3. do z") >= 3);
+    assert.ok(gate.estimateIndependentUnits("update a.js and b.js and c.py") >= 3);
+    assert.equal(gate.estimateIndependentUnits("a single vague sentence"), 1);
+  });
+});
+
+// ── Phase 2: planner ────────────────────────────────────────────────
+
+describe("decomposition planner", () => {
+  const fakeInvoke = (jsonText) => async () => ({
+    json: { content: [{ type: "text", text: jsonText }], usage: { input_tokens: 10, output_tokens: 20 } },
+  });
+
+  it("parses and validates a well-formed plan", async () => {
+    const json = JSON.stringify({
+      strategy: "split by file",
+      subtasks: [
+        { id: "s1", agentType: "Explore", prompt: "find the auth code", dependsOn: [] },
+        { id: "s2", agentType: "Fix", prompt: "patch it", dependsOn: ["s1"] },
+      ],
+    });
+    const plan = await planner.generatePlan({ task: "x", invoke: fakeInvoke(json) });
+    assert.ok(plan);
+    assert.equal(plan.subtasks.length, 2);
+    assert.equal(plan.subtasks[1].dependsOn[0], "s1");
+    assert.equal(plan.usage.outputTokens, 20);
+  });
+
+  it("extracts JSON from prose + fences", () => {
+    const text = "Sure!\n```json\n{\"strategy\":\"s\",\"subtasks\":[{\"id\":\"s1\",\"prompt\":\"p\"}]}\n```\nDone.";
+    const obj = planner.extractJsonObject(text);
+    assert.equal(obj.subtasks[0].id, "s1");
+  });
+
+  it("returns null on dangling dependency", () => {
+    const bad = { subtasks: [{ id: "s1", prompt: "p", dependsOn: ["nope"] }] };
+    assert.equal(planner.validatePlan(bad, 6), null);
+  });
+
+  it("returns null on duplicate ids", () => {
+    const bad = {
+      subtasks: [
+        { id: "s1", prompt: "a" },
+        { id: "s1", prompt: "b" },
+      ],
+    };
+    assert.equal(planner.validatePlan(bad, 6), null);
+  });
+
+  it("coerces unknown agent types to general-purpose", () => {
+    const ok = { subtasks: [{ id: "s1", agentType: "Wizard", prompt: "p", dependsOn: [] }] };
+    const plan = planner.validatePlan(ok, 6);
+    assert.equal(plan.subtasks[0].agentType, "general-purpose");
+  });
+
+  it("detects cycles", () => {
+    const cyclic = [
+      { id: "a", prompt: "a", dependsOn: ["b"] },
+      { id: "b", prompt: "b", dependsOn: ["a"] },
+    ];
+    assert.equal(planner.hasCycle(cyclic), true);
+  });
+
+  it("returns null when model output is not JSON", async () => {
+    const plan = await planner.generatePlan({ task: "x", invoke: fakeInvoke("no json here") });
+    assert.equal(plan, null);
+  });
+});
+
+// ── Phase 3: dispatcher ─────────────────────────────────────────────
+
+describe("decomposition dispatcher", () => {
+  it("orders subtasks into dependency levels", () => {
+    const subtasks = [
+      { id: "s1", dependsOn: [] },
+      { id: "s2", dependsOn: ["s1"] },
+      { id: "s3", dependsOn: ["s1"] },
+      { id: "s4", dependsOn: ["s2", "s3"] },
+    ];
+    const levels = dispatcher.topologicalLevels(subtasks);
+    assert.deepEqual(levels[0], ["s1"]);
+    assert.deepEqual(levels[1].sort(), ["s2", "s3"]);
+    assert.deepEqual(levels[2], ["s4"]);
+  });
+
+  it("runs levels in order and forwards dependency results as context", async () => {
+    const calls = [];
+    const spawnParallel = async (agentTypes, prompts, opts) => {
+      calls.push({ agentTypes, prompts, mainContext: opts.mainContext });
+      return prompts.map((p) => ({
+        success: true,
+        result: `done:${p}`,
+        stats: { inputTokens: 5, outputTokens: 3 },
+      }));
+    };
+
+    const plan = {
+      subtasks: [
+        { id: "s1", agentType: "Explore", prompt: "explore", dependsOn: [] },
+        { id: "s2", agentType: "Fix", prompt: "fix", dependsOn: ["s1"] },
+      ],
+    };
+
+    const out = await dispatcher.dispatchPlan(plan, { spawnParallel });
+    assert.equal(out.results.length, 2);
+    assert.equal(out.results[0].success, true);
+    assert.equal(out.stats.subagents, 2);
+
+    // s2 ran after s1 and received s1's result as injected context.
+    const s2Call = calls.find((c) => c.prompts[0] === "fix");
+    assert.ok(s2Call.mainContext);
+    assert.match(s2Call.mainContext.relevant_context, /done:explore/);
+  });
+
+  it("buildContextForSubtask returns null for independent subtasks", () => {
+    const ctx = dispatcher.buildContextForSubtask({ dependsOn: [] }, new Map());
+    assert.equal(ctx, null);
+  });
+});
+
+// ── Phase 4: synthesizer ────────────────────────────────────────────
+
+describe("decomposition synthesizer", () => {
+  it("synthesizes via the model", async () => {
+    const invoke = async () => ({
+      json: { content: [{ type: "text", text: "final answer" }], usage: {} },
+    });
+    const out = await synthesizer.synthesize({
+      task: "t",
+      subtaskResults: [{ id: "s1", agentType: "Fix", success: true, result: "r1" }],
+      invoke,
+    });
+    assert.equal(out.text, "final answer");
+    assert.equal(out.fallback, false);
+  });
+
+  it("falls back to concatenation when synthesis throws", async () => {
+    const invoke = async () => {
+      throw new Error("boom");
+    };
+    const out = await synthesizer.synthesize({
+      task: "t",
+      subtaskResults: [{ id: "s1", agentType: "Fix", success: true, result: "r1" }],
+      invoke,
+    });
+    assert.equal(out.fallback, true);
+    assert.match(out.text, /r1/);
+  });
+
+  it("reports failure when all subtasks failed", async () => {
+    const out = await synthesizer.synthesize({
+      task: "t",
+      subtaskResults: [{ id: "s1", agentType: "Fix", success: false, error: "x" }],
+    });
+    assert.equal(out.fallback, true);
+  });
+});
+
+// ── Phase 6: telemetry ──────────────────────────────────────────────
+
+describe("decomposition telemetry", () => {
+  it("computes net savings (positive = cheaper)", () => {
+    const s = telemetry.estimateSavings({
+      monolithicTokens: 10000,
+      planUsage: { inputTokens: 100, outputTokens: 200 },
+      dispatchStats: { inputTokens: 1000, outputTokens: 500 },
+      synthUsage: { inputTokens: 300, outputTokens: 400 },
+    });
+    assert.equal(s.decomposedTokens, 2500);
+    assert.equal(s.savedTokens, 7500);
+  });
+});
+
+// ── Orchestration ───────────────────────────────────────────────────
+
+describe("runDecomposedTask orchestration", () => {
+  const inject = {
+    analyze: async () => ({ score: 80, breakdown: { tokens: { estimated: 8000 } } }),
+    generatePlan: async () => ({
+      strategy: "split",
+      subtasks: [
+        { id: "s1", agentType: "Explore", prompt: "p1", dependsOn: [] },
+        { id: "s2", agentType: "Fix", prompt: "p2", dependsOn: ["s1"] },
+      ],
+      usage: { inputTokens: 50, outputTokens: 60 },
+    }),
+    dispatchPlan: async () => ({
+      results: [
+        { id: "s1", agentType: "Explore", success: true, result: "found it" },
+        { id: "s2", agentType: "Fix", success: true, result: "fixed it" },
+      ],
+      levels: [["s1"], ["s2"]],
+      stats: { subagents: 2, inputTokens: 200, outputTokens: 100 },
+    }),
+    synthesize: async () => ({ text: "all done, fixed it correctly", fallback: false, usage: { inputTokens: 80, outputTokens: 40 } }),
+  };
+
+  it("decomposes a qualifying task end-to-end", async () => {
+    const task = "implement A in a.js and then test it in a.test.js and document it in README.md";
+    const res = await runDecomposedTask(task, { _inject: inject });
+    assert.equal(res.decomposed, true);
+    assert.match(res.result, /fixed it/);
+    assert.equal(res.plan.subtasks.length, 2);
+    assert.ok(typeof res.quality.confidence === "number");
+    assert.ok(res.savings.savedTokens !== undefined);
+  });
+
+  it("skips via the real gate for trivial tasks", async () => {
+    const res = await runDecomposedTask("say hello", {
+      _inject: { ...inject, analyze: async () => ({ score: 10, breakdown: { tokens: { estimated: 100 } } }) },
+    });
+    assert.equal(res.decomposed, false);
+    assert.equal(res.reason, "below_complexity_threshold");
+  });
+
+  it("respects shadow mode (logs but does not decompose)", async () => {
+    const original = config.taskDecomposition.shadow;
+    config.taskDecomposition.shadow = true;
+    try {
+      const task = "implement A in a.js and then test it in a.test.js and document README.md";
+      const res = await runDecomposedTask(task, { _inject: inject });
+      assert.equal(res.decomposed, false);
+      assert.equal(res.reason, "shadow_mode");
+      assert.ok(res.gate);
+    } finally {
+      config.taskDecomposition.shadow = original;
+    }
+  });
+
+  it("returns disabled when feature flag is off", async () => {
+    const original = config.taskDecomposition.enabled;
+    config.taskDecomposition.enabled = false;
+    try {
+      const res = await runDecomposedTask("anything", { _inject: inject });
+      assert.equal(res.decomposed, false);
+      assert.equal(res.reason, "disabled");
+    } finally {
+      config.taskDecomposition.enabled = original;
+    }
+  });
+});
diff --git a/test/tier-fallback.test.js b/test/tier-fallback.test.js
new file mode 100644
index 0000000..0954402
--- /dev/null
+++ b/test/tier-fallback.test.js
@@ -0,0 +1,100 @@
+/**
+ * Tests for tier-aware escalate-then-demote fallback (src/routing/tier-fallback.js).
+ */
+
+process.env.DATABRICKS_API_KEY = process.env.DATABRICKS_API_KEY || "test-key";
+process.env.DATABRICKS_API_BASE = process.env.DATABRICKS_API_BASE || "http://test.com";
+
+const { describe, it } = require("node:test");
+const assert = require("node:assert/strict");
+
+const { getFallbackChain, TIER_ORDER } = require("../src/routing/tier-fallback");
+
+// Fake selector mapping tiers → provider:model (distinct providers per tier).
+const MAP = {
+  SIMPLE: { provider: "ollama", model: "minimax" },
+  MEDIUM: { provider: "llamacpp", model: "qwen" },
+  COMPLEX: { provider: "moonshot", model: "kimi" },
+  REASONING: { provider: "azure-openai", model: "gpt-5" },
+};
+const selector = { selectModel: (tier) => MAP[tier] };
+const allAvailable = () => true;
+
+function chainTiers(tier, opts = {}) {
+  return getFallbackChain(tier, { selector, isProviderAvailable: allAvailable, ...opts }).map(
+    (c) => c.tier
+  );
+}
+
+describe("tier-fallback chain (escalate-then-demote)", () => {
+  it("COMPLEX → up to REASONING, then down to MEDIUM, SIMPLE", () => {
+    assert.deepEqual(chainTiers("COMPLEX"), ["REASONING", "MEDIUM", "SIMPLE"]);
+  });
+
+  it("REASONING (top) → only downward", () => {
+    assert.deepEqual(chainTiers("REASONING"), ["COMPLEX", "MEDIUM", "SIMPLE"]);
+  });
+
+  it("MEDIUM → up (COMPLEX, REASONING) then down (SIMPLE)", () => {
+    assert.deepEqual(chainTiers("MEDIUM"), ["COMPLEX", "REASONING", "SIMPLE"]);
+  });
+
+  it("SIMPLE (floor) → only upward", () => {
+    assert.deepEqual(chainTiers("SIMPLE"), ["MEDIUM", "COMPLEX", "REASONING"]);
+  });
+
+  it("marks direction up/down correctly", () => {
+    const chain = getFallbackChain("COMPLEX", { selector, isProviderAvailable: allAvailable });
+    assert.equal(chain.find((c) => c.tier === "REASONING").direction, "up");
+    assert.equal(chain.find((c) => c.tier === "SIMPLE").direction, "down");
+  });
+
+  it("skips providers whose circuit is unavailable", () => {
+    const isAvailable = (p) => p !== "azure-openai"; // REASONING down
+    const chain = getFallbackChain("COMPLEX", { selector, isProviderAvailable: isAvailable });
+    assert.deepEqual(chain.map((c) => c.tier), ["MEDIUM", "SIMPLE"]);
+  });
+
+  it("dedups identical provider:model across tiers", () => {
+    const dupMap = {
+      SIMPLE: { provider: "ollama", model: "minimax" },
+      MEDIUM: { provider: "ollama", model: "minimax" }, // same as SIMPLE
+      COMPLEX: { provider: "moonshot", model: "kimi" },
+      REASONING: { provider: "azure-openai", model: "gpt-5" },
+    };
+    const chain = getFallbackChain("COMPLEX", {
+      selector: { selectModel: (t) => dupMap[t] },
+      isProviderAvailable: allAvailable,
+    });
+    // MEDIUM and SIMPLE collapse to one ollama:minimax entry.
+    const ollama = chain.filter((c) => c.provider === "ollama");
+    assert.equal(ollama.length, 1);
+  });
+
+  it("never re-attempts the failed tier's own provider:model", () => {
+    // If REASONING maps to the same provider:model as COMPLEX, it's excluded.
+    const sameMap = { ...MAP, REASONING: { provider: "moonshot", model: "kimi" } };
+    const chain = getFallbackChain("COMPLEX", {
+      selector: { selectModel: (t) => sameMap[t] },
+      isProviderAvailable: allAvailable,
+    });
+    assert.ok(!chain.some((c) => c.provider === "moonshot"));
+  });
+
+  it("skips tiers that aren't configured (selector throws)", () => {
+    const partial = { selectModel: (t) => {
+      if (t === "REASONING") throw new Error("TIER_REASONING not configured");
+      return MAP[t];
+    } };
+    const chain = getFallbackChain("COMPLEX", { selector: partial, isProviderAvailable: allAvailable });
+    assert.deepEqual(chain.map((c) => c.tier), ["MEDIUM", "SIMPLE"]);
+  });
+
+  it("returns empty for an unknown tier", () => {
+    assert.deepEqual(getFallbackChain("BOGUS", { selector, isProviderAvailable: allAvailable }), []);
+  });
+
+  it("TIER_ORDER is SIMPLE→REASONING", () => {
+    assert.deepEqual(TIER_ORDER, ["SIMPLE", "MEDIUM", "COMPLEX", "REASONING"]);
+  });
+});