diff --git a/.env.example b/.env.example index 6a4e090..7f3ff44 100644 --- a/.env.example +++ b/.env.example @@ -269,6 +269,18 @@ AGENTS_DEFAULT_MODEL=haiku AGENTS_MAX_STEPS=15 AGENTS_TIMEOUT=300000 +# ============================================================================== +# Task Decomposition (opt-in; requires AGENTS_ENABLED=true) +# ============================================================================== +# Breaks complex, divisible tasks into focused subtasks run with isolated +# context (parallel where independent), then synthesizes the result. A cost-aware +# gate decides WHEN to decompose — decomposition can cost MORE than it saves on +# small/indivisible tasks, so it only triggers on complex, large, divisible work. +# Exposed as the DecomposeTask tool. All other settings (models, gate thresholds, +# shadow mode) are hardcoded in src/config/index.js. + +TASK_DECOMPOSITION_ENABLED=false + # ============================================================================== # MCP Sandbox Configuration # ============================================================================== diff --git a/Dockerfile b/Dockerfile index 29adbf0..bc2b111 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,7 +23,7 @@ FROM node:24-alpine AS runtime ARG VCS_REF ARG BUILD_DATE -ARG VERSION=9.5.0 +ARG VERSION=9.6.0 LABEL org.opencontainers.image.title="Lynkr" \ org.opencontainers.image.description="Universal LLM proxy for Claude Code, Cursor, and AI coding tools" \ diff --git a/docker-compose.yml b/docker-compose.yml index 88a255d..e161cec 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,7 +3,7 @@ services: lynkr: build: . container_name: lynkr - image: lynkr:9.5.0 + image: lynkr:9.6.0 ports: - "8081:8081" extra_hosts: @@ -329,7 +329,7 @@ services: retries: 3 start_period: 40s labels: - - "com.lynkr.version=9.5.0" + - "com.lynkr.version=9.6.0" - "com.lynkr.description=Claude Code proxy with multi-provider support" # Uncomment to set resource limits # deploy: diff --git a/package.json b/package.json index 3f2df02..a46fee0 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "lynkr", - "version": "9.5.0", + "version": "9.6.0", "description": "Self-hosted LLM gateway and tier-routing proxy for Claude Code, Cursor, and Codex. Routes across Ollama, AWS Bedrock, OpenRouter, Databricks, Azure OpenAI, llama.cpp, and LM Studio with prompt caching, MCP tools, and 60-80% cost savings.", "main": "index.js", "bin": { @@ -16,7 +16,7 @@ "dev": "nodemon index.js", "lint": "eslint src index.js", "test": "npm run test:unit && npm run test:performance", - "test:unit": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/routing.test.js test/hybrid-routing-integration.test.js test/web-tools.test.js test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js test/azure-openai-config.test.js test/azure-openai-format-conversion.test.js test/azure-openai-routing.test.js test/azure-openai-streaming.test.js test/azure-openai-error-resilience.test.js test/azure-openai-integration.test.js test/openai-integration.test.js test/toon-compression.test.js test/llamacpp-integration.test.js test/resilience.test.js test/telemetry-routing.test.js test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js test/distill.test.js test/large-payload.test.js test/code-mode.test.js test/prompt-cache-injection.test.js test/risk-analyzer.test.js test/interaction-block.test.js test/preflight.test.js test/token-reduction.test.js test/session-affinity.test.js test/model-registry-cost.test.js", + "test:unit": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/routing.test.js test/hybrid-routing-integration.test.js test/web-tools.test.js test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js test/azure-openai-config.test.js test/azure-openai-format-conversion.test.js test/azure-openai-routing.test.js test/azure-openai-streaming.test.js test/azure-openai-error-resilience.test.js test/azure-openai-integration.test.js test/openai-integration.test.js test/toon-compression.test.js test/llamacpp-integration.test.js test/resilience.test.js test/telemetry-routing.test.js test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js test/distill.test.js test/large-payload.test.js test/code-mode.test.js test/prompt-cache-injection.test.js test/risk-analyzer.test.js test/interaction-block.test.js test/preflight.test.js test/token-reduction.test.js test/session-affinity.test.js test/model-registry-cost.test.js test/task-decomposition.test.js test/output-format-guard.test.js test/tier-fallback.test.js", "test:memory": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js", "test:new-features": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js", "test:performance": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/hybrid-routing-performance.test.js && DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/performance-tests.js", diff --git a/src/agents/decomposition/dispatcher.js b/src/agents/decomposition/dispatcher.js new file mode 100644 index 0000000..09b05cc --- /dev/null +++ b/src/agents/decomposition/dispatcher.js @@ -0,0 +1,185 @@ +/** + * Subtask dispatcher (Phase 3). + * + * Executes a validated plan respecting its dependency DAG: + * - subtasks are grouped into topological "levels" (Kahn's algorithm) + * - subtasks in the same level have no dependency on each other → run in + * parallel via the existing ParallelCoordinator (spawnParallel) + * - a subtask receives ONLY its own prompt plus the compressed results of the + * subtasks it depends on (context isolation — the token win) + * + * The spawn functions are injectable for testing. + */ + +const logger = require("../../logger"); + +// Cap how much of a dependency's result we forward, to preserve the +// context-isolation savings (subagents already return summaries; this bounds +// pathological cases). +const MAX_CONTEXT_CHARS = 2000; + +/** + * Group subtasks into dependency levels. Returns array of arrays of ids. + * Throws if the graph is unresolvable (should not happen — planner validated). + */ +function topologicalLevels(subtasks) { + const byId = new Map(subtasks.map((s) => [s.id, s])); + const indegree = new Map(subtasks.map((s) => [s.id, 0])); + const dependents = new Map(subtasks.map((s) => [s.id, []])); + + for (const s of subtasks) { + for (const dep of s.dependsOn) { + if (!byId.has(dep)) continue; + indegree.set(s.id, indegree.get(s.id) + 1); + dependents.get(dep).push(s.id); + } + } + + const levels = []; + let frontier = subtasks.filter((s) => indegree.get(s.id) === 0).map((s) => s.id); + const resolved = new Set(); + + while (frontier.length > 0) { + levels.push(frontier); + const next = []; + for (const id of frontier) { + resolved.add(id); + for (const child of dependents.get(id)) { + indegree.set(child, indegree.get(child) - 1); + if (indegree.get(child) === 0) next.push(child); + } + } + frontier = next; + } + + if (resolved.size !== subtasks.length) { + throw new Error("Unresolvable subtask graph (cycle or dangling dependency)"); + } + return levels; +} + +function compressResult(text) { + if (typeof text !== "string") return String(text ?? ""); + if (text.length <= MAX_CONTEXT_CHARS) return text; + return text.slice(0, MAX_CONTEXT_CHARS) + "\n…[truncated]"; +} + +function buildContextForSubtask(subtask, resultsById) { + if (!subtask.dependsOn || subtask.dependsOn.length === 0) return null; + const parts = []; + for (const dep of subtask.dependsOn) { + const r = resultsById.get(dep); + if (r && r.success && r.result) { + parts.push(`Result of subtask ${dep}:\n${compressResult(r.result)}`); + } else if (r) { + parts.push(`Subtask ${dep} did not complete successfully.`); + } + } + return parts.length > 0 ? parts.join("\n\n") : null; +} + +/** + * Dispatch a validated plan. + * @param {Object} plan - { subtasks: [...] } + * @param {Object} [options] + * @param {string} [options.sessionId] + * @param {string} [options.cwd] + * @param {Function} [options.spawnParallel] - (agentTypes[], prompts[], opts) => results[] + * @returns {Promise<{results: Array, levels: Array, stats: Object}>} + */ +async function dispatchPlan(plan, options = {}) { + const spawnParallel = options.spawnParallel || require("../index").spawnParallel; + const subtasks = plan.subtasks; + const byId = new Map(subtasks.map((s) => [s.id, s])); + const levels = topologicalLevels(subtasks); + const resultsById = new Map(); + + let totalInputTokens = 0; + let totalOutputTokens = 0; + let totalSubagents = 0; + + for (let li = 0; li < levels.length; li++) { + const levelIds = levels[li]; + const levelSubtasks = levelIds.map((id) => byId.get(id)); + + const agentTypes = levelSubtasks.map((s) => s.agentType); + const prompts = levelSubtasks.map((s) => s.prompt); + const perTaskContext = levelSubtasks.map((s) => buildContextForSubtask(s, resultsById)); + + logger.info( + { level: li, count: levelIds.length, ids: levelIds }, + "[Decomposition] Dispatching subtask level" + ); + + // spawnParallel shares one options object; pass per-task context by spawning + // the level as individual parallel calls when contexts differ. + const levelResults = await runLevel( + spawnParallel, + agentTypes, + prompts, + perTaskContext, + options + ); + + levelResults.forEach((res, idx) => { + const st = levelSubtasks[idx]; + totalSubagents += 1; + totalInputTokens += res?.stats?.inputTokens || 0; + totalOutputTokens += res?.stats?.outputTokens || 0; + resultsById.set(st.id, { + id: st.id, + agentType: st.agentType, + success: !!res?.success, + result: res?.success ? res.result : null, + error: res?.success ? null : res?.error || "unknown error", + stats: res?.stats || {}, + }); + }); + } + + const results = subtasks.map((s) => resultsById.get(s.id)); + return { + results, + levels, + stats: { + subagents: totalSubagents, + inputTokens: totalInputTokens, + outputTokens: totalOutputTokens, + }, + }; +} + +/** + * Run one level. When subtasks in the level have differing injected contexts we + * spawn them as separate parallel calls (each with its own mainContext), then + * await all. When none need context, a single spawnParallel batch is used. + */ +async function runLevel(spawnParallel, agentTypes, prompts, perTaskContext, options) { + const anyContext = perTaskContext.some((c) => c); + + if (!anyContext) { + return spawnParallel(agentTypes, prompts, { + sessionId: options.sessionId, + cwd: options.cwd, + }); + } + + // Mixed/with-context: one spawnParallel call per subtask so each gets its own + // mainContext, executed concurrently. + const calls = agentTypes.map((type, i) => + spawnParallel([type], [prompts[i]], { + sessionId: options.sessionId, + cwd: options.cwd, + mainContext: perTaskContext[i] ? { relevant_context: perTaskContext[i] } : undefined, + }).then((arr) => arr[0]) + ); + return Promise.all(calls); +} + +module.exports = { + dispatchPlan, + topologicalLevels, + buildContextForSubtask, + compressResult, + MAX_CONTEXT_CHARS, +}; diff --git a/src/agents/decomposition/gate.js b/src/agents/decomposition/gate.js new file mode 100644 index 0000000..0182f14 --- /dev/null +++ b/src/agents/decomposition/gate.js @@ -0,0 +1,136 @@ +/** + * Decomposition gate (Phase 1). + * + * Decides whether breaking a task into isolated-context subtasks is actually + * worth it. This is the make-or-break of the feature: naive "decompose + * everything" loses money, because every subagent carries fixed overhead + * (planning + per-agent handoff/summarisation). Decomposition only pays off + * when the task is (a) genuinely complex, (b) large enough to amortise that + * overhead, and (c) divisible into reasonably independent units. + * + * Pure and synchronous so it can be unit-tested without a model. The caller + * supplies a pre-computed complexity `analysis` (from routing/complexity-analyzer) + * and the raw payload. + */ + +const DEFAULTS = { + minComplexity: 60, // 0-100; only decompose genuinely complex work + minTokens: 3000, // estimated monolithic tokens; below this the overhead wins + minIndependentUnits: 2, // need at least 2 separable pieces to bother + maxSubtasks: 6, +}; + +const ENUMERATION_RE = /^\s*(?:[-*+]|\d+[.)]|step\s+\d+\b)/gim; +const CONJUNCTION_RE = /\b(?:and then|then|also|additionally|as well as|after that|finally|next,)\b/gi; +const IMPERATIVE_RE = /\b(?:add|create|build|implement|write|refactor|update|fix|remove|delete|migrate|test|document|configure|set up|wire|integrate|generate)\b/gi; +const FILE_PATH_RE = /\b[\w./-]+\.(?:js|ts|tsx|jsx|py|go|rs|java|rb|c|cpp|h|json|yaml|yml|md|sql|sh|css|html)\b/gi; + +function _uniqueMatches(text, re) { + const set = new Set(); + const matches = text.match(re) || []; + for (const m of matches) set.add(m.toLowerCase().trim()); + return set; +} + +/** + * Heuristically estimate how many independent units a task contains. + * Conservative: takes the strongest of several weak signals rather than summing + * them, so a single rambling sentence doesn't look like five subtasks. + * @param {string} text + * @returns {number} + */ +function estimateIndependentUnits(text) { + if (!text || typeof text !== "string") return 1; + + const enumerated = (text.match(ENUMERATION_RE) || []).length; + const conjunctions = (text.match(CONJUNCTION_RE) || []).length; + const imperatives = _uniqueMatches(text, IMPERATIVE_RE).size; + const files = _uniqueMatches(text, FILE_PATH_RE).size; + + // Each signal is an independent lower-bound estimate of separable units. + const signals = [ + enumerated, // explicit list items + conjunctions + 1, // "do A and then B" → 2 units + imperatives, // distinct action verbs + files, // distinct files usually map to distinct work + ]; + + const estimate = Math.max(...signals, 1); + return estimate; +} + +/** + * Decide whether to decompose. + * @param {Object} analysis - result of analyzeComplexity(payload) + * @param {Object} payload - the request payload + * @param {Object} [options] + * @param {Object} [options.config] - threshold overrides (see DEFAULTS) + * @param {string} [options.riskLevel] - 'low'|'medium'|'high'; 'high' disables decomposition + * @param {string} [options.taskText] - explicit task text (else derived from analysis/payload) + * @returns {{ decompose: boolean, reason: string, signals: Object }} + */ +function shouldDecompose(analysis, payload = {}, options = {}) { + const cfg = { ...DEFAULTS, ...(options.config || {}) }; + + const score = Number(analysis?.score ?? 0); + const estimatedTokens = Number( + analysis?.breakdown?.tokens?.estimated ?? options.estimatedTokens ?? 0 + ); + + const taskText = + options.taskText || + analysis?.content || + _firstUserText(payload) || + ""; + + const independentUnits = estimateIndependentUnits(taskText); + + const signals = { + score, + estimatedTokens, + independentUnits, + riskLevel: options.riskLevel || "low", + thresholds: cfg, + }; + + // Never decompose high-risk work — keep it in one capable context where the + // exempt-from-laziness concerns (validation/security) stay coherent. + if (options.riskLevel === "high") { + return { decompose: false, reason: "high_risk_skip", signals }; + } + + if (score < cfg.minComplexity) { + return { decompose: false, reason: "below_complexity_threshold", signals }; + } + + if (estimatedTokens < cfg.minTokens) { + return { decompose: false, reason: "too_small_to_amortise_overhead", signals }; + } + + if (independentUnits < cfg.minIndependentUnits) { + return { decompose: false, reason: "not_divisible", signals }; + } + + return { decompose: true, reason: "decompose_worthwhile", signals }; +} + +function _firstUserText(payload) { + const messages = payload?.messages; + if (!Array.isArray(messages)) return ""; + const user = [...messages].reverse().find((m) => m.role === "user"); + if (!user) return ""; + if (typeof user.content === "string") return user.content; + if (Array.isArray(user.content)) { + return user.content + .filter((b) => b?.type === "text" || typeof b?.text === "string") + .map((b) => b.text || "") + .join("\n"); + } + return ""; +} + +module.exports = { + shouldDecompose, + estimateIndependentUnits, + DEFAULTS, +}; diff --git a/src/agents/decomposition/index.js b/src/agents/decomposition/index.js new file mode 100644 index 0000000..df220db --- /dev/null +++ b/src/agents/decomposition/index.js @@ -0,0 +1,183 @@ +/** + * Task decomposition — orchestration entry point. + * + * Ties the phases together: + * 1. gate — decide if decomposing is worth it (cost-aware) + * 2. planner — produce a validated subtask DAG (one model call) + * 3. dispatcher — run subtasks (parallel within dependency levels, isolated context) + * 4. synthesizer — combine results into the final answer (one model call) + * 5. quality — confidence-score the synthesis; flag low-confidence output + * 6. telemetry — record decision + estimated net token savings; shadow mode + * + * Opt-in via TASK_DECOMPOSITION_ENABLED=true. Requires AGENTS_ENABLED=true + * (it builds on the subagent machinery). Any failure degrades gracefully to a + * non-decomposed result so the caller can solve monolithically. + */ + +const config = require("../../config"); +const logger = require("../../logger"); +const { shouldDecompose } = require("./gate"); +const { generatePlan } = require("./planner"); +const { dispatchPlan } = require("./dispatcher"); +const { synthesize } = require("./synthesizer"); +const telemetry = require("./telemetry"); +const { analyzeComplexity } = require("../../routing/complexity-analyzer"); +const confidenceScorer = require("../../routing/confidence-scorer"); + +const CODE_HINT_RE = /\b(code|function|implement|refactor|bug|class|api|module|test)\b/i; + +function getConfig() { + return ( + config.taskDecomposition || { + enabled: false, + shadow: false, + planModel: "sonnet", + synthModel: "sonnet", + minConfidence: 0.5, + gate: {}, + } + ); +} + +/** + * @param {string} task - the task text to (maybe) decompose + * @param {Object} [options] + * @param {string} [options.sessionId] + * @param {string} [options.cwd] + * @param {string} [options.riskLevel] - 'high' disables decomposition + * @param {Object} [options._inject] - test seams { generatePlan, dispatchPlan, synthesize, analyze } + * @returns {Promise} result object (see below) + */ +async function runDecomposedTask(task, options = {}) { + const cfg = getConfig(); + const inject = options._inject || {}; + + if (!cfg.enabled) { + return { decomposed: false, reason: "disabled" }; + } + if (!config.agents?.enabled) { + return { decomposed: false, reason: "agents_disabled" }; + } + if (!task || typeof task !== "string") { + return { decomposed: false, reason: "empty_task" }; + } + + const payload = { messages: [{ role: "user", content: task }] }; + + let analysis; + try { + analysis = await (inject.analyze || analyzeComplexity)(payload); + } catch (err) { + logger.warn({ err: err.message }, "[Decomposition] Complexity analysis failed"); + return { decomposed: false, reason: "analysis_failed" }; + } + + const monolithicTokens = + analysis?.breakdown?.tokens?.estimated || telemetry.estimateTokens(task); + + const gate = shouldDecompose(analysis, payload, { + config: cfg.gate, + riskLevel: options.riskLevel, + taskText: task, + }); + + // Shadow mode: record what we WOULD do, but never actually decompose. + if (cfg.shadow) { + telemetry.record({ + mode: "shadow", + sessionId: options.sessionId, + gate, + monolithicTokens, + }); + return { decomposed: false, reason: "shadow_mode", gate }; + } + + if (!gate.decompose) { + telemetry.record({ mode: "live", decision: "skip", gate, monolithicTokens }); + return { decomposed: false, reason: gate.reason, gate }; + } + + // Phase 2: plan + const plan = await (inject.generatePlan || generatePlan)({ + task, + model: cfg.planModel, + maxSubtasks: cfg.gate?.maxSubtasks || 6, + }); + if (!plan) { + telemetry.record({ mode: "live", decision: "plan_failed", gate, monolithicTokens }); + return { decomposed: false, reason: "plan_failed", gate }; + } + + // Phase 3: dispatch + const dispatch = await (inject.dispatchPlan || dispatchPlan)(plan, { + sessionId: options.sessionId, + cwd: options.cwd, + }); + + // Phase 4: synthesize + const synth = await (inject.synthesize || synthesize)({ + task, + subtaskResults: dispatch.results, + model: cfg.synthModel, + }); + + // Phase 5: quality gate + const taskType = CODE_HINT_RE.test(task) ? "code" : "reasoning"; + let confidence = 1; + try { + confidence = await confidenceScorer.score( + { content: [{ type: "text", text: synth.text }] }, + { taskType } + ); + } catch (err) { + logger.debug({ err: err.message }, "[Decomposition] Confidence scoring failed"); + } + const belowThreshold = confidence < (cfg.minConfidence ?? 0.5); + + const savings = telemetry.estimateSavings({ + monolithicTokens, + planUsage: plan.usage, + dispatchStats: dispatch.stats, + synthUsage: synth.usage, + }); + + telemetry.record({ + mode: "live", + decision: "decomposed", + sessionId: options.sessionId, + gate, + subtasks: plan.subtasks.length, + levels: dispatch.levels.length, + strategy: plan.strategy, + confidence, + belowThreshold, + synthesisFallback: synth.fallback, + savings, + }); + + logger.info( + { + subtasks: plan.subtasks.length, + levels: dispatch.levels.length, + confidence: confidence.toFixed(2), + savedTokens: savings.savedTokens, + }, + "[Decomposition] Task decomposed" + ); + + return { + decomposed: true, + result: synth.text, + reason: "decomposed", + plan, + subtaskResults: dispatch.results, + quality: { confidence, belowThreshold, taskType }, + // When confidence is low, the caller should prefer a monolithic re-solve. + recommendFallback: belowThreshold, + stats: { ...dispatch.stats, levels: dispatch.levels.length }, + savings, + gate, + }; +} + +module.exports = { runDecomposedTask, getConfig }; diff --git a/src/agents/decomposition/model-call.js b/src/agents/decomposition/model-call.js new file mode 100644 index 0000000..5e9760f --- /dev/null +++ b/src/agents/decomposition/model-call.js @@ -0,0 +1,75 @@ +/** + * Thin model-call helper for the decomposition planner/synthesizer. + * + * Mirrors the provider-forcing logic in agents/executor.js so planning and + * synthesis use the configured MODEL_PROVIDER rather than hard-falling back to + * Azure. The actual invoker is injectable (`opts.invoke`) so the planner and + * synthesizer can be unit-tested without a live provider. + */ + +const logger = require("../../logger"); + +function resolveForceProvider(model) { + const modelLower = String(model || "").toLowerCase(); + const isClaudeFamily = + modelLower.includes("claude") || + modelLower.includes("sonnet") || + modelLower.includes("haiku") || + modelLower.includes("opus"); + const isGptFamily = modelLower.includes("gpt"); + + if (isClaudeFamily || isGptFamily) { + const config = require("../../config"); + return config.modelProvider?.type || config.modelProvider?.provider || null; + } + return null; +} + +/** + * Call the model and return the Anthropic-format response JSON. + * @param {Object} params + * @param {Array} params.messages + * @param {string} params.model + * @param {number} [params.maxTokens=2048] + * @param {number} [params.temperature=0.2] + * @param {Function} [params.invoke] - injectable invoker (default: clients/databricks.invokeModel) + * @returns {Promise} Anthropic-format response JSON + */ +async function callModel({ messages, model, maxTokens = 2048, temperature = 0.2, invoke } = {}) { + const invoker = invoke || require("../../clients/databricks").invokeModel; + const payload = { + model, + messages, + max_tokens: maxTokens, + temperature, + }; + const forceProvider = resolveForceProvider(model); + + const response = await invoker(payload, { forceProvider }); + if (!response || !response.json) { + throw new Error("Invalid model response in decomposition model-call"); + } + return response.json; +} + +/** + * Extract concatenated text from an Anthropic-format response. + */ +function extractText(responseJson) { + const content = responseJson?.content; + if (!Array.isArray(content)) return ""; + return content + .filter((b) => b?.type === "text") + .map((b) => b.text || "") + .join("\n") + .trim(); +} + +function sumUsage(responseJson) { + return { + inputTokens: responseJson?.usage?.input_tokens || 0, + outputTokens: responseJson?.usage?.output_tokens || 0, + }; +} + +module.exports = { callModel, extractText, sumUsage, resolveForceProvider, logger }; diff --git a/src/agents/decomposition/planner.js b/src/agents/decomposition/planner.js new file mode 100644 index 0000000..b18457c --- /dev/null +++ b/src/agents/decomposition/planner.js @@ -0,0 +1,223 @@ +/** + * Decomposition planner (Phase 2). + * + * Turns a complex task into a small subtask DAG using a single model call + * (plan-and-solve style — plan generated in one shot). Output is validated: + * ids unique, dependencies reference real ids, no cycles, subtask count capped. + * If anything fails to parse/validate the planner returns null and the caller + * falls back to a monolithic solve. + */ + +const { callModel, extractText } = require("./model-call"); +const logger = require("../../logger"); + +// Agent types the dispatcher knows how to spawn. Planner is steered toward +// these; unknown types are coerced to general-purpose at dispatch time. +const KNOWN_AGENT_TYPES = [ + "Explore", + "Plan", + "general-purpose", + "Test", + "Debug", + "Fix", + "Refactor", + "Documentation", +]; + +function buildPlannerPrompt(task, maxSubtasks) { + return `You are a task-decomposition planner. Break the task below into the SMALLEST set of focused subtasks that can be solved with isolated context. Fewer is better — do NOT over-split. If the task is not genuinely divisible, return a single subtask. + +Rules: +- At most ${maxSubtasks} subtasks. +- Each subtask must be independently solvable given only its prompt plus the results of the subtasks it depends on. +- Mark dependencies via "dependsOn" (array of subtask ids). Independent subtasks (empty dependsOn) will run in parallel. +- Prefer assigning each subtask an agent type from: ${KNOWN_AGENT_TYPES.join(", ")}. +- Keep each subtask prompt self-contained and specific. + +Respond with ONLY a JSON object, no prose, in exactly this shape: +{ + "strategy": "one short sentence on how you split it", + "subtasks": [ + { "id": "s1", "agentType": "Explore", "prompt": "...", "dependsOn": [] } + ] +} + +TASK: +${task}`; +} + +/** + * Extract the first balanced JSON object from a string (handles models that + * wrap JSON in prose or ```json fences). + */ +function extractJsonObject(text) { + if (!text) return null; + const fenced = text.match(/```(?:json)?\s*([\s\S]*?)```/i); + const candidate = fenced ? fenced[1] : text; + + const start = candidate.indexOf("{"); + if (start === -1) return null; + + let depth = 0; + let inString = false; + let escape = false; + for (let i = start; i < candidate.length; i++) { + const ch = candidate[i]; + if (escape) { + escape = false; + continue; + } + if (ch === "\\") { + escape = true; + continue; + } + if (ch === '"') { + inString = !inString; + continue; + } + if (inString) continue; + if (ch === "{") depth++; + else if (ch === "}") { + depth--; + if (depth === 0) { + const slice = candidate.slice(start, i + 1); + try { + return JSON.parse(slice); + } catch { + return null; + } + } + } + } + return null; +} + +/** + * Validate and normalise a parsed plan. Returns a clean plan or null. + */ +function validatePlan(parsed, maxSubtasks) { + if (!parsed || !Array.isArray(parsed.subtasks) || parsed.subtasks.length === 0) { + return null; + } + + const subtasks = []; + const seenIds = new Set(); + + for (let i = 0; i < parsed.subtasks.length && subtasks.length < maxSubtasks; i++) { + const st = parsed.subtasks[i]; + if (!st || typeof st.prompt !== "string" || st.prompt.trim().length === 0) { + return null; // malformed subtask → reject whole plan, fall back + } + const id = typeof st.id === "string" && st.id.trim() ? st.id.trim() : `s${i + 1}`; + if (seenIds.has(id)) return null; // duplicate ids + seenIds.add(id); + + const agentType = KNOWN_AGENT_TYPES.includes(st.agentType) + ? st.agentType + : "general-purpose"; + + const dependsOn = Array.isArray(st.dependsOn) + ? st.dependsOn.filter((d) => typeof d === "string") + : []; + + subtasks.push({ id, agentType, prompt: st.prompt.trim(), dependsOn }); + } + + // Dependencies must reference real ids and contain no cycles. + for (const st of subtasks) { + for (const dep of st.dependsOn) { + if (!seenIds.has(dep)) return null; // dangling dependency + } + } + if (hasCycle(subtasks)) return null; + + return { + strategy: typeof parsed.strategy === "string" ? parsed.strategy : "", + subtasks, + }; +} + +/** + * Cycle detection via DFS colouring. + */ +function hasCycle(subtasks) { + const byId = new Map(subtasks.map((s) => [s.id, s])); + const state = new Map(); // id → 0 unvisited, 1 visiting, 2 done + + function visit(id) { + const cur = state.get(id) || 0; + if (cur === 1) return true; // back-edge → cycle + if (cur === 2) return false; + state.set(id, 1); + const node = byId.get(id); + for (const dep of node?.dependsOn || []) { + if (visit(dep)) return true; + } + state.set(id, 2); + return false; + } + + for (const s of subtasks) { + if (visit(s.id)) return true; + } + return false; +} + +/** + * Generate a validated plan for a task. + * @param {Object} params + * @param {string} params.task + * @param {string} [params.model="sonnet"] - planning needs reasoning; use a capable model + * @param {number} [params.maxSubtasks=6] + * @param {Function} [params.invoke] - injectable model invoker (for tests) + * @returns {Promise<{strategy:string, subtasks:Array, usage:Object}|null>} + */ +async function generatePlan({ task, model = "sonnet", maxSubtasks = 6, invoke } = {}) { + if (!task || typeof task !== "string") return null; + + let responseJson; + try { + responseJson = await callModel({ + messages: [{ role: "user", content: buildPlannerPrompt(task, maxSubtasks) }], + model, + maxTokens: 1500, + temperature: 0.1, + invoke, + }); + } catch (err) { + logger.warn({ err: err.message }, "[Decomposition] Planner model call failed"); + return null; + } + + const text = extractText(responseJson); + const parsed = extractJsonObject(text); + const plan = validatePlan(parsed, maxSubtasks); + + if (!plan) { + logger.warn( + { preview: text.slice(0, 200) }, + "[Decomposition] Plan failed validation — will fall back to monolithic" + ); + return null; + } + + plan.usage = { + inputTokens: responseJson?.usage?.input_tokens || 0, + outputTokens: responseJson?.usage?.output_tokens || 0, + }; + + logger.info( + { subtasks: plan.subtasks.length, strategy: plan.strategy }, + "[Decomposition] Plan generated" + ); + return plan; +} + +module.exports = { + generatePlan, + validatePlan, + extractJsonObject, + hasCycle, + buildPlannerPrompt, + KNOWN_AGENT_TYPES, +}; diff --git a/src/agents/decomposition/synthesizer.js b/src/agents/decomposition/synthesizer.js new file mode 100644 index 0000000..72afa02 --- /dev/null +++ b/src/agents/decomposition/synthesizer.js @@ -0,0 +1,89 @@ +/** + * Result synthesizer (Phase 4). + * + * Combines the (compressed) results of all subtasks into one coherent final + * answer to the original task. Single model call. If synthesis fails, the + * caller falls back to concatenating the subtask results. + */ + +const { callModel, extractText } = require("./model-call"); +const logger = require("../../logger"); + +const MAX_RESULT_CHARS = 4000; + +function buildSynthesisPrompt(task, subtaskResults) { + const blocks = subtaskResults + .map((r) => { + const status = r.success ? "OK" : `FAILED (${r.error})`; + const body = r.success + ? truncate(r.result, MAX_RESULT_CHARS) + : "(no result)"; + return `### Subtask ${r.id} [${r.agentType}] — ${status}\n${body}`; + }) + .join("\n\n"); + + return `You are synthesizing the results of several subtasks into one final answer for the original request. Integrate the findings into a single coherent response. Resolve overlaps, note any subtask that failed, and do not invent results that no subtask produced. + +ORIGINAL TASK: +${task} + +SUBTASK RESULTS: +${blocks} + +Write the final answer now.`; +} + +function truncate(text, max) { + if (typeof text !== "string") return String(text ?? ""); + return text.length <= max ? text : text.slice(0, max) + "\n…[truncated]"; +} + +/** + * Concatenation fallback used when the synthesis model call fails. + */ +function concatFallback(subtaskResults) { + return subtaskResults + .filter((r) => r.success && r.result) + .map((r) => `## ${r.id} (${r.agentType})\n${r.result}`) + .join("\n\n"); +} + +/** + * @param {Object} params + * @param {string} params.task + * @param {Array} params.subtaskResults - from dispatcher + * @param {string} [params.model="sonnet"] + * @param {Function} [params.invoke] + * @returns {Promise<{text:string, fallback:boolean, usage:Object}>} + */ +async function synthesize({ task, subtaskResults, model = "sonnet", invoke } = {}) { + const anySuccess = subtaskResults.some((r) => r.success); + if (!anySuccess) { + return { text: "All subtasks failed; no result could be produced.", fallback: true, usage: {} }; + } + + try { + const responseJson = await callModel({ + messages: [{ role: "user", content: buildSynthesisPrompt(task, subtaskResults) }], + model, + maxTokens: 4096, + temperature: 0.3, + invoke, + }); + const text = extractText(responseJson); + if (!text) throw new Error("Empty synthesis"); + return { + text, + fallback: false, + usage: { + inputTokens: responseJson?.usage?.input_tokens || 0, + outputTokens: responseJson?.usage?.output_tokens || 0, + }, + }; + } catch (err) { + logger.warn({ err: err.message }, "[Decomposition] Synthesis failed — concatenating results"); + return { text: concatFallback(subtaskResults), fallback: true, usage: {} }; + } +} + +module.exports = { synthesize, buildSynthesisPrompt, concatFallback }; diff --git a/src/agents/decomposition/telemetry.js b/src/agents/decomposition/telemetry.js new file mode 100644 index 0000000..6500351 --- /dev/null +++ b/src/agents/decomposition/telemetry.js @@ -0,0 +1,55 @@ +/** + * Decomposition telemetry + shadow mode (Phase 6). + * + * Appends one JSON line per decomposition decision to + * data/decomposition-decisions.jsonl so the net token effect can be audited. + * Because the research is clear that decomposition can COST more than it saves, + * a shadow mode (TASK_DECOMPOSITION_SHADOW=true) runs the gate + records what it + * WOULD have done without actually decomposing — so savings can be validated on + * real traffic before enabling for real. + */ + +const fs = require("fs"); +const path = require("path"); +const logger = require("../../logger"); + +const LOG_PATH = path.join(__dirname, "../../../data/decomposition-decisions.jsonl"); + +function estimateTokens(text) { + if (typeof text !== "string") return 0; + return Math.ceil(text.length / 4); +} + +/** + * Rough net-savings estimate. + * monolithic ≈ what one big context would have cost (estimated input tokens). + * decomposed ≈ planning + Σ(subagent in+out) + synthesis. + * Positive `savedTokens` = decomposition was cheaper. + */ +function estimateSavings({ monolithicTokens, planUsage, dispatchStats, synthUsage }) { + const decomposed = + (planUsage?.inputTokens || 0) + + (planUsage?.outputTokens || 0) + + (dispatchStats?.inputTokens || 0) + + (dispatchStats?.outputTokens || 0) + + (synthUsage?.inputTokens || 0) + + (synthUsage?.outputTokens || 0); + return { + monolithicTokens: monolithicTokens || 0, + decomposedTokens: decomposed, + savedTokens: (monolithicTokens || 0) - decomposed, + }; +} + +function record(entry) { + const line = { timestamp: Date.now(), ...entry }; + try { + fs.mkdirSync(path.dirname(LOG_PATH), { recursive: true }); + fs.appendFileSync(LOG_PATH, JSON.stringify(line) + "\n"); + } catch (err) { + logger.debug({ err: err.message }, "[Decomposition] Telemetry append failed"); + } + return line; +} + +module.exports = { record, estimateSavings, estimateTokens, LOG_PATH }; diff --git a/src/clients/databricks.js b/src/clients/databricks.js index 92f4ac4..6c2bac2 100644 --- a/src/clients/databricks.js +++ b/src/clients/databricks.js @@ -2185,6 +2185,13 @@ async function invokeModel(body, options = {}) { const { injectPromptCaching } = require('./prompt-cache-injection'); injectPromptCaching(body, initialProvider); + // Always-on markdown formatting guard. Stops formatting-weak backends + // (Moonshot/Kimi, Ollama, etc.) from emitting mangled ASCII box-drawing + // "diagrams". Keyed off the routing-resolved provider/model; skipped for + // Claude-family backends which already format cleanly. + const { injectFormatGuard } = require('../context/output-format-guard'); + injectFormatGuard(body, { provider: initialProvider, model: tierSelectedModel }); + // Build routing decision object for response headers const routingDecision = { provider: initialProvider, @@ -2384,6 +2391,71 @@ async function invokeModel(body, options = {}) { healthTracker.recordFailure(initialProvider, err, err.status); getLatencyTracker().record(initialProvider, routingDecision?.model, failLatency); + // Tier-aware escalate-then-demote fallback (TIER_FALLBACK_ENABLED). + // On failure, try a MORE capable tier first (climb toward REASONING); only + // if every higher tier is unavailable do we fall downward to SIMPLE/local. + // Runs before the flat global fallback below and is never silent. + if (config.tierFallback?.enabled && !options._tierFallbackInner && routingDecision.tier) { + const { getFallbackChain } = require("../routing/tier-fallback"); + const chain = getFallbackChain(routingDecision.tier); + for (const cand of chain) { + try { + logger.warn({ + fromTier: routingDecision.tier, + fromProvider: initialProvider, + toTier: cand.tier, + toProvider: cand.provider, + toModel: cand.model, + direction: cand.direction, + }, "[TierFallback] Primary tier failed — attempting tier fallback"); + + const attempt = await invokeModel( + { ...body, _tierModel: cand.model }, + { + forceProvider: cand.provider, + _tierFallbackInner: true, + disableFallback: true, + _cascadeInner: true, + workspace, + tenantPolicy, + } + ); + + metricsCollector.recordFallbackAttempt(initialProvider, cand.provider, "tier_fallback"); + logger.warn({ + servedTier: cand.tier, + servedProvider: cand.provider, + fromTier: routingDecision.tier, + direction: cand.direction, + }, "[TierFallback] Served by tier fallback"); + + return { + ...attempt, + actualProvider: cand.provider, + routingDecision: { + ...routingDecision, + provider: cand.provider, + model: cand.model, + servedTier: cand.tier, + fromTier: routingDecision.tier, + fallback: true, + fallbackDirection: cand.direction, + method: "tier_fallback", + }, + }; + } catch (innerErr) { + logger.warn( + { toProvider: cand.provider, error: innerErr.message }, + "[TierFallback] Candidate failed, trying next" + ); + } + } + logger.warn( + { fromTier: routingDecision.tier }, + "[TierFallback] All tier candidates exhausted — falling through" + ); + } + // Check if we should fallback (any provider can fall back, not just ollama) const shouldFallback = isFallbackEnabled() && diff --git a/src/config/index.js b/src/config/index.js index e4ac410..829be3a 100644 --- a/src/config/index.js +++ b/src/config/index.js @@ -518,6 +518,12 @@ const agentsDefaultModel = process.env.AGENTS_DEFAULT_MODEL ?? "haiku"; const agentsMaxSteps = Number.parseInt(process.env.AGENTS_MAX_STEPS ?? "15", 10); const agentsTimeout = Number.parseInt(process.env.AGENTS_TIMEOUT ?? "120000", 10); +// Task decomposition configuration (opt-in; builds on the agents subsystem). +// Single env toggle; everything else is hardcoded below. +const taskDecompositionEnabled = process.env.TASK_DECOMPOSITION_ENABLED === "true"; + +// Tier-aware fallback (escalate-then-demote) is always on (hardcoded). + // LLM Audit logging configuration const auditEnabled = process.env.LLM_AUDIT_ENABLED === "true"; // default false const auditLogFile = process.env.LLM_AUDIT_LOG_FILE ?? path.join(process.cwd(), "logs", "llm-audit.log"); @@ -775,6 +781,23 @@ var config = { maxSteps: Number.isNaN(agentsMaxSteps) ? 15 : agentsMaxSteps, timeout: Number.isNaN(agentsTimeout) ? 120000 : agentsTimeout, }, + taskDecomposition: { + enabled: taskDecompositionEnabled, // only env-driven knob (TASK_DECOMPOSITION_ENABLED) + // Hardcoded defaults — tune here if needed. + shadow: false, + planModel: "sonnet", + synthModel: "sonnet", + minConfidence: 0.5, + gate: { + minComplexity: 60, + minTokens: 3000, + maxSubtasks: 6, + minIndependentUnits: 2, + }, + }, + tierFallback: { + enabled: true, // always on (hardcoded): escalate-then-demote on provider failure; floor = SIMPLE + }, tests: { defaultCommand: testDefaultCommand ? testDefaultCommand.trim() : null, defaultArgs: testDefaultArgs, diff --git a/src/context/output-format-guard.js b/src/context/output-format-guard.js new file mode 100644 index 0000000..d1ee600 --- /dev/null +++ b/src/context/output-format-guard.js @@ -0,0 +1,99 @@ +/** + * Output Format Guard + * + * Appends a short formatting instruction to the system prompt so weaker + * backends (Moonshot/Kimi, Ollama, etc.) stop emitting mangled ASCII/Unicode + * box-drawing "diagrams" that render as garbage in clients. The actual backend + * is decided by tier routing — so even when the client asks for "claude-opus", + * the request may be served by a model that formats poorly. This normalizes the + * presentation without changing which model serves the request. + * + * Always-on (no env flag). It is skipped only for Claude-family backends, which + * already produce clean GitHub-flavored markdown — injecting there would just + * waste tokens. The skip biases toward injecting: if we can't tell, we inject + * (a false-inject is harmless ~50 tokens; a false-skip leaves the garble). + * + * Keyed off the ROUTING-RESOLVED provider/model, never the client's requested + * body.model (which is just a label once tier routing is on). + * + * @module context/output-format-guard + */ + +const logger = require("../logger"); + +const MARKER = "[fmt-guard]"; + +// Model names that already produce clean markdown. +const CLEAN_FORMATTER_RE = /\b(claude|sonnet|opus|haiku)\b/i; +// Providers that are always Claude-backed. +const CLEAN_PROVIDERS = new Set(["azure-anthropic"]); + +const GUARD_TEXT = + `${MARKER} Formatting rules for your response: use plain GitHub-flavored markdown only. ` + + "Do NOT draw diagrams or boxes with ASCII or Unicode line-drawing characters " + + "(such as ┌ ─ │ └ ├ ┤ ┬ ┴ ╔ ═ ║), and do NOT wrap headings or code in decorative borders. " + + "Represent structure and relationships with normal markdown headings, nested bullet lists, " + + "numbered lists, or tables. Use standard triple-backtick fenced code blocks for code. " + + "Keep code, file paths, commands, and URLs exact."; + +/** + * Whether the resolved backend already formats cleanly (→ skip injection). + * @param {string} provider - routing-resolved provider + * @param {string} model - routing-resolved model (NOT the client's requested model) + * @returns {boolean} + */ +function producesCleanMarkdown(provider, model) { + if (CLEAN_PROVIDERS.has(String(provider || "").toLowerCase())) return true; + if (model && CLEAN_FORMATTER_RE.test(String(model))) return true; + return false; +} + +/** + * Append the guard text to a system prompt that may be a string or an array of + * Anthropic content blocks. Idempotent via MARKER. Pure — returns the new value. + */ +function appendToSystem(system, text) { + // String (or empty) system prompt. + if (system == null || typeof system === "string") { + const base = system || ""; + if (base.includes(MARKER)) return base; + return base ? `${base}\n\n${text}` : text; + } + // Anthropic array-of-blocks system prompt. + if (Array.isArray(system)) { + const already = system.some( + (b) => typeof b?.text === "string" && b.text.includes(MARKER) + ); + if (already) return system; + return [...system, { type: "text", text }]; + } + return system; +} + +/** + * Inject the formatting guard into body.system in place, unless the resolved + * backend already formats cleanly. Always-on; idempotent. + * + * @param {object} body - request body (mutated in place) + * @param {object} [opts] + * @param {string} [opts.provider] - routing-resolved provider + * @param {string} [opts.model] - routing-resolved model + * @returns {object} body + */ +function injectFormatGuard(body, opts = {}) { + if (!body) return body; + const { provider, model } = opts; + if (producesCleanMarkdown(provider, model)) return body; + + body.system = appendToSystem(body.system, GUARD_TEXT); + logger.debug({ provider, model }, "[FormatGuard] Injected markdown formatting guard"); + return body; +} + +module.exports = { + injectFormatGuard, + producesCleanMarkdown, + appendToSystem, + MARKER, + GUARD_TEXT, +}; diff --git a/src/routing/index.js b/src/routing/index.js index b760fc3..2d3f5b5 100644 --- a/src/routing/index.js +++ b/src/routing/index.js @@ -697,6 +697,14 @@ function getRoutingHeaders(decision) { headers['X-Lynkr-Cost-Optimized'] = 'true'; } + // Tier-aware fallback surfacing (never silent). + if (decision.fallback) { + headers['X-Lynkr-Fallback'] = 'true'; + if (decision.fromTier) headers['X-Lynkr-Fallback-From-Tier'] = decision.fromTier; + if (decision.servedTier) headers['X-Lynkr-Served-Tier'] = decision.servedTier; + if (decision.fallbackDirection) headers['X-Lynkr-Fallback-Direction'] = decision.fallbackDirection; + } + if (decision.risk?.level) { headers['X-Lynkr-Risk'] = decision.risk.level; const hits = Array.from(new Set([ diff --git a/src/routing/tier-fallback.js b/src/routing/tier-fallback.js new file mode 100644 index 0000000..f4361f6 --- /dev/null +++ b/src/routing/tier-fallback.js @@ -0,0 +1,91 @@ +/** + * Tier-aware fallback chain (escalate-then-demote). + * + * When a tier's provider fails, prefer a MORE capable tier first (climb toward + * REASONING), and only if every higher tier is also unavailable do we fall + * downward — all the way to the local SIMPLE tier as a last resort. This biases + * for correctness/availability over cost, matching a conservative routing policy. + * + * Example ladder: SIMPLE → MEDIUM → COMPLEX → REASONING + * - COMPLEX fails → [REASONING, MEDIUM, SIMPLE] + * - REASONING fails → [COMPLEX, MEDIUM, SIMPLE] + * - MEDIUM fails → [COMPLEX, REASONING, SIMPLE] + * + * Pure and dependency-injectable so it can be unit-tested without real providers. + */ + +const logger = require("../logger"); + +const TIER_ORDER = ["SIMPLE", "MEDIUM", "COMPLEX", "REASONING"]; + +/** Default availability check: a provider is unavailable if its circuit is OPEN. */ +function defaultIsProviderAvailable(provider) { + try { + const { getCircuitBreakerRegistry } = require("../clients/circuit-breaker"); + const registry = getCircuitBreakerRegistry(); + const all = typeof registry.getAll === "function" ? registry.getAll() : []; + const entry = Array.isArray(all) ? all.find((b) => b.name === provider) : null; + return !(entry && entry.state === "OPEN"); + } catch { + return true; // fail open — never block fallback on a health-check error + } +} + +/** Resolve a tier name to { tier, provider, model }, or null if not configured. */ +function resolveTier(tier, selector) { + try { + const sel = selector || require("./model-tiers").getModelTierSelector(); + const r = sel.selectModel(tier); + if (!r || !r.provider || !r.model) return null; + return { tier, provider: r.provider, model: r.model }; + } catch { + return null; // tier not configured (TIER_ unset) — skip it + } +} + +/** + * Build the escalate-then-demote fallback chain for a failed tier. + * + * @param {string} currentTier - the tier whose provider just failed + * @param {Object} [opts] + * @param {Object} [opts.selector] - model-tiers selector (injected for tests) + * @param {Function} [opts.isProviderAvailable] - (provider) => boolean (injected for tests) + * @returns {Array<{tier,provider,model,demotedFrom,direction}>} ordered candidates + */ +function getFallbackChain(currentTier, opts = {}) { + const isAvailable = opts.isProviderAvailable || defaultIsProviderAvailable; + const idx = TIER_ORDER.indexOf(currentTier); + if (idx === -1) return []; + + const higher = TIER_ORDER.slice(idx + 1); // ascending toward REASONING + const lower = TIER_ORDER.slice(0, idx).reverse(); // descending toward SIMPLE + const ordered = [...higher, ...lower]; + + const seen = new Set(); + // Never re-attempt the exact provider:model that just failed. + const current = resolveTier(currentTier, opts.selector); + if (current) seen.add(`${current.provider}:${current.model}`); + + const chain = []; + for (const tier of ordered) { + const resolved = resolveTier(tier, opts.selector); + if (!resolved) continue; + const key = `${resolved.provider}:${resolved.model}`; + if (seen.has(key)) continue; + seen.add(key); + if (!isAvailable(resolved.provider)) continue; + chain.push({ + ...resolved, + demotedFrom: currentTier, + direction: TIER_ORDER.indexOf(tier) > idx ? "up" : "down", + }); + } + + logger.debug( + { currentTier, chain: chain.map((c) => `${c.tier}:${c.provider}`) }, + "[TierFallback] Built fallback chain" + ); + return chain; +} + +module.exports = { getFallbackChain, resolveTier, TIER_ORDER }; diff --git a/src/server.js b/src/server.js index 8af4e98..7b99096 100644 --- a/src/server.js +++ b/src/server.js @@ -29,6 +29,7 @@ const { registerTaskTools } = require("./tools/tasks"); const { registerTestTools } = require("./tools/tests"); const { registerMcpTools } = require("./tools/mcp"); const { registerAgentTaskTool } = require("./tools/agent-task"); +const { registerDecomposeTool } = require("./tools/decompose"); const { initConfigWatcher, getConfigWatcher } = require("./config/watcher"); const { initializeHeadroom, shutdownHeadroom, getHeadroomManager } = require("./headroom"); const { getWorkerPool, isWorkerPoolReady } = require("./workers/pool"); @@ -62,6 +63,7 @@ if (LAZY_TOOLS_ENABLED) { registerTestTools(); registerMcpTools(); registerAgentTaskTool(); + registerDecomposeTool(); logger.info({ mode: "eager" }, "All tools loaded at startup"); } diff --git a/src/tools/decompose.js b/src/tools/decompose.js new file mode 100644 index 0000000..5d30d88 --- /dev/null +++ b/src/tools/decompose.js @@ -0,0 +1,91 @@ +const { registerTool } = require("."); +const { runDecomposedTask } = require("../agents/decomposition"); +const logger = require("../logger"); + +/** + * DecomposeTask tool — breaks a complex task into focused subtasks with isolated + * context, runs them (parallel where independent), and synthesizes the result. + * + * Opt-in: requires TASK_DECOMPOSITION_ENABLED=true and AGENTS_ENABLED=true. + * Degrades gracefully — if the gate decides decomposition isn't worth it (or + * planning fails), it returns ok:true with decomposed:false and a reason so the + * caller can solve the task monolithically. + */ +function registerDecomposeTool() { + registerTool( + "DecomposeTask", + async ({ args = {} }, context = {}) => { + const task = args.task || args.prompt || args.description; + + if (!task || typeof task !== "string") { + return { + ok: false, + status: 400, + content: JSON.stringify({ error: "task is required" }, null, 2), + }; + } + + logger.info( + { task: task.slice(0, 100), sessionId: context.sessionId }, + "DecomposeTask: evaluating task for decomposition" + ); + + try { + const result = await runDecomposedTask(task, { + sessionId: context.sessionId, + cwd: context.cwd, + riskLevel: args.riskLevel || context.riskLevel, + }); + + if (result.decomposed) { + return { + ok: true, + status: 200, + content: result.result, + metadata: { + decomposed: true, + subtasks: result.plan?.subtasks?.length, + levels: result.stats?.levels, + strategy: result.plan?.strategy, + confidence: result.quality?.confidence, + recommendFallback: result.recommendFallback, + savedTokens: result.savings?.savedTokens, + }, + }; + } + + // Not decomposed — signal the caller to solve monolithically. + return { + ok: true, + status: 200, + content: JSON.stringify( + { + decomposed: false, + reason: result.reason, + guidance: "Solve this task directly without decomposition.", + }, + null, + 2 + ), + metadata: { decomposed: false, reason: result.reason }, + }; + } catch (error) { + logger.error({ error: error.message }, "DecomposeTask: error"); + return { + ok: false, + status: 500, + content: JSON.stringify( + { error: "Decomposition error", message: error.message }, + null, + 2 + ), + }; + } + }, + { category: "decompose" } + ); + + logger.info("DecomposeTask tool registered"); +} + +module.exports = { registerDecomposeTool }; diff --git a/src/tools/lazy-loader.js b/src/tools/lazy-loader.js index fcb3e04..c03c9a6 100644 --- a/src/tools/lazy-loader.js +++ b/src/tools/lazy-loader.js @@ -77,6 +77,11 @@ const TOOL_CATEGORIES = { loader: () => require('./agent-task').registerAgentTaskTool, priority: 2, }, + decompose: { + keywords: ['decompose', 'subtask', 'break down', 'break into', 'split task', 'plan and execute'], + loader: () => require('./decompose').registerDecomposeTool, + priority: 2, + }, 'code-mode': { keywords: ['mcp', 'execute', 'server', 'tool', 'code mode'], loader: () => require('./code-mode').registerCodeModeTools, @@ -296,6 +301,9 @@ function loadCategoryForTool(toolName) { // TinyFish (web agent) 'web_agent': 'tinyfish', 'agent_task': 'agentTask', + + // Task decomposition + 'decomposetask': 'decompose', }; // Direct mapping diff --git a/test/output-format-guard.test.js b/test/output-format-guard.test.js new file mode 100644 index 0000000..74b92c8 --- /dev/null +++ b/test/output-format-guard.test.js @@ -0,0 +1,95 @@ +/** + * Tests for the output formatting guard (src/context/output-format-guard.js). + */ + +const { describe, it } = require("node:test"); +const assert = require("node:assert/strict"); + +const { + injectFormatGuard, + producesCleanMarkdown, + appendToSystem, + MARKER, +} = require("../src/context/output-format-guard"); + +describe("output format guard", () => { + describe("producesCleanMarkdown", () => { + it("treats Claude-family models as clean", () => { + assert.equal(producesCleanMarkdown("openrouter", "anthropic/claude-3.5-sonnet"), true); + assert.equal(producesCleanMarkdown("bedrock", "claude-opus-4"), true); + assert.equal(producesCleanMarkdown("databricks", "databricks-claude-sonnet-4-5"), true); + }); + + it("treats azure-anthropic provider as clean regardless of model", () => { + assert.equal(producesCleanMarkdown("azure-anthropic", null), true); + }); + + it("treats non-Claude backends as needing the guard", () => { + assert.equal(producesCleanMarkdown("moonshot", "kimi-k2-turbo-preview"), false); + assert.equal(producesCleanMarkdown("ollama", "qwen2.5-coder:7b"), false); + assert.equal(producesCleanMarkdown("azure-openai", "gpt-4o"), false); + }); + + it("biases toward injecting when the model is unknown", () => { + // Unknown model on a provider that could serve either → inject (not clean). + assert.equal(producesCleanMarkdown("openrouter", null), false); + }); + }); + + describe("appendToSystem", () => { + it("appends to a string system prompt", () => { + const out = appendToSystem("You are helpful.", "GUARD"); + assert.match(out, /You are helpful\./); + assert.match(out, /GUARD/); + }); + + it("becomes the system prompt when empty", () => { + assert.equal(appendToSystem("", "GUARD"), "GUARD"); + assert.equal(appendToSystem(null, "GUARD"), "GUARD"); + }); + + it("appends a block to an array system prompt", () => { + const arr = [{ type: "text", text: "base" }]; + const out = appendToSystem(arr, "GUARD"); + assert.equal(out.length, 2); + assert.equal(out[1].text, "GUARD"); + }); + }); + + describe("injectFormatGuard", () => { + it("injects for a non-Claude backend (string system)", () => { + const body = { system: "base prompt", model: "claude-opus-4-5" }; + injectFormatGuard(body, { provider: "moonshot", model: "kimi-k2-turbo-preview" }); + assert.match(body.system, new RegExp(MARKER.replace(/[[\]]/g, "\\$&"))); + assert.match(body.system, /box-drawing|line-drawing/); + }); + + it("does NOT inject for a Claude backend even if client asked via label", () => { + const body = { system: "base prompt", model: "claude-opus-4-5" }; + injectFormatGuard(body, { provider: "azure-anthropic", model: null }); + assert.equal(body.system, "base prompt"); + }); + + it("keys off resolved model, not the client's requested body.model", () => { + // Client requested claude (label) but Lynkr resolved to kimi → must inject. + const body = { system: "base", model: "claude-opus-4-5" }; + injectFormatGuard(body, { provider: "moonshot", model: "kimi-k2-turbo-preview" }); + assert.match(body.system, new RegExp(MARKER.replace(/[[\]]/g, "\\$&"))); + }); + + it("is idempotent (no double injection)", () => { + const body = { system: "base", model: "kimi" }; + injectFormatGuard(body, { provider: "moonshot", model: "kimi" }); + const once = body.system; + injectFormatGuard(body, { provider: "moonshot", model: "kimi" }); + assert.equal(body.system, once); + }); + + it("handles array-format system prompts", () => { + const body = { system: [{ type: "text", text: "base" }] }; + injectFormatGuard(body, { provider: "ollama", model: "qwen2.5-coder" }); + assert.equal(body.system.length, 2); + assert.match(body.system[1].text, new RegExp(MARKER.replace(/[[\]]/g, "\\$&"))); + }); + }); +}); diff --git a/test/task-decomposition.test.js b/test/task-decomposition.test.js new file mode 100644 index 0000000..ca5ea05 --- /dev/null +++ b/test/task-decomposition.test.js @@ -0,0 +1,315 @@ +/** + * Tests for the task-decomposition feature (src/agents/decomposition/*). + * + * Everything here is deterministic and offline — model calls are injected. + */ + +// Must be set before requiring config-dependent modules. +process.env.TASK_DECOMPOSITION_ENABLED = "true"; +process.env.AGENTS_ENABLED = "true"; +process.env.DATABRICKS_API_KEY = process.env.DATABRICKS_API_KEY || "test-key"; +process.env.DATABRICKS_API_BASE = process.env.DATABRICKS_API_BASE || "http://test.com"; + +const { describe, it } = require("node:test"); +const assert = require("node:assert/strict"); + +const gate = require("../src/agents/decomposition/gate"); +const planner = require("../src/agents/decomposition/planner"); +const dispatcher = require("../src/agents/decomposition/dispatcher"); +const synthesizer = require("../src/agents/decomposition/synthesizer"); +const telemetry = require("../src/agents/decomposition/telemetry"); +const config = require("../src/config"); +const { runDecomposedTask } = require("../src/agents/decomposition"); + +// ── Phase 1: gate ─────────────────────────────────────────────────── + +describe("decomposition gate", () => { + const bigAnalysis = (score, tokens) => ({ + score, + breakdown: { tokens: { estimated: tokens } }, + }); + + it("decomposes a complex, large, divisible task", () => { + const task = + "Add a login endpoint in auth.js, then write tests in auth.test.js, and update the README.md docs."; + const res = gate.shouldDecompose(bigAnalysis(75, 5000), {}, { taskText: task }); + assert.equal(res.decompose, true); + assert.equal(res.reason, "decompose_worthwhile"); + }); + + it("skips low-complexity tasks", () => { + const res = gate.shouldDecompose(bigAnalysis(20, 5000), {}, { + taskText: "fix typo in readme and update docs", + }); + assert.equal(res.decompose, false); + assert.equal(res.reason, "below_complexity_threshold"); + }); + + it("skips tasks too small to amortise overhead", () => { + const res = gate.shouldDecompose(bigAnalysis(80, 500), {}, { + taskText: "implement A and then implement B and update C.js", + }); + assert.equal(res.decompose, false); + assert.equal(res.reason, "too_small_to_amortise_overhead"); + }); + + it("skips non-divisible tasks", () => { + const res = gate.shouldDecompose(bigAnalysis(80, 8000), {}, { + taskText: "explain how this system works", + }); + assert.equal(res.decompose, false); + assert.equal(res.reason, "not_divisible"); + }); + + it("never decomposes high-risk tasks", () => { + const task = "implement A and then implement B and update C.js"; + const res = gate.shouldDecompose(bigAnalysis(90, 9000), {}, { + taskText: task, + riskLevel: "high", + }); + assert.equal(res.decompose, false); + assert.equal(res.reason, "high_risk_skip"); + }); + + it("estimateIndependentUnits counts enumerated and file signals", () => { + assert.ok(gate.estimateIndependentUnits("1. do x\n2. do y\n3. do z") >= 3); + assert.ok(gate.estimateIndependentUnits("update a.js and b.js and c.py") >= 3); + assert.equal(gate.estimateIndependentUnits("a single vague sentence"), 1); + }); +}); + +// ── Phase 2: planner ──────────────────────────────────────────────── + +describe("decomposition planner", () => { + const fakeInvoke = (jsonText) => async () => ({ + json: { content: [{ type: "text", text: jsonText }], usage: { input_tokens: 10, output_tokens: 20 } }, + }); + + it("parses and validates a well-formed plan", async () => { + const json = JSON.stringify({ + strategy: "split by file", + subtasks: [ + { id: "s1", agentType: "Explore", prompt: "find the auth code", dependsOn: [] }, + { id: "s2", agentType: "Fix", prompt: "patch it", dependsOn: ["s1"] }, + ], + }); + const plan = await planner.generatePlan({ task: "x", invoke: fakeInvoke(json) }); + assert.ok(plan); + assert.equal(plan.subtasks.length, 2); + assert.equal(plan.subtasks[1].dependsOn[0], "s1"); + assert.equal(plan.usage.outputTokens, 20); + }); + + it("extracts JSON from prose + fences", () => { + const text = "Sure!\n```json\n{\"strategy\":\"s\",\"subtasks\":[{\"id\":\"s1\",\"prompt\":\"p\"}]}\n```\nDone."; + const obj = planner.extractJsonObject(text); + assert.equal(obj.subtasks[0].id, "s1"); + }); + + it("returns null on dangling dependency", () => { + const bad = { subtasks: [{ id: "s1", prompt: "p", dependsOn: ["nope"] }] }; + assert.equal(planner.validatePlan(bad, 6), null); + }); + + it("returns null on duplicate ids", () => { + const bad = { + subtasks: [ + { id: "s1", prompt: "a" }, + { id: "s1", prompt: "b" }, + ], + }; + assert.equal(planner.validatePlan(bad, 6), null); + }); + + it("coerces unknown agent types to general-purpose", () => { + const ok = { subtasks: [{ id: "s1", agentType: "Wizard", prompt: "p", dependsOn: [] }] }; + const plan = planner.validatePlan(ok, 6); + assert.equal(plan.subtasks[0].agentType, "general-purpose"); + }); + + it("detects cycles", () => { + const cyclic = [ + { id: "a", prompt: "a", dependsOn: ["b"] }, + { id: "b", prompt: "b", dependsOn: ["a"] }, + ]; + assert.equal(planner.hasCycle(cyclic), true); + }); + + it("returns null when model output is not JSON", async () => { + const plan = await planner.generatePlan({ task: "x", invoke: fakeInvoke("no json here") }); + assert.equal(plan, null); + }); +}); + +// ── Phase 3: dispatcher ───────────────────────────────────────────── + +describe("decomposition dispatcher", () => { + it("orders subtasks into dependency levels", () => { + const subtasks = [ + { id: "s1", dependsOn: [] }, + { id: "s2", dependsOn: ["s1"] }, + { id: "s3", dependsOn: ["s1"] }, + { id: "s4", dependsOn: ["s2", "s3"] }, + ]; + const levels = dispatcher.topologicalLevels(subtasks); + assert.deepEqual(levels[0], ["s1"]); + assert.deepEqual(levels[1].sort(), ["s2", "s3"]); + assert.deepEqual(levels[2], ["s4"]); + }); + + it("runs levels in order and forwards dependency results as context", async () => { + const calls = []; + const spawnParallel = async (agentTypes, prompts, opts) => { + calls.push({ agentTypes, prompts, mainContext: opts.mainContext }); + return prompts.map((p) => ({ + success: true, + result: `done:${p}`, + stats: { inputTokens: 5, outputTokens: 3 }, + })); + }; + + const plan = { + subtasks: [ + { id: "s1", agentType: "Explore", prompt: "explore", dependsOn: [] }, + { id: "s2", agentType: "Fix", prompt: "fix", dependsOn: ["s1"] }, + ], + }; + + const out = await dispatcher.dispatchPlan(plan, { spawnParallel }); + assert.equal(out.results.length, 2); + assert.equal(out.results[0].success, true); + assert.equal(out.stats.subagents, 2); + + // s2 ran after s1 and received s1's result as injected context. + const s2Call = calls.find((c) => c.prompts[0] === "fix"); + assert.ok(s2Call.mainContext); + assert.match(s2Call.mainContext.relevant_context, /done:explore/); + }); + + it("buildContextForSubtask returns null for independent subtasks", () => { + const ctx = dispatcher.buildContextForSubtask({ dependsOn: [] }, new Map()); + assert.equal(ctx, null); + }); +}); + +// ── Phase 4: synthesizer ──────────────────────────────────────────── + +describe("decomposition synthesizer", () => { + it("synthesizes via the model", async () => { + const invoke = async () => ({ + json: { content: [{ type: "text", text: "final answer" }], usage: {} }, + }); + const out = await synthesizer.synthesize({ + task: "t", + subtaskResults: [{ id: "s1", agentType: "Fix", success: true, result: "r1" }], + invoke, + }); + assert.equal(out.text, "final answer"); + assert.equal(out.fallback, false); + }); + + it("falls back to concatenation when synthesis throws", async () => { + const invoke = async () => { + throw new Error("boom"); + }; + const out = await synthesizer.synthesize({ + task: "t", + subtaskResults: [{ id: "s1", agentType: "Fix", success: true, result: "r1" }], + invoke, + }); + assert.equal(out.fallback, true); + assert.match(out.text, /r1/); + }); + + it("reports failure when all subtasks failed", async () => { + const out = await synthesizer.synthesize({ + task: "t", + subtaskResults: [{ id: "s1", agentType: "Fix", success: false, error: "x" }], + }); + assert.equal(out.fallback, true); + }); +}); + +// ── Phase 6: telemetry ────────────────────────────────────────────── + +describe("decomposition telemetry", () => { + it("computes net savings (positive = cheaper)", () => { + const s = telemetry.estimateSavings({ + monolithicTokens: 10000, + planUsage: { inputTokens: 100, outputTokens: 200 }, + dispatchStats: { inputTokens: 1000, outputTokens: 500 }, + synthUsage: { inputTokens: 300, outputTokens: 400 }, + }); + assert.equal(s.decomposedTokens, 2500); + assert.equal(s.savedTokens, 7500); + }); +}); + +// ── Orchestration ─────────────────────────────────────────────────── + +describe("runDecomposedTask orchestration", () => { + const inject = { + analyze: async () => ({ score: 80, breakdown: { tokens: { estimated: 8000 } } }), + generatePlan: async () => ({ + strategy: "split", + subtasks: [ + { id: "s1", agentType: "Explore", prompt: "p1", dependsOn: [] }, + { id: "s2", agentType: "Fix", prompt: "p2", dependsOn: ["s1"] }, + ], + usage: { inputTokens: 50, outputTokens: 60 }, + }), + dispatchPlan: async () => ({ + results: [ + { id: "s1", agentType: "Explore", success: true, result: "found it" }, + { id: "s2", agentType: "Fix", success: true, result: "fixed it" }, + ], + levels: [["s1"], ["s2"]], + stats: { subagents: 2, inputTokens: 200, outputTokens: 100 }, + }), + synthesize: async () => ({ text: "all done, fixed it correctly", fallback: false, usage: { inputTokens: 80, outputTokens: 40 } }), + }; + + it("decomposes a qualifying task end-to-end", async () => { + const task = "implement A in a.js and then test it in a.test.js and document it in README.md"; + const res = await runDecomposedTask(task, { _inject: inject }); + assert.equal(res.decomposed, true); + assert.match(res.result, /fixed it/); + assert.equal(res.plan.subtasks.length, 2); + assert.ok(typeof res.quality.confidence === "number"); + assert.ok(res.savings.savedTokens !== undefined); + }); + + it("skips via the real gate for trivial tasks", async () => { + const res = await runDecomposedTask("say hello", { + _inject: { ...inject, analyze: async () => ({ score: 10, breakdown: { tokens: { estimated: 100 } } }) }, + }); + assert.equal(res.decomposed, false); + assert.equal(res.reason, "below_complexity_threshold"); + }); + + it("respects shadow mode (logs but does not decompose)", async () => { + const original = config.taskDecomposition.shadow; + config.taskDecomposition.shadow = true; + try { + const task = "implement A in a.js and then test it in a.test.js and document README.md"; + const res = await runDecomposedTask(task, { _inject: inject }); + assert.equal(res.decomposed, false); + assert.equal(res.reason, "shadow_mode"); + assert.ok(res.gate); + } finally { + config.taskDecomposition.shadow = original; + } + }); + + it("returns disabled when feature flag is off", async () => { + const original = config.taskDecomposition.enabled; + config.taskDecomposition.enabled = false; + try { + const res = await runDecomposedTask("anything", { _inject: inject }); + assert.equal(res.decomposed, false); + assert.equal(res.reason, "disabled"); + } finally { + config.taskDecomposition.enabled = original; + } + }); +}); diff --git a/test/tier-fallback.test.js b/test/tier-fallback.test.js new file mode 100644 index 0000000..0954402 --- /dev/null +++ b/test/tier-fallback.test.js @@ -0,0 +1,100 @@ +/** + * Tests for tier-aware escalate-then-demote fallback (src/routing/tier-fallback.js). + */ + +process.env.DATABRICKS_API_KEY = process.env.DATABRICKS_API_KEY || "test-key"; +process.env.DATABRICKS_API_BASE = process.env.DATABRICKS_API_BASE || "http://test.com"; + +const { describe, it } = require("node:test"); +const assert = require("node:assert/strict"); + +const { getFallbackChain, TIER_ORDER } = require("../src/routing/tier-fallback"); + +// Fake selector mapping tiers → provider:model (distinct providers per tier). +const MAP = { + SIMPLE: { provider: "ollama", model: "minimax" }, + MEDIUM: { provider: "llamacpp", model: "qwen" }, + COMPLEX: { provider: "moonshot", model: "kimi" }, + REASONING: { provider: "azure-openai", model: "gpt-5" }, +}; +const selector = { selectModel: (tier) => MAP[tier] }; +const allAvailable = () => true; + +function chainTiers(tier, opts = {}) { + return getFallbackChain(tier, { selector, isProviderAvailable: allAvailable, ...opts }).map( + (c) => c.tier + ); +} + +describe("tier-fallback chain (escalate-then-demote)", () => { + it("COMPLEX → up to REASONING, then down to MEDIUM, SIMPLE", () => { + assert.deepEqual(chainTiers("COMPLEX"), ["REASONING", "MEDIUM", "SIMPLE"]); + }); + + it("REASONING (top) → only downward", () => { + assert.deepEqual(chainTiers("REASONING"), ["COMPLEX", "MEDIUM", "SIMPLE"]); + }); + + it("MEDIUM → up (COMPLEX, REASONING) then down (SIMPLE)", () => { + assert.deepEqual(chainTiers("MEDIUM"), ["COMPLEX", "REASONING", "SIMPLE"]); + }); + + it("SIMPLE (floor) → only upward", () => { + assert.deepEqual(chainTiers("SIMPLE"), ["MEDIUM", "COMPLEX", "REASONING"]); + }); + + it("marks direction up/down correctly", () => { + const chain = getFallbackChain("COMPLEX", { selector, isProviderAvailable: allAvailable }); + assert.equal(chain.find((c) => c.tier === "REASONING").direction, "up"); + assert.equal(chain.find((c) => c.tier === "SIMPLE").direction, "down"); + }); + + it("skips providers whose circuit is unavailable", () => { + const isAvailable = (p) => p !== "azure-openai"; // REASONING down + const chain = getFallbackChain("COMPLEX", { selector, isProviderAvailable: isAvailable }); + assert.deepEqual(chain.map((c) => c.tier), ["MEDIUM", "SIMPLE"]); + }); + + it("dedups identical provider:model across tiers", () => { + const dupMap = { + SIMPLE: { provider: "ollama", model: "minimax" }, + MEDIUM: { provider: "ollama", model: "minimax" }, // same as SIMPLE + COMPLEX: { provider: "moonshot", model: "kimi" }, + REASONING: { provider: "azure-openai", model: "gpt-5" }, + }; + const chain = getFallbackChain("COMPLEX", { + selector: { selectModel: (t) => dupMap[t] }, + isProviderAvailable: allAvailable, + }); + // MEDIUM and SIMPLE collapse to one ollama:minimax entry. + const ollama = chain.filter((c) => c.provider === "ollama"); + assert.equal(ollama.length, 1); + }); + + it("never re-attempts the failed tier's own provider:model", () => { + // If REASONING maps to the same provider:model as COMPLEX, it's excluded. + const sameMap = { ...MAP, REASONING: { provider: "moonshot", model: "kimi" } }; + const chain = getFallbackChain("COMPLEX", { + selector: { selectModel: (t) => sameMap[t] }, + isProviderAvailable: allAvailable, + }); + assert.ok(!chain.some((c) => c.provider === "moonshot")); + }); + + it("skips tiers that aren't configured (selector throws)", () => { + const partial = { selectModel: (t) => { + if (t === "REASONING") throw new Error("TIER_REASONING not configured"); + return MAP[t]; + } }; + const chain = getFallbackChain("COMPLEX", { selector: partial, isProviderAvailable: allAvailable }); + assert.deepEqual(chain.map((c) => c.tier), ["MEDIUM", "SIMPLE"]); + }); + + it("returns empty for an unknown tier", () => { + assert.deepEqual(getFallbackChain("BOGUS", { selector, isProviderAvailable: allAvailable }), []); + }); + + it("TIER_ORDER is SIMPLE→REASONING", () => { + assert.deepEqual(TIER_ORDER, ["SIMPLE", "MEDIUM", "COMPLEX", "REASONING"]); + }); +});