diff --git a/CHANGELOG.md b/CHANGELOG.md index cd28a03..c973980 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,18 @@ All notable changes to this project are documented here. Versions follow [Semantic Versioning](https://semver.org/). Dates are ISO 8601. +## [0.4.1] - 2026-06-23 + +### Fixed +- **The selected provider now actually applies the task.** Built-in agent personas hard-coded `claude-local`, so every apply/seed run spawned `claude` regardless of the provider you picked in Settings → Providers — surfacing as `spawn claude ENOENT` when claude wasn't installed (e.g. you'd selected Copilot). Built-in personas now inherit the global active provider; an explicit per-persona pin in `.annotask/agents.json` still wins. Verified live across all four CLIs (claude/codex/opencode/copilot). +- **Per-agent provider/model pins persist.** Setting a persona's model in Settings → Agents was silently wiped whenever the provider's live model catalog couldn't be enumerated (Copilot's interactive-only picker is the canonical case) — and a saved value displayed as empty. The model field now keeps and shows the saved value; a stale id from a *different* provider is cleared on provider change instead, never on catalog-fetch failure. +- **Reloading the page no longer destroys an in-flight apply.** An applying CLI was bound to the browser tab: a reload tore down the SSE, killed the child mid-edit, and reverted the task to `pending`. The spawn server now keeps a task-bearing run alive across a client disconnect (detach grace) and finalizes it on the child's own exit — a clean exit lands the task in `review`, an interrupted/failed run reverts to `pending`. The client no longer reverts the task on `pagehide` and warns before an accidental reload. A stale orphan-finalize can no longer clobber a newer run for the same task. +- **Auto-run reliability.** The headless auto-run driver logs when its single-run guard blocks a drain (previously silent), and bounds each run so a hung provider can't pin the queue and stall every later task. +- **Conversation rendering.** Agent messages now style the full Markdown surface (lists, headings, blockquotes, links, tables) instead of only paragraphs/code; wide tool output and long tokens no longer scroll the whole panel (`min-width: 0` + code wrapping); the "agent paused for input" banner label meets contrast; and rendered links open in a new tab with `rel="noopener"`. + +### Changed +- **Seed prompt carries the task inline.** Apply runs now embed a compact `Task grounding` block (file/line/component/route + per-type context, via the shared task-summary) in the seed prompt, so the agent applies directly instead of reflexively calling `annotask_get_task`. Heavy fields (screenshot, rendered HTML, interaction history) stay behind their MCP tools. + ## [0.4.0] - 2026-06-22 ### Fixed diff --git a/package.json b/package.json index 2690fd8..93e3e3c 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "annotask", - "version": "0.4.0", + "version": "0.4.1", "description": "Visual UI design tool for web apps. Make changes in the browser and generate structured reports that AI agents can apply to source code. Works with Vue, React, Svelte, SolidJS, and plain HTML via Vite or Webpack.", "license": "MIT", "type": "module", diff --git a/src/server/__tests__/agent-spawn-http.test.ts b/src/server/__tests__/agent-spawn-http.test.ts index 8de2161..283330b 100644 --- a/src/server/__tests__/agent-spawn-http.test.ts +++ b/src/server/__tests__/agent-spawn-http.test.ts @@ -300,6 +300,39 @@ describe('agent-spawn over HTTP (real middleware + real spawn handler)', () => { child.finish(143) // simulate the SIGTERM landing so the run drains }) + it('keeps the child alive on client disconnect when the run carries a taskId (reload survival)', async () => { + const before = spawned.length + const clientReq = http.request({ + method: 'POST', + hostname: 'localhost', + port: serverPort(), + path: SPAWN_PATH, + headers: { 'Content-Type': 'application/json' }, + }) + clientReq.on('error', () => { /* expected after destroy */ }) + clientReq.on('response', (res) => { + res.on('data', () => { /* keep the stream flowing */ }) + res.on('error', () => { /* expected after destroy */ }) + }) + clientReq.end(JSON.stringify({ cli: 'claude', args: [], taskId: 'task-reload' })) + + await waitFor(() => spawned.length > before, 'child spawn') + const child = spawned[spawned.length - 1] + expect(child.killed).toBe(false) + + clientReq.destroy() + // Apply runs get a detach grace so a page reload doesn't destroy in-flight + // work — the child must still be alive shortly after the disconnect (the + // no-taskId case above is killed immediately). + await new Promise((r) => setTimeout(r, 80)) + expect(child.killed).toBe(false) + expect(agentSpawn.registry.taskRunning('task-reload')).toBe(true) + + // The child exiting on its own drains the run (afterEach also guards this). + child.finish(0) + await waitFor(() => agentSpawn.registry.size() === 0, 'run drains after child exit') + }) + describe('ANNOTASK_MAX_PERMISSION cap at the route level', () => { it('403s a bypass spawn under cap=default — canonical flag AND synonym spellings', async () => { process.env.ANNOTASK_MAX_PERMISSION = 'default' diff --git a/src/server/__tests__/agent-spawn.test.ts b/src/server/__tests__/agent-spawn.test.ts index 8e2d1f8..a0ed3aa 100644 --- a/src/server/__tests__/agent-spawn.test.ts +++ b/src/server/__tests__/agent-spawn.test.ts @@ -278,6 +278,42 @@ describe('run registry — per-task dedup + orphan hook', () => { await p expect(ended).toEqual([]) // no taskId → no orphan reconciliation }) + + it('reports a clean self-exit as { killed:false, exitCode:0 } so the finalizer can land review', async () => { + const ends: Array<{ killed: boolean; exitCode: number | null }> = [] + const child = new FakeChild() + const handler = createAgentSpawnHandler({ spawnImpl: fakeSpawn(child), onRunEnd: (_id, info) => ends.push(info) }) + const p = handler.handleSpawn(fakeReq(), fakeRes(), { cli: 'claude', args: [], taskId: 'task-clean' }, '/tmp') + child.finish(0) + await p + expect(ends).toEqual([{ killed: false, exitCode: 0 }]) + }) + + it('does NOT kill an apply run (taskId) on client disconnect — keeps it alive across a reload', async () => { + const child = new FakeChild() + const handler = createAgentSpawnHandler({ spawnImpl: fakeSpawn(child) }) + const req = fakeReq() + const p = handler.handleSpawn(req, fakeRes(), { cli: 'claude', args: [], taskId: 'task-detach' }, '/tmp') + ;(req as unknown as EventEmitter).emit('close') + // The detach grace keeps the child alive (a chat run would be killed — next test). + expect(child.killed).toBe(false) + expect(handler.registry.taskRunning('task-detach')).toBe(true) + // Its own clean exit drains the run and clears the grace timer. + child.finish(0) + await p + expect(handler.registry.taskRunning('task-detach')).toBe(false) + }) + + it('kills a chat run (no taskId) immediately on client disconnect', async () => { + const child = new FakeChild() + const handler = createAgentSpawnHandler({ spawnImpl: fakeSpawn(child) }) + const req = fakeReq() + const p = handler.handleSpawn(req, fakeRes(), { cli: 'claude', args: [] }, '/tmp') + ;(req as unknown as EventEmitter).emit('close') + expect(child.killed).toBe(true) + child.finish(143) + await p + }) }) describe('origin policy', () => { diff --git a/src/server/agent-spawn.ts b/src/server/agent-spawn.ts index ee5618f..8b67521 100644 --- a/src/server/agent-spawn.ts +++ b/src/server/agent-spawn.ts @@ -63,6 +63,18 @@ const MAX_SPAWN_DURATION_MS = 15 * 60_000 * at once. Excess spawns are refused with a `too_many_agents` error. */ const MAX_CONCURRENT_SPAWNS = 4 +/** + * Grace window to keep a CLI child alive after its client SSE disconnects, + * WHEN the run is applying a task (carries a taskId). A page reload tears down + * the SSE; without this the child is SIGTERM'd mid-edit and the in-flight work + * is lost. With it, the child keeps running and the server finalizes the task + * on the child's own exit (see `onRunEnd` → index.ts: clean exit → review, + * otherwise → pending). Chat runs (no taskId) are bound to their tab and are + * still killed the instant the client disconnects. Kept beneath + * MAX_SPAWN_DURATION_MS and below the boot-reclaim age so a client that never + * returns is still cleaned up promptly. + */ +const DETACH_GRACE_MS = 90_000 /** * PATH passed to spawned CLIs. Appends `ANNOTASK_HOST_PATH` (if set) so @@ -143,12 +155,16 @@ export interface AgentSpawnOptions { * the orchestrating tab closed mid-run) — it grace-checks "still in_progress?" * so a normal completion (client about to set `review`) is a no-op. * - * `info.killed` is true when WE terminated the child (client disconnect, - * explicit abort, or the duration backstop) and false when it exited on its - * own — lets the finalizer word its reason honestly instead of always - * blaming a closed tab. + * `info.killed` is true when WE terminated the child (explicit abort, the + * detach-grace expiry, or the duration backstop) and false when it exited on + * its own — lets the finalizer word its reason honestly instead of always + * blaming a closed tab. `info.exitCode` is the child's exit status (null when + * killed/signalled): a clean `0` from a child that exited on its own means + * the apply finished even though the client never finalized it (e.g. the tab + * reloaded mid-run), so the finalizer can land the task in `review` instead + * of wrongly reverting completed work to `pending`. */ - onRunEnd?: (taskId: string, info: { killed: boolean }) => void + onRunEnd?: (taskId: string, info: { killed: boolean; exitCode: number | null }) => void /** Test seam: inject a child-process factory (defaults to node:child_process spawn). */ spawnImpl?: typeof spawn } @@ -423,6 +439,13 @@ export function createAgentSpawnHandler(opts: AgentSpawnOptions = {}): AgentSpaw } let killed = false + // Set once the run has fully ended (child closed + cleanup running). Guards + // onClientGone so the `res.end()` in cleanup — which itself fires res + // 'close' — can't re-enter and arm a pointless detach timer on a dead run. + let runEnded = false + // Detach-grace timer: armed when an apply run's client disconnects, kills + // the child if no one returns within DETACH_GRACE_MS. + let detachTimer: ReturnType | null = null // Signal the whole process group on POSIX (the child was spawned detached, // i.e. as a group leader) so grandchildren the agent forked die with it. // Falls back to the direct child.kill when there's no pid (spawn-time @@ -436,9 +459,28 @@ export function createAgentSpawnHandler(opts: AgentSpawnOptions = {}): AgentSpaw function killChild() { if (killed) return killed = true + if (detachTimer) { clearTimeout(detachTimer); detachTimer = null } killGroup('SIGTERM') setTimeout(() => { killGroup('SIGKILL') }, KILL_GRACE_MS).unref() } + // Client SSE went away (tab closed, navigated, or RELOADED). For a chat run + // the child is bound to its tab — kill it. For an APPLY run (has a taskId) + // a reload would otherwise destroy in-flight edits, so keep the child alive + // for a grace window: it finishes and the server finalizes the task on its + // exit. The child keeps writing to a dead socket during the grace, which is + // harmless (write() swallows EPIPE). Kill on grace expiry. + // Capture the (narrowed) taskId in a local — TS doesn't carry the + // `typeof parsed === 'string'` narrowing into the nested closure below. + const runTaskId = parsed.taskId + let clientGone = false + function onClientGone() { + if (clientGone || runEnded) return + clientGone = true + if (!runTaskId) { killChild(); return } + if (detachTimer || killed) return + detachTimer = setTimeout(() => { killChild() }, DETACH_GRACE_MS) + detachTimer.unref?.() + } active.set(runId, { child, kill: killChild, taskId: parsed.taskId }) // Pipe stdin if provided. Attach the async error listener BEFORE writing: @@ -463,10 +505,11 @@ export function createAgentSpawnHandler(opts: AgentSpawnOptions = {}): AgentSpaw }, KEEPALIVE_INTERVAL_MS) keepalive.unref() - // Abort on client disconnect. Belt-and-suspenders: some proxies close - // the response socket rather than the request, so we listen on both. - req.on('close', () => { killChild() }) - res.on('close', () => { killChild() }) + // Client disconnect (incl. page reload). Belt-and-suspenders: some proxies + // close the response socket rather than the request, so we listen on both. + // For apply runs this starts the detach grace rather than an instant kill. + req.on('close', () => { onClientGone() }) + res.on('close', () => { onClientGone() }) // Absolute duration backstop — kill a child that outlives the ceiling // (e.g. its client died without closing the socket). Cleared on close. @@ -523,6 +566,7 @@ export function createAgentSpawnHandler(opts: AgentSpawnOptions = {}): AgentSpaw }) }) + let exitCode: number | null = null await new Promise((resolve) => { let resolved = false const finish = () => { if (!resolved) { resolved = true; resolve() } } @@ -532,6 +576,7 @@ export function createAgentSpawnHandler(opts: AgentSpawnOptions = {}): AgentSpaw finish() }) child.on('close', (code, signal) => { + exitCode = typeof code === 'number' ? code : null // Flush trailing stdout/stderr (no terminating newline). if (stdoutBuf.length > 0) write(res, 'stdout', stdoutBuf) if (stderrBuf.length > 0) write(res, 'stderr', stderrBuf) @@ -540,16 +585,23 @@ export function createAgentSpawnHandler(opts: AgentSpawnOptions = {}): AgentSpaw }) }) + // Mark ended BEFORE res.end() (which fires res 'close') so the disconnect + // handler can't arm a detach timer on this already-finished run. + runEnded = true clearInterval(keepalive) clearTimeout(maxDuration) + if (detachTimer) { clearTimeout(detachTimer); detachTimer = null } active.delete(runId) if (parsed.taskId && byTask.get(parsed.taskId) === runId) byTask.delete(parsed.taskId) try { res.end() } catch { /* already ended */ } - // Report the run end so a task the client never finalized (orphaned by a - // closed tab) can be reconciled server-side. Fired for normal completions - // too — the handler grace-checks status, so those no-op. + // Report the run end so a task the client never finalized can be reconciled + // server-side. `killed` distinguishes a terminated child from one that + // exited on its own; `exitCode` lets the finalizer land a detached-but- + // completed apply (clean exit) in `review` instead of reverting it to + // `pending`. Fired for normal completions too — the handler grace-checks + // status, so a client that already finalized makes this a no-op. if (parsed.taskId) { - try { opts.onRunEnd?.(parsed.taskId, { killed }) } catch { /* never let a sink break cleanup */ } + try { opts.onRunEnd?.(parsed.taskId, { killed, exitCode }) } catch { /* never let a sink break cleanup */ } } } diff --git a/src/server/index.ts b/src/server/index.ts index 03d1751..489d637 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -122,6 +122,7 @@ export function createAnnotaskServer(options: AnnotaskServerOptions): AnnotaskSe if (res && typeof res === 'object' && 'error' in res) return // eslint-disable-next-line no-console console.warn(`[annotask] reconciled orphaned task ${taskId} → pending (${why})`) + void finalizeFrozenPartial(taskId) if (before?.type === 'wireframe_apply') { try { await releaseApplyTask(applyOptions, taskId) } catch (err) { @@ -134,18 +135,76 @@ export function createAnnotaskServer(options: AnnotaskServerOptions): AnnotaskSe console.warn(`[annotask] orphan reconcile failed for task ${taskId}:`, err) }) } + /** + * A detached apply run exited cleanly (code 0) but its client never flipped + * the status — the orchestrating tab reloaded/closed mid-run while the CLI + * (kept alive by the spawn detach-grace) finished and wrote the files. Land + * the task in `review` (the state the client would have set) so completed + * work isn't reverted to `pending` and re-applied. Guarded to in_progress so + * a returning client that finalizes first still wins. + */ + function finalizeDetachedReview(taskId: string): void { + void state.updateTask( + taskId, + { status: 'review', resolution: 'Applied by the embedded agent; finalized server-side after the tab was closed or reloaded mid-run. Review the changes.' }, + { guard: (t) => (t.status === 'in_progress' ? null : 'task already finalized') }, + ).then((res) => { + if (res && typeof res === 'object' && 'error' in res) return + // eslint-disable-next-line no-console + console.warn(`[annotask] finalized detached run ${taskId} → review (clean exit)`) + void finalizeFrozenPartial(taskId) + }).catch((err) => { + // eslint-disable-next-line no-console + console.warn(`[annotask] detached review finalize failed for task ${taskId}:`, err) + }) + } + /** + * Clear a stuck `isPartial:true` assistant turn left behind when the + * streaming client vanished mid-run (only the client run loop ever clears it, + * so without this the Conversation tab shows a turn frozen mid-stream + * forever). Best-effort. + */ + async function finalizeFrozenPartial(taskId: string): Promise { + try { + const msgs = await taskThread.read(taskId) + for (let i = msgs.length - 1; i >= 0; i--) { + if (msgs[i].isPartial) { + await taskThread.update(taskId, msgs[i].id, { isPartial: false }) + break + } + } + } catch (err) { + // eslint-disable-next-line no-console + console.warn(`[annotask] clearing frozen partial failed for task ${taskId}:`, err) + } + } // The registry reports every run end; we grace-check a moment later (a normal // completion's client review/pending PATCH lands well within this window, so - // it no-ops) and, if the task is still `in_progress`, reconcile it. `killed` - // distinguishes an interrupted run (we terminated the child) from a child - // that exited on its own while the client failed to finalize — for honest - // logging; both reconcile to `pending`. + // it no-ops) and, if the task is still `in_progress`, finalize it. A child + // that exited on its own with code 0 finished the apply (the client just + // never flipped status — e.g. its tab reloaded) → land in `review`. Anything + // else — a non-zero exit, or a child WE killed (detach-grace expiry, abort, + // duration backstop) — reverts to `pending` so it stays retryable. + // wireframe_apply always takes the pending path: its apply-batch lifecycle is + // reconciled there and a server-side review flip would bypass it. const ORPHAN_FINALIZE_GRACE_MS = 12_000 - function scheduleOrphanFinalize(taskId: string, info: { killed: boolean }): void { + function scheduleOrphanFinalize(taskId: string, info: { killed: boolean; exitCode: number | null }): void { setTimeout(() => { - const task = state.getTasks().tasks.find((t: { id?: string; status?: string }) => t?.id === taskId) + const task = state.getTasks().tasks.find((t: { id?: string; status?: string; type?: string }) => t?.id === taskId) if (!task || task.status !== 'in_progress') return - reconcileOrphanedTask(taskId, info.killed ? 'run interrupted' : 'client never finalized') + // A NEWER run already owns this task (a re-spawn landed in the window + // between this run's child-exit — which cleared the byTask reservation — + // and this delayed callback). Our finalize is stale: flipping the task + // now would clobber the live run mid-edit. Bail and let that run finalize + // itself. The finalizer is otherwise run-identity-unaware (it keys only + // on taskId + status), so this guard is what prevents the clobber. + if (agentSpawn.registry.taskRunning(taskId)) return + const cleanCompletion = !info.killed && info.exitCode === 0 && task.type !== 'wireframe_apply' + if (cleanCompletion) { + finalizeDetachedReview(taskId) + } else { + reconcileOrphanedTask(taskId, info.killed ? 'run interrupted' : 'client never finalized') + } }, ORPHAN_FINALIZE_GRACE_MS).unref() } const agentSpawn = createAgentSpawnHandler({ onRunEnd: scheduleOrphanFinalize }) diff --git a/src/shell/components/AgentDirectionsPanel.vue b/src/shell/components/AgentDirectionsPanel.vue index 99c910d..28bb3b1 100644 --- a/src/shell/components/AgentDirectionsPanel.vue +++ b/src/shell/components/AgentDirectionsPanel.vue @@ -92,15 +92,18 @@ {{ effectiveModel }} (custom) - + { void agentModels.ensure(id) }, { immediate: true }) -// When the picker enters the "Cannot fetch models" state and the persona -// has a stale model id saved, reset it to Auto (empty) so the input starts -// clean instead of pre-filling a value the live probe couldn't confirm. -watch(modelsBlocked, (blocked) => { - if (!blocked) return - if (!selected.value) return - if (!effectiveModel.value) return - void persistRuntime({ model: '' }) -}) +// NB: we deliberately do NOT auto-reset the saved model when the catalog can't +// be fetched (modelsBlocked). A model id the live probe can't enumerate (e.g. a +// Copilot-hosted model) is still a valid, user-entered value — wiping it here +// was a data-loss bug that made per-agent model pins silently fail to persist. +// A genuinely stale id left over from a DIFFERENT provider is cleared on +// provider change instead (see onProviderChange). async function persistRuntime(patch: { providerId?: ProviderId; model?: string; effort?: EffortLevel }) { if (!selected.value) return @@ -333,11 +333,18 @@ async function persistRuntime(patch: { providerId?: ProviderId; model?: string; function onProviderChange(value: string) { if (!CLI_PROVIDER_OPTIONS.some(p => p.id === value)) return const nextProvider = value as ProviderId + // A model id is provider-specific, so clear it when the provider changes — + // this is the ONE place a stale cross-provider model is dropped (it replaces + // the old modelsBlocked auto-wipe). The user then picks/enters the new + // provider's model, which now persists. + const patch: { providerId: ProviderId; model: string; effort?: EffortLevel } = { + providerId: nextProvider, + model: '', + } // If the current effort isn't in the new provider's supported set, snap // to its first option (usually 'auto') so the select doesn't sit on a // value the dropdown can't render. const supportedEfforts = EFFORTS_BY_PROVIDER[nextProvider] ?? EFFORT_LEVELS - const patch: { providerId: ProviderId; effort?: EffortLevel } = { providerId: nextProvider } if (!supportedEfforts.includes(effectiveEffort.value)) { patch.effort = supportedEfforts[0] ?? 'auto' } diff --git a/src/shell/components/ConversationMessage.vue b/src/shell/components/ConversationMessage.vue index b55ecb7..de76c69 100644 --- a/src/shell/components/ConversationMessage.vue +++ b/src/shell/components/ConversationMessage.vue @@ -122,6 +122,10 @@ const lastIndex = computed(() => renderPlan.value.length - 1) padding: 10px 12px; border-radius: 8px; background: var(--surface-2); + /* Let wide children (code blocks, long tokens) shrink instead of forcing the + whole conversation scroller to scroll horizontally. Flex items default to + min-width:auto, which refuses to shrink below content width. */ + min-width: 0; } .cv-msg.role-user { background: color-mix(in srgb, var(--accent) 10%, var(--surface-2)); } .cv-msg.role-assistant { background: var(--surface-2); } @@ -142,7 +146,7 @@ const lastIndex = computed(() => renderPlan.value.length - 1) .cv-msg-model, .cv-msg-ts { font-weight: 500; } -.cv-msg-body { font-size: 13px; color: var(--text); line-height: 1.55; display: flex; flex-direction: column; gap: 8px; } +.cv-msg-body { font-size: 13px; color: var(--text); line-height: 1.55; display: flex; flex-direction: column; gap: 8px; min-width: 0; } .cv-msg-text { white-space: pre-wrap; word-break: break-word; } :deep(.cv-msg-markdown p) { margin: 0 0 8px; } @@ -151,7 +155,8 @@ const lastIndex = computed(() => renderPlan.value.length - 1) background: var(--surface-3); border-radius: 4px; padding: 8px 10px; - overflow-x: auto; + overflow: auto; + max-height: 360px; font-size: 12px; } :deep(.cv-msg-markdown code) { @@ -159,8 +164,28 @@ const lastIndex = computed(() => renderPlan.value.length - 1) border-radius: 3px; padding: 1px 4px; font-size: 12px; + word-break: break-word; + overflow-wrap: anywhere; } -:deep(.cv-msg-markdown pre code) { background: transparent; padding: 0; } +:deep(.cv-msg-markdown pre code) { background: transparent; padding: 0; word-break: normal; overflow-wrap: normal; } +/* Full GFM surface — safeMd emits lists/headings/blockquotes/links/tables, none + of which were styled, so agent prose rendered with raw browser defaults. */ +:deep(.cv-msg-markdown ul), :deep(.cv-msg-markdown ol) { margin: 4px 0 8px; padding-left: 20px; } +:deep(.cv-msg-markdown li) { margin: 2px 0; } +:deep(.cv-msg-markdown li > p) { margin: 0; } +:deep(.cv-msg-markdown h1), :deep(.cv-msg-markdown h2), :deep(.cv-msg-markdown h3), +:deep(.cv-msg-markdown h4), :deep(.cv-msg-markdown h5), :deep(.cv-msg-markdown h6) { + margin: 10px 0 6px; font-size: 13px; font-weight: 600; color: var(--text); line-height: 1.3; +} +:deep(.cv-msg-markdown > :first-child) { margin-top: 0; } +:deep(.cv-msg-markdown a) { color: var(--text-link); text-decoration: underline; word-break: break-word; } +:deep(.cv-msg-markdown strong) { font-weight: 600; color: var(--text); } +:deep(.cv-msg-markdown blockquote) { margin: 8px 0; padding: 2px 12px; border-left: 3px solid var(--border); color: var(--text-muted); } +:deep(.cv-msg-markdown hr) { border: none; border-top: 1px solid var(--border); margin: 10px 0; } +:deep(.cv-msg-markdown table) { display: block; max-width: 100%; overflow-x: auto; border-collapse: collapse; margin: 8px 0; } +:deep(.cv-msg-markdown th), :deep(.cv-msg-markdown td) { border: 1px solid var(--border); padding: 3px 8px; text-align: left; } +:deep(.cv-msg-markdown th) { background: var(--surface-3); font-weight: 600; } +:deep(.cv-msg-markdown img) { max-width: 100%; height: auto; } .cv-cursor { display: inline-block; diff --git a/src/shell/components/ConversationStreamBlock.vue b/src/shell/components/ConversationStreamBlock.vue index 0b53431..70cc146 100644 --- a/src/shell/components/ConversationStreamBlock.vue +++ b/src/shell/components/ConversationStreamBlock.vue @@ -114,6 +114,8 @@ const hasPairedResult = computed(() => color: var(--text); line-height: 1.55; word-break: break-word; + /* Allow wide children to shrink rather than scroll the whole conversation. */ + min-width: 0; } :deep(.csb-markdown p) { margin: 0 0 8px; } :deep(.csb-markdown p:last-child) { margin-bottom: 0; } @@ -121,7 +123,8 @@ const hasPairedResult = computed(() => background: var(--surface-3); border-radius: 4px; padding: 8px 10px; - overflow-x: auto; + overflow: auto; + max-height: 360px; font-size: 12px; } :deep(.csb-markdown code) { @@ -129,8 +132,28 @@ const hasPairedResult = computed(() => border-radius: 3px; padding: 1px 4px; font-size: 12px; + word-break: break-word; + overflow-wrap: anywhere; } -:deep(.csb-markdown pre code) { background: transparent; padding: 0; } +:deep(.csb-markdown pre code) { background: transparent; padding: 0; word-break: normal; overflow-wrap: normal; } +/* Full GFM surface — this is the LIVE/streaming agent-text path, so unstyled + lists/headings/links were the most visible. Mirrors ConversationMessage. */ +:deep(.csb-markdown ul), :deep(.csb-markdown ol) { margin: 4px 0 8px; padding-left: 20px; } +:deep(.csb-markdown li) { margin: 2px 0; } +:deep(.csb-markdown li > p) { margin: 0; } +:deep(.csb-markdown h1), :deep(.csb-markdown h2), :deep(.csb-markdown h3), +:deep(.csb-markdown h4), :deep(.csb-markdown h5), :deep(.csb-markdown h6) { + margin: 10px 0 6px; font-size: 13px; font-weight: 600; color: var(--text); line-height: 1.3; +} +:deep(.csb-markdown > :first-child) { margin-top: 0; } +:deep(.csb-markdown a) { color: var(--text-link); text-decoration: underline; word-break: break-word; } +:deep(.csb-markdown strong) { font-weight: 600; color: var(--text); } +:deep(.csb-markdown blockquote) { margin: 8px 0; padding: 2px 12px; border-left: 3px solid var(--border); color: var(--text-muted); } +:deep(.csb-markdown hr) { border: none; border-top: 1px solid var(--border); margin: 10px 0; } +:deep(.csb-markdown table) { display: block; max-width: 100%; overflow-x: auto; border-collapse: collapse; margin: 8px 0; } +:deep(.csb-markdown th), :deep(.csb-markdown td) { border: 1px solid var(--border); padding: 3px 8px; text-align: left; } +:deep(.csb-markdown th) { background: var(--surface-3); font-weight: 600; } +:deep(.csb-markdown img) { max-width: 100%; height: auto; } .csb-cursor { display: inline-block; @@ -144,6 +167,7 @@ const hasPairedResult = computed(() => display: flex; flex-direction: column; gap: 4px; + min-width: 0; } .csb-tool-head { @@ -228,7 +252,9 @@ const hasPairedResult = computed(() => align-items: flex-start; padding: 8px 10px; border: 1px solid color-mix(in srgb, var(--warning) 40%, var(--border)); - background: color-mix(in srgb, var(--warning) 8%, transparent); + /* Mix into a solid surface (not transparent) so the backdrop is theme-correct + regardless of what sits behind it, and the label has reliable contrast. */ + background: color-mix(in srgb, var(--warning) 14%, var(--surface-2)); border-radius: 4px; } .csb-question-label { @@ -236,7 +262,9 @@ const hasPairedResult = computed(() => font-weight: 700; letter-spacing: 0.04em; text-transform: uppercase; - color: var(--warning); + /* --warning on a faint --warning tint failed contrast in some themes; pair the + warning hue with --text so the label stays legible. */ + color: color-mix(in srgb, var(--warning) 55%, var(--text)); } .csb-question-text { font-size: 12.5px; diff --git a/src/shell/components/ConversationTab.vue b/src/shell/components/ConversationTab.vue index 0b88ca2..42d8a1f 100644 --- a/src/shell/components/ConversationTab.vue +++ b/src/shell/components/ConversationTab.vue @@ -52,8 +52,7 @@ const detect = useAgentDetect() // routes to a different provider than the global active one. const effectivePermissionMode = computed(() => { const taskMode = (props.task as { permissionMode?: PermissionMode }).permissionMode - const persona = providerSettings.getPersonaForTaskType(props.task.type) - const activeProviderId = persona?.providerId ?? providerSettings.activeProvider.value + const activeProviderId = providerSettings.resolveProviderForTaskType(props.task.type).providerId const providerCfg = providerSettings.settings.value.providers[activeProviderId] const providerMode = (providerCfg as { permissionMode?: PermissionMode }).permissionMode return resolveEffectivePermissionMode( @@ -72,8 +71,7 @@ const PERMISSION_MODE_TOOLTIPS: Record = { } const permissionModeTooltip = computed(() => { if (effectivePermissionMode.value !== 'plan') return PERMISSION_MODE_TOOLTIPS[effectivePermissionMode.value] - const persona = providerSettings.getPersonaForTaskType(props.task.type) - const activeProviderId = persona?.providerId ?? providerSettings.activeProvider.value + const activeProviderId = providerSettings.resolveProviderForTaskType(props.task.type).providerId if (NATIVE_PLAN_PROVIDERS.has(activeProviderId)) return PERMISSION_MODE_TOOLTIPS.plan return 'Plan (best-effort) — read-only enforced via model directive only. This provider has no native CLI/sandbox plan mode.' }) @@ -210,15 +208,15 @@ const tokenSummary = computed(() => { // otherwise the header label lies when a persona overrides the provider // (e.g. user picked opencode-local for `style_update` while the global // active provider is still claude-local). -const taskPersona = computed(() => providerSettings.getPersonaForTaskType(props.task.type)) -const activeProvider = computed(() => - taskPersona.value?.providerId ?? providerSettings.activeProvider.value, +// Resolve the provider/model the seed run will actually use, so the header +// label never lies: built-in personas inherit the global active provider, +// custom personas / explicit per-persona pins keep their own (see +// resolveProviderForTaskType). +const routedProvider = computed(() => providerSettings.resolveProviderForTaskType(props.task.type)) +const activeProvider = computed(() => routedProvider.value.providerId) +const activeModel = computed(() => + routedProvider.value.model || providerSettings.settings.value.providers[activeProvider.value]?.model || '', ) -const activeModel = computed(() => { - const persona = taskPersona.value - if (persona && persona.model.length > 0) return persona.model - return providerSettings.settings.value.providers[activeProvider.value]?.model || '' -}) const localCliBlocked = computed(() => { const id = activeProvider.value diff --git a/src/shell/components/TaskDetailModal.vue b/src/shell/components/TaskDetailModal.vue index 31f454c..b7f5b65 100644 --- a/src/shell/components/TaskDetailModal.vue +++ b/src/shell/components/TaskDetailModal.vue @@ -94,8 +94,7 @@ const taskPermissionMode = computed(() => (props.task as { permissionMod // provider when one matches the task type so the dropdown shows what'll // actually fire on a Run-with-agent click. const inheritedPermissionMode = computed(() => { - const persona = providerSettings.getPersonaForTaskType(props.task.type) - const activeProviderId = persona?.providerId ?? providerSettings.activeProvider.value + const activeProviderId = providerSettings.resolveProviderForTaskType(props.task.type).providerId const providerCfg = providerSettings.settings.value.providers[activeProviderId] const providerMode = (providerCfg as { permissionMode?: PermissionMode }).permissionMode return providerMode ?? providerSettings.settings.value.permissionMode diff --git a/src/shell/composables/__tests__/useEmbeddedAgent.test.ts b/src/shell/composables/__tests__/useEmbeddedAgent.test.ts index 542ebd8..5a1f601 100644 --- a/src/shell/composables/__tests__/useEmbeddedAgent.test.ts +++ b/src/shell/composables/__tests__/useEmbeddedAgent.test.ts @@ -102,9 +102,15 @@ function makeStubThread(initial: ThreadMessage[] = []): UseTaskThread & { beforeEach(() => { resetProviderSettingsForTests() - // Default to OpenRouter so makeProvider doesn't bail on missing creds. + // Default to a local CLI: seed (apply) runs now route through + // resolveProviderForTaskType, and a built-in persona with no explicit pin + // inherits the global active provider. The seed-lifecycle tests below use + // no-`type` tasks (→ the `general` built-in), so the active provider must be + // a local CLI or the "local CLIs only" apply gate fires before they can run. + // (makeProvider is mocked, so claude-local needs no real CLI here.) const store = useProviderSettings() - store.setActiveProvider('openrouter') + store.setActiveProvider('claude-local') + // Keep an OpenRouter config populated so the redaction test can flip to it. store.setProviderConfig({ id: 'openrouter', apiKey: 'sk-or-test', @@ -172,6 +178,9 @@ describe('useEmbeddedAgent — surfaces', () => { describe('useEmbeddedAgent — redaction wiring', () => { it('scrubs secrets from outgoing history before the provider sees them', async () => { + // Redaction matters most on an HTTP provider (the prompt leaves the machine), + // so exercise it on openrouter specifically (the suite default is a local CLI). + useProviderSettings().setActiveProvider('openrouter') const secret = `sk-ant-${'A'.repeat(40)}` const thread = makeStubThread([ { id: 'u0', ts: 1, role: 'user', content: `my key is ${secret}` }, @@ -255,6 +264,28 @@ describe('useEmbeddedAgent — seed run lifecycle', () => { expect(updateTaskStatusMock).not.toHaveBeenCalled() }) + it('inlines task grounding as a SEPARATE markdown block (no lazy continuation into the blockquote)', async () => { + tasksRef.value = [{ + id: 'task-test', status: 'pending', type: 'style_update', + description: 'Make the button blue.', file: 'src/App.vue', line: 42, + }] + const thread = makeStubThread() + const agent = useEmbeddedAgent(thread) + await agent.send('Make the button blue.', { isSeed: true }) + + const seed = thread.appended.find((m) => m.role === 'user') + expect(seed).toBeDefined() + const content = seed!.content + // The description rides as a blockquote (human-facing in the Conversation tab). + expect(content).toContain('> Make the button blue.') + // The grounding heading MUST be preceded by a blank line so CommonMark + // renders it as its own block instead of absorbing it into the blockquote. + expect(content).toMatch(/\n\n\*\*Task grounding\*\*/) + // file/line are inlined so the agent doesn't reflexively annotask_get_task. + expect(content).toContain('src/App.vue') + expect(content).toContain(':42') + }) + it('uses the agent\'s last text block as the resolution', async () => { tasksRef.value = [{ id: 'task-test', status: 'in_progress', description: 'x' }] // Mid-run, replace the mock so the end-of-run sees status: in_progress. diff --git a/src/shell/composables/useAutoRunDriver.ts b/src/shell/composables/useAutoRunDriver.ts index a00b65c..0ee223c 100644 --- a/src/shell/composables/useAutoRunDriver.ts +++ b/src/shell/composables/useAutoRunDriver.ts @@ -47,7 +47,15 @@ export function startAutoRunDriver(): void { } async function drain(taskSystem: ReturnType): Promise { - if (runningId) return // already busy; we'll re-enter when this turn finishes + if (runningId) { + // Already busy; we'll re-enter when this turn finishes. Warn (don't stay + // silent) so a stuck runningId — the failure mode where every later auto + // task sits unstarted until the user opens its Conversation tab — is + // diagnosable from the console instead of looking like nothing happened. + // eslint-disable-next-line no-console + console.warn(`[annotask:autorun] drain skipped: a run is already in flight (runningId=${runningId})`) + return + } // Skip the entire drain when the user is in skill/MCP mode. Tasks may // still enqueue (older code paths, tests), but the driver should not @@ -137,7 +145,36 @@ async function runHeadless(taskId: string, prompt: string): Promise { cancelHooks.set(taskId, () => agent.abort()) // Seed run — agent treats this prompt as its objective. On clean // completion the composable flips the task to `review`. - await agent.send(prompt, { isSeed: true }) + // + // Hard ceiling: a `send()` that never settles (a provider that hangs with + // the run watchdog disabled, or an abort that fails to break the stream) + // would pin the module-global `runningId` forever and wedge the FIFO — every + // later auto task then sits unstarted until the user opens its Conversation + // tab. Race the send against an absolute ceiling sized generously ABOVE the + // user's configured run watchdog so it never pre-empts a legitimate long + // run; on expiry abort the agent and reject so `drain`'s finally releases + // `runningId`. + const s = useProviderSettings().settings.value + const watchdogMs = Math.max(s.maxRunDurationMs, s.idleTimeoutMs, 0) + const ceilingMs = watchdogMs > 0 ? watchdogMs + 60_000 : 20 * 60_000 + let timer: ReturnType | null = null + const sendPromise = agent.send(prompt, { isSeed: true }) + // Swallow a late rejection so aborting via the ceiling doesn't surface as an + // unhandled rejection after the race already settled. + void sendPromise.catch(() => {}) + try { + await Promise.race([ + sendPromise, + new Promise((_, reject) => { + timer = setTimeout(() => { + agent.abort() + reject(new Error(`headless run exceeded the ${Math.round(ceilingMs / 60_000)}-minute driver ceiling`)) + }, ceilingMs) + }), + ]) + } finally { + if (timer) clearTimeout(timer) + } } finally { cancelHooks.delete(taskId) thread.close() diff --git a/src/shell/composables/useEmbeddedAgent.ts b/src/shell/composables/useEmbeddedAgent.ts index 0edf025..3f84929 100644 --- a/src/shell/composables/useEmbeddedAgent.ts +++ b/src/shell/composables/useEmbeddedAgent.ts @@ -44,6 +44,7 @@ import type { ProviderMessage, ProviderEvent, ProviderContentBlock } from '../.. import type { PermissionMode } from '../../schema' import type { WorkStreamBlock } from '../../shared/work-stream' import { summarizeToolCall, summarizeToolResult } from '../utils/toolSummary' +import { buildTaskSummary } from '../../shared/task-summary' import { fetchMcpToolCatalog } from '../services/mcpClient' import type { ThreadMessage, UseTaskThread } from './useTaskThread' @@ -128,21 +129,23 @@ function redactMessages(messages: ProviderMessage[]): ProviderMessage[] { /** * Compose the seed prompt sent to the local CLI. * - * Two things go in: the task id (canonical handle) and the description - * (rendered as a blockquote). Everything else — file, line, component, theme, - * selected element, screenshot, interaction history, type-specific context — - * lives behind `annotask_get_task` and the agent fetches what it needs. + * The prompt inlines the COMPACT task — id, description blockquote, and a + * `**Task grounding**` block built from `buildTaskSummary` (file/line/component/ + * route + per-type context + a capped slice of the structured body the task IS: + * style changes / theme edits / wireframe placements). This eliminates the + * reflexive `annotask_get_task` round-trip every seed run used to start with. * - * Why this shape: - * - The id is the only field the agent strictly needs; MCP carries the rest. - * - The description is decorative for the agent (it'll get a richer copy via - * MCP) but **load-bearing for the human** reading the Conversation tab — - * without it, the chat thread is just opaque task IDs. - * - Also serves as a graceful-degrade fallback when MCP isn't configured: - * the agent still has the user's request as plain text. + * Only the HEAVY fields stay behind MCP — screenshot bytes + * (`annotask_get_screenshot`), rendered outerHTML (`annotask_get_rendered_html`), + * and interaction history (`annotask_get_interaction_history`). The grounding + * block mirrors `buildTaskSummary` (the CLI/HTTP/MCP single source of truth) so + * the inlined field set can't drift from what triage surfaces elsewhere. * - * The runner handles status transitions (lock-on-start, mark-review-on-clean- - * exit), so the agent does NOT need to call `annotask_update_task`. + * The description blockquote stays first — it's load-bearing for the human + * reading the Conversation tab, and a graceful-degrade fallback when MCP isn't + * configured. The runner handles status transitions (lock-on-start, mark- + * review-on-clean-exit), so the agent does NOT need to call + * `annotask_update_task`. */ interface SeedFeedbackEntry { questions?: Array<{ id?: string; text?: string }> @@ -177,8 +180,84 @@ function answeredFeedbackBlock(feedback: SeedFeedbackEntry[] | undefined): strin ].join('\n') } +/** Summary keys already surfaced elsewhere in the seed prompt, so the grounding + * block shouldn't repeat them. `feedback`/`resolution` ride the retry block; + * `attempts` is conveyed by the answered-feedback Q&A block; file/line/ + * component/route render on the dedicated `where:` line. */ +const GROUNDING_SKIP = new Set([ + 'id', 'type', 'status', 'description', 'feedback', 'blocked_reason', 'resolution', + 'attempts', 'file', 'line', 'component', 'route', +]) + +/** Cap on inlined structured-body entries (style changes / theme edits / + * wireframe placements) so a huge task can't blow the prompt — the rest stays + * one `annotask_get_task` away. */ +const GROUNDING_BODY_CAP = 12 + +/** + * Render the compact, agent-useful subset of the task inline so a seed run has + * file/line/component/context WITHOUT a reflexive `annotask_get_task`. Reuses + * `buildTaskSummary` (the CLI/HTTP/MCP single source of truth); the heavy + * fields (screenshot bytes, rendered outerHTML, interaction history) stay + * behind their dedicated MCP tools. Returns '' when there's nothing to ground. + */ +function groundingBlock(task: Record): string { + let summary: Record + try { summary = buildTaskSummary(task) } catch { return '' } + + const loc: string[] = [] + if (typeof summary.file === 'string' && summary.file) { + loc.push(`\`${summary.file}\`${typeof summary.line === 'number' ? `:${summary.line}` : ''}`) + } + if (typeof summary.component === 'string' && summary.component) loc.push(`component \`${summary.component}\``) + if (typeof summary.route === 'string' && summary.route) loc.push(`route \`${summary.route}\``) + + const extras: string[] = [] + for (const [k, v] of Object.entries(summary)) { + if (GROUNDING_SKIP.has(k) || v == null) continue + extras.push(`- ${k}: ${typeof v === 'object' ? JSON.stringify(v) : String(v)}`) + } + + // The structured arrays a task IS (changes/edits/placements). Capped so a + // large wireframe/theme task stays bounded; the tail points back to MCP. + let bodyBlock = '' + const ctx = (task.context && typeof task.context === 'object' && !Array.isArray(task.context)) + ? task.context as Record + : undefined + const wf = ctx && ctx.wireframe && typeof ctx.wireframe === 'object' && !Array.isArray(ctx.wireframe) + ? ctx.wireframe as Record + : undefined + let arr: unknown[] | null = null + if (ctx && Array.isArray(ctx.changes)) arr = ctx.changes + else if (ctx && Array.isArray(ctx.edits)) arr = ctx.edits + else if (wf && Array.isArray(wf.instances)) arr = wf.instances + if (arr && arr.length > 0) { + const head = arr.slice(0, GROUNDING_BODY_CAP) + const tail = arr.length > GROUNDING_BODY_CAP + ? `\n…+${arr.length - GROUNDING_BODY_CAP} more — \`annotask_get_task\` for the full set` + : '' + bodyBlock = '```json\n' + JSON.stringify(head) + '\n```' + tail + } + + const lines: string[] = [] + if (loc.length > 0) lines.push(`- where: ${loc.join(', ')}`) + lines.push(...extras) + if (lines.length === 0 && !bodyBlock) return '' + // Assemble heading / list / code-fence as DISTINCT markdown blocks separated + // by blank lines. A single-'\n' join (with the leading '' stripped by + // filter(Boolean)) let the heading lazily continue into the preceding + // blockquote once this string was spliced into the seed prompt — the caller + // also joins sections with a blank line for the same reason. + const parts: string[] = [ + '**Task grounding** (inlined — apply from this; fetch the full task only for the screenshot, rendered HTML, or interaction history):', + ] + if (lines.length > 0) parts.push(lines.join('\n')) + if (bodyBlock) parts.push(bodyBlock) + return parts.join('\n\n') +} + function buildSeedPrompt( - task: { id?: string; status?: string; feedback?: string; resolution?: string; agent_feedback?: SeedFeedbackEntry[] }, + task: { id?: string; status?: string; feedback?: string; resolution?: string; agent_feedback?: SeedFeedbackEntry[]; [k: string]: unknown }, description: string, ): string { const id = task.id ?? '' @@ -207,16 +286,25 @@ function buildSeedPrompt( 'Address the feedback specifically — do not repeat the previous approach unless the feedback explicitly asks for a small tweak on top of it.', ].join('\n') : '' + const grounding = groundingBlock(task) + const closing = [ + grounding + ? `The grounding above is the compact task. Only call \`annotask_get_task\` if you need the screenshot, rendered HTML, or interaction history — otherwise apply directly.` + : `Call \`annotask_get_task\` with this id if you need more than the description above.`, + `Annotask handles the status transitions for you — exit cleanly when done, or reply with a short explanation if you can't apply the change.`, + ].join('\n') + // Join only non-empty sections with a BLANK LINE so each (lead-in + + // description blockquote, grounding, retry, answered-feedback, closing) + // renders as its own markdown block. A single-'\n' join previously let the + // grounding heading and the closing lines lazily continue INTO the + // blockquote / where-list (CommonMark lazy continuation). return [ - `Apply Annotask task \`${id}\`:`, - '', - quoted, + `Apply Annotask task \`${id}\`:\n\n${quoted}`, + grounding, retryBlock, answeredFeedbackBlock(task.agent_feedback), - '', - `Use \`annotask_get_task\` with this id for full context if you need it.`, - `Annotask handles the status transitions for you — exit cleanly when done, or reply with a short explanation if you can't apply the change.`, - ].filter(Boolean).join('\n') + closing, + ].map((s) => s.trim()).filter(Boolean).join('\n\n') } /** @@ -245,31 +333,30 @@ async function fetchSystemPrompt(taskType: string | undefined): Promise /** * Task ids this tab currently holds an `in_progress` lock for via a live seed - * run. If the tab is closed mid-run, the run's normal finalization never gets - * to execute — so on `pagehide` we relinquish each lock back to `pending` with - * a keepalive PATCH. That reconciles the task *before* the server's - * orphan-finalizer has to guess, so a closed tab leaves a retryable `pending` - * task instead of a stranded one (and never the misleading "tab was likely - * closed" blocked state). Module-scoped so the single listener covers both the - * headless auto-run driver and any open Conversation tab. + * run. Used to WARN before an accidental reload/close (`beforeunload`). + * + * We deliberately do NOT revert the task to `pending` on unload anymore. The + * old `pagehide` keepalive-PATCH-to-pending raced the server and instantly + * abandoned whatever the CLI had already written — that WAS the "reload breaks + * the agent" bug. The server now keeps the CLI alive across a disconnect + * (spawn detach-grace) and finalizes the task on the child's own exit (clean + * exit → `review`, interrupted/failed → `pending`; see `scheduleOrphanFinalize` + * in server/index.ts), so the correct client behavior is to leave the lock + * alone and just discourage the accidental reload. Module-scoped so the single + * listener covers both the headless auto-run driver and any open tab. */ const lockedSeedTasks = new Set() -let pagehideArmed = false -function armPagehideFlush(): void { - if (pagehideArmed || typeof window === 'undefined') return - pagehideArmed = true - window.addEventListener('pagehide', () => { - for (const id of lockedSeedTasks) { - try { - // keepalive lets the request outlive the unloading document. - void fetch(`/__annotask/api/tasks/${encodeURIComponent(id)}`, { - method: 'PATCH', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ status: 'pending' }), - keepalive: true, - }).catch(() => { /* best-effort */ }) - } catch { /* ignore */ } - } +let unloadGuardArmed = false +function armUnloadGuard(): void { + if (unloadGuardArmed || typeof window === 'undefined') return + unloadGuardArmed = true + window.addEventListener('beforeunload', (e: BeforeUnloadEvent) => { + if (lockedSeedTasks.size === 0) return + // Standard "are you sure you want to leave?" prompt. The agent keeps + // running server-side either way, but the user loses the live work-stream + // view, so it's worth a confirm. + e.preventDefault() + e.returnValue = '' }) } @@ -393,10 +480,17 @@ export function useEmbeddedAgent(thread: UseTaskThread): UseEmbeddedAgent { const taskType = currentTask && typeof currentTask.type === 'string' ? currentTask.type : undefined const persona = isSeed ? providerSettings.getPersonaForTaskType(taskType) : null - const activeProviderId = persona?.providerId ?? providerSettings.activeProvider.value + // Provider routing. Seed (apply) runs resolve through the persona/active- + // provider resolver so the global Settings → Providers picker actually + // drives which CLI applies the task: built-in personas inherit the active + // provider unless the user explicitly pinned one in Settings → Agents; + // custom personas keep their own. Free-form chat (isSeed=false) never routes + // through a persona — it always uses the active provider + its model/effort. + const routed = isSeed ? providerSettings.resolveProviderForTaskType(taskType) : null + const activeProviderId = routed ? routed.providerId : providerSettings.activeProvider.value const activeCfg = providerSettings.settings.value.providers[activeProviderId] - const activeModel = (persona && persona.model.length > 0 ? persona.model : activeCfg.model) ?? '' - const activeEffort = persona && persona.effort !== 'auto' ? persona.effort : activeCfg.effort + const activeModel = (routed ? routed.model : activeCfg.model) ?? '' + const activeEffort = routed ? routed.effort : activeCfg.effort // Phase 3 v1: only local CLIs can apply tasks (they have native file-edit // tools and the spawn `cwd` lands them in the project root). HTTP-only @@ -404,7 +498,7 @@ export function useEmbeddedAgent(thread: UseTaskThread): UseEmbeddedAgent { // the user how to fix it. if (isSeed && !isLocalCliProvider(activeProviderId)) { status.value = 'error' - errorMessage.value = `${activeProviderId} can't apply tasks yet — pick a persona that uses a local CLI (claude-local, codex-local, opencode-local, copilot-local) in Settings → Agents.` + errorMessage.value = `${activeProviderId} can't apply tasks yet — choose a local CLI (claude-local, codex-local, opencode-local, copilot-local) as your provider in Settings → Providers, or pin one for this task type in Settings → Agents.` if (taskId) markRunFinished(taskId) return } @@ -461,11 +555,12 @@ export function useEmbeddedAgent(thread: UseTaskThread): UseEmbeddedAgent { let provider: import('../../embedded/provider.js').LLMProvider try { - // If a persona is active and selects a different provider than the - // global active one, fork the settings so makeProvider keys off the - // persona's choice without mutating the user's persisted activeProvider. - const settingsForCall = persona && persona.providerId !== providerSettings.settings.value.activeProvider - ? { ...providerSettings.settings.value, activeProvider: persona.providerId } + // Fork the settings so makeProvider keys off the RESOLVED provider + // (routed above through resolveProviderForTaskType) without mutating the + // user's persisted activeProvider. This is the seam that actually spawns + // the right CLI — editing only `activeProviderId` above isn't enough. + const settingsForCall = activeProviderId !== providerSettings.settings.value.activeProvider + ? { ...providerSettings.settings.value, activeProvider: activeProviderId } : providerSettings.settings.value provider = makeProvider(settingsForCall, { referer: typeof window !== 'undefined' ? window.location.origin : undefined, @@ -786,9 +881,10 @@ export function useEmbeddedAgent(thread: UseTaskThread): UseEmbeddedAgent { if (!current) return if (current.status !== 'pending' && current.status !== 'denied') return await taskSystem.updateTaskStatus(taskId, 'in_progress') - // Track the held lock so a pagehide (tab close) can relinquish it. + // Track the held lock so beforeunload can warn before an accidental + // reload/close (the server keeps the run alive + finalizes it). lockedSeedTasks.add(taskId) - armPagehideFlush() + armUnloadGuard() } catch (err) { // Surface in the conversation error strip, don't abort the run — the // agent should still try to do useful work even if the lock fails. diff --git a/src/shell/composables/useProviderSettings.ts b/src/shell/composables/useProviderSettings.ts index b67275e..2bc6d93 100644 --- a/src/shell/composables/useProviderSettings.ts +++ b/src/shell/composables/useProviderSettings.ts @@ -202,6 +202,37 @@ function create( return resolvePersonaForTaskType(personas.value, taskType, settings.value.personaOverrides ?? {}) } + /** + * Resolve the provider + model + effort a SEED (apply) run for the given task + * type should use. Policy: + * - A built-in persona with NO explicit per-persona pin inherits the global + * active provider (the Settings → Providers picker) and its model/effort. + * This is what makes the picker actually drive apply runs instead of every + * built-in silently using its hardcoded `claude-local`. + * - An explicit per-persona pin (Settings → Agents, persisted to + * `.annotask/agents.json`) wins. `combinePersonas` has already baked the + * override into the persona's providerId/model/effort, so we read those. + * - A custom persona keeps its own providerId/model/effort (authored + * deliberately). + * Free-form chat does NOT call this — it always uses the active provider. + */ + function resolveProviderForTaskType( + taskType: string | undefined, + ): { providerId: ProviderId; model: string; effort: EffortLevel } { + const inheritGlobal = () => { + const id = settings.value.activeProvider + const cfg = settings.value.providers[id] + return { providerId: id, model: cfg.model ?? '', effort: cfg.effort } + } + const persona = getPersonaForTaskType(taskType) + if (!persona) return inheritGlobal() + const explicitPin = Boolean(agentOverrides()[persona.id]?.providerId) + if (explicitPin || persona.isCustom) { + return { providerId: persona.providerId, model: persona.model, effort: persona.effort } + } + return inheritGlobal() + } + /** Insert or replace a custom persona by id. Built-ins are never written * to `customPersonas`; instead, editing a built-in clones it as * `custom:` so the original stays available. */ @@ -258,6 +289,7 @@ function create( setMaxRunDurationMs, personas, getPersonaForTaskType, + resolveProviderForTaskType, upsertPersona, deletePersona, setPersonaOverride, diff --git a/src/shell/utils/safeMd.ts b/src/shell/utils/safeMd.ts index cf45d73..b6d1847 100644 --- a/src/shell/utils/safeMd.ts +++ b/src/shell/utils/safeMd.ts @@ -1,6 +1,16 @@ import { marked } from 'marked' import DOMPurify from 'dompurify' +// Links in rendered markdown (agent output, task descriptions, reports) should +// open in a new tab and never reverse-tabnab the shell. Registered once at +// module load — DOMPurify hooks are global, so adding it per-call would stack. +DOMPurify.addHook('afterSanitizeAttributes', (node) => { + if (node.tagName === 'A' && node.getAttribute('href')) { + node.setAttribute('target', '_blank') + node.setAttribute('rel', 'noopener noreferrer') + } +}) + /** Parse markdown and sanitize output HTML. Use instead of raw marked.parse() + v-html. */ export function safeMd(text: string): string { if (!text) return ''