diff --git a/.changeset/anthropic-server-tool-roundtrip.md b/.changeset/anthropic-server-tool-roundtrip.md new file mode 100644 index 000000000..4031e66c1 --- /dev/null +++ b/.changeset/anthropic-server-tool-roundtrip.md @@ -0,0 +1,18 @@ +--- +'@tanstack/ai-anthropic': patch +'@tanstack/ai': patch +--- + +Preserve Anthropic server-tool results (`web_search` / `web_fetch`) across turns. + +Previously the Anthropic adapter dropped `server_tool_use` and +`web_search_tool_result` / `web_fetch_tool_result` blocks while streaming, so the +evidence never round-tripped — a follow-up turn could no longer see the prior +web-search sources (issue #839). These now stream as a **provider-executed** +tool call carrying the raw result, which the agent loop skips (never executed +client-side) and the adapter replays verbatim into the next request. Adds the +`ProviderExecutedToolMetadata` convention plus `isProviderExecutedToolCall` / +`getProviderExecutedMetadata` helpers to `@tanstack/ai`. + +(No e2e: aimock cannot synthesize `server_tool_use` blocks; covered by unit +tests and verified live against the Anthropic API.) diff --git a/docs/config.json b/docs/config.json index 217345c6a..757a84fce 100644 --- a/docs/config.json +++ b/docs/config.json @@ -77,7 +77,8 @@ { "label": "Provider Tools", "to": "tools/provider-tools", - "addedAt": "2026-04-21" + "addedAt": "2026-04-21", + "updatedAt": "2026-06-26" }, { "label": "Provider Skills", diff --git a/docs/tools/provider-tools.md b/docs/tools/provider-tools.md index 5c3d04f57..19915290e 100644 --- a/docs/tools/provider-tools.md +++ b/docs/tools/provider-tools.md @@ -45,6 +45,47 @@ const stream = chat({ }) ``` +## Multi-turn persistence + +Provider tools run on the provider's own infrastructure, so their results +(e.g. Anthropic `web_search` sources, `web_fetch` page contents) come back +embedded in the assistant turn rather than as a separate tool message. 
TanStack +AI preserves those results on the assistant message, so when you feed the prior +conversation back into the next `chat()` call the model still sees the earlier +evidence — no special handling required: + +```typescript +import { chat, StreamProcessor } from '@tanstack/ai' +import { anthropicText } from '@tanstack/ai-anthropic' +import { webSearchTool } from '@tanstack/ai-anthropic/tools' + +const adapter = anthropicText('claude-opus-4-6') +const tools = [webSearchTool({ name: 'web_search', type: 'web_search_20250305' })] + +const processor = new StreamProcessor() +for await (const chunk of chat({ + adapter, + tools, + messages: [{ role: 'user', content: 'Find two sources on the drone market.' }], +})) { + processor.processChunk(chunk) +} +processor.finalizeStream() + +// The follow-up turn can still cite the previous search results. +const followUp = chat({ + adapter, + tools, + messages: [ + ...processor.getMessages(), + { role: 'user', content: 'List the exact sources you used.' }, + ], +}) +``` + +The search/fetch call surfaces as a provider-executed `tool-call` part on the +assistant message; the agent loop never tries to run it client-side. + ## Type-level guard Every provider-specific tool factory (e.g. 
`webSearchTool`, `computerUseTool`) diff --git a/packages/ai-anthropic/src/adapters/text.ts b/packages/ai-anthropic/src/adapters/text.ts index bdffea41c..64ae12484 100644 --- a/packages/ai-anthropic/src/adapters/text.ts +++ b/packages/ai-anthropic/src/adapters/text.ts @@ -28,11 +28,14 @@ import type { ContentBlockParam, DocumentBlockParam, ImageBlockParam, + ServerToolUseBlockParam, TextBlockParam, ThinkingBlockParam, ToolUseBlockParam, URLImageSource, URLPDFSource, + WebFetchToolResultBlockParam, + WebSearchToolResultBlockParam, } from '@anthropic-ai/sdk/resources/messages' import type Anthropic_SDK from '@anthropic-ai/sdk' import type { AnthropicBeta } from '@anthropic-ai/sdk/resources/beta/beta' @@ -57,6 +60,83 @@ import type { } from '../message-types' import type { AnthropicClientConfig } from '../utils' +/** + * The block type carried by an Anthropic provider-executed (server) tool's + * stored result. Mirrors the `*_tool_result` block emitted by the streaming + * API so it can be replayed verbatim into a later turn. + */ +type AnthropicServerToolResultBlockType = + | 'web_search_tool_result' + | 'web_fetch_tool_result' + +/** + * Anthropic payload stashed on a provider-executed tool call's `metadata` + * (under the `anthropic` key, alongside `providerExecuted: true`). Holds enough + * to reconstruct the original `server_tool_use` + `*_tool_result` blocks so the + * model still sees prior `web_search` / `web_fetch` evidence on the next turn. + */ +interface AnthropicServerToolMetadata { + serverToolType: ServerToolUseBlockParam['name'] + resultBlockType: AnthropicServerToolResultBlockType + /** Raw result block content, preserved verbatim from the stream. */ + result: unknown +} + +/** + * Narrow an opaque tool-call `metadata` to {@link AnthropicServerToolMetadata} + * when it follows the provider-executed convention, else `null`. 
+ */ +function readAnthropicServerToolMetadata( + metadata: unknown, +): AnthropicServerToolMetadata | null { + if (typeof metadata !== 'object' || metadata === null) return null + const outer = metadata as { providerExecuted?: unknown; anthropic?: unknown } + if (outer.providerExecuted !== true) return null + const inner = outer.anthropic + if (typeof inner !== 'object' || inner === null) return null + const { serverToolType, resultBlockType, result } = inner as { + serverToolType?: unknown + resultBlockType?: unknown + result?: unknown + } + if ( + typeof serverToolType !== 'string' || + (resultBlockType !== 'web_search_tool_result' && + resultBlockType !== 'web_fetch_tool_result') + ) { + return null + } + return { + // Validated as a string above; widen back to the SDK's tool-name union. + serverToolType: serverToolType as ServerToolUseBlockParam['name'], + resultBlockType, + result, + } +} + +/** + * Reconstruct the `*_tool_result` block param from stored server-tool metadata. + * The `result` content is opaque round-trip data, asserted to the SDK's param + * content type at this single boundary. + */ +function buildServerToolResultBlock( + toolUseId: string, + meta: AnthropicServerToolMetadata, +): WebSearchToolResultBlockParam | WebFetchToolResultBlockParam { + if (meta.resultBlockType === 'web_search_tool_result') { + return { + type: 'web_search_tool_result', + tool_use_id: toolUseId, + content: meta.result as WebSearchToolResultBlockParam['content'], + } + } + return { + type: 'web_fetch_tool_result', + tool_use_id: toolUseId, + content: meta.result as WebFetchToolResultBlockParam['content'], + } +} + /** * Computes the `betas` array for a Messages request. Unions: * - `interleaved-thinking-2025-05-14` when interleaved thinking is enabled, @@ -636,6 +716,25 @@ export class AnthropicTextAdapter< parsedInput = toolCall.function.arguments } + // Provider-executed server tools (e.g. 
web_search) replay as the + // original `server_tool_use` + result blocks so the model still sees + // the prior evidence. Their result was captured verbatim during + // streaming (see processAnthropicStream). + const serverMeta = readAnthropicServerToolMetadata(toolCall.metadata) + if (serverMeta) { + const serverToolUseBlock: ServerToolUseBlockParam = { + type: 'server_tool_use', + id: toolCall.id, + name: serverMeta.serverToolType, + input: parsedInput, + } + contentBlocks.push(serverToolUseBlock) + contentBlocks.push( + buildServerToolResultBlock(toolCall.id, serverMeta), + ) + continue + } + const toolUseBlock: ToolUseBlockParam = { type: 'tool_use', id: toolCall.id, @@ -806,6 +905,14 @@ export class AnthropicTextAdapter< // input. let currentServerTool: { id: string; name: string; input: string } | null = null + // Completed server tools awaiting their matching result block. Anthropic + // emits `server_tool_use` then a separate `*_tool_result` block; we hold + // the call here (keyed by id) until the result arrives so we can emit a + // single provider-executed tool call carrying the raw result for round-trip. + const completedServerTools = new Map< + string, + { id: string; name: string; input: string } + >() // AG-UI lifecycle tracking const runId = options.runId ?? genId() @@ -881,6 +988,61 @@ export class AnthropicTextAdapter< }, ) } + + // Emit the server tool as a single provider-executed tool call, + // carrying its raw result so the evidence (e.g. web_search sources) + // round-trips into the next turn's request. The agent loop skips + // provider-executed calls, so this never triggers client execution. + const serverTool = completedServerTools.get( + event.content_block.tool_use_id, + ) + if (serverTool) { + completedServerTools.delete(serverTool.id) + + let parsedInput: unknown = {} + try { + const parsed = serverTool.input + ? JSON.parse(serverTool.input) + : {} + parsedInput = parsed && typeof parsed === 'object' ? 
parsed : {} + } catch { + parsedInput = {} + } + + const serverToolMetadata = { + providerExecuted: true, + anthropic: { + serverToolType: serverTool.name, + resultBlockType: event.content_block.type, + result: content, + }, + } + + currentToolIndex++ + yield { + type: EventType.TOOL_CALL_START, + toolCallId: serverTool.id, + toolCallName: serverTool.name, + toolName: serverTool.name, + parentMessageId: messageId, + model, + timestamp: Date.now(), + index: currentToolIndex, + metadata: serverToolMetadata, + } + yield { + type: EventType.TOOL_CALL_END, + toolCallId: serverTool.id, + toolCallName: serverTool.name, + toolName: serverTool.name, + model, + timestamp: Date.now(), + input: parsedInput, + } + + // Text after the server tool starts a fresh message segment. + hasEmittedTextMessageStart = false + } } else if (event.content_block.type === 'thinking') { accumulatedThinking = '' accumulatedSignature = '' @@ -1095,6 +1257,9 @@ export class AnthropicTextAdapter< input: currentServerTool.input, }, ) + // Hold the call until its result block arrives so we can emit + // both together as one provider-executed tool call. 
+ completedServerTools.set(currentServerTool.id, currentServerTool) } currentServerTool = null } else if ( diff --git a/packages/ai-anthropic/tests/anthropic-adapter.test.ts b/packages/ai-anthropic/tests/anthropic-adapter.test.ts index 611c38a33..6581b8da1 100644 --- a/packages/ai-anthropic/tests/anthropic-adapter.test.ts +++ b/packages/ai-anthropic/tests/anthropic-adapter.test.ts @@ -1,5 +1,11 @@ import { describe, it, expect, beforeEach, vi } from 'vitest' -import { chat, type Tool, type StreamChunk } from '@tanstack/ai' +import { + chat, + StreamProcessor, + type Tool, + type StreamChunk, + type UIMessage, +} from '@tanstack/ai' import { AnthropicTextAdapter } from '../src/adapters/text' import type { AnthropicTextProviderOptions } from '../src/adapters/text' import { z } from 'zod' @@ -1144,20 +1150,41 @@ describe('Anthropic stream processing', () => { chunks.push(chunk) } - const toolEnds = chunks.filter((c) => c.type === 'TOOL_CALL_END') - expect(toolEnds).toHaveLength(1) - expect(toolEnds[0]).toMatchObject({ + // The client tool's input must NOT absorb the server tool's deltas. + const clientEnd = chunks.find( + (c) => + c.type === 'TOOL_CALL_END' && + (c as { toolCallId: string }).toolCallId === 'tool_client', + ) + expect(clientEnd).toMatchObject({ toolCallId: 'tool_client', input: { location: 'Berlin' }, }) - expect( - chunks.some( - (c) => - c.type === 'TOOL_CALL_START' && - (c as { toolCallId: string }).toolCallId === 'srv_fetch', - ), - ).toBe(false) + // The server tool now round-trips as a provider-executed tool call carrying + // its own input plus the raw result block. 
+ const serverStart = chunks.find( + (c) => + c.type === 'TOOL_CALL_START' && + (c as { toolCallId: string }).toolCallId === 'srv_fetch', + ) as (StreamChunk & { metadata?: Record<string, unknown> }) | undefined + expect(serverStart).toBeDefined() + expect(serverStart!.metadata).toMatchObject({ + providerExecuted: true, + anthropic: { + serverToolType: 'web_fetch', + resultBlockType: 'web_fetch_tool_result', + }, + }) + const serverEnd = chunks.find( + (c) => + c.type === 'TOOL_CALL_END' && + (c as { toolCallId: string }).toolCallId === 'srv_fetch', + ) + expect(serverEnd).toMatchObject({ + toolCallId: 'srv_fetch', + input: { url: 'https://example.com' }, + }) }) it.each([ @@ -1185,10 +1212,11 @@ describe('Anthropic stream processing', () => { ], ], ] as const)( - 'cleanly handles a server-only %s response with no prior client tool_use', + 'emits a provider-executed tool call for a server-only %s response with no prior client tool_use', async (toolName, resultType, resultContent) => { - // With no prior client tool_use, currentToolIndex is -1; server-tool - // deltas must not crash or create phantom client tool calls. + // With no prior client tool_use, currentToolIndex is -1; the server tool + // must emit its own provider-executed call without crashing or colliding + // with a phantom client tool call. 
const mockStream = (async function* () { yield { type: 'content_block_start', @@ -1238,14 +1266,182 @@ describe('Anthropic stream processing', () => { chunks.push(chunk) } - expect(chunks.some((c) => c.type === 'TOOL_CALL_START')).toBe(false) - expect(chunks.some((c) => c.type === 'TOOL_CALL_END')).toBe(false) + const start = chunks.find((c) => c.type === 'TOOL_CALL_START') as + | (StreamChunk & { metadata?: Record<string, unknown> }) + | undefined + expect(start).toMatchObject({ + toolCallId: 'srv_only', + toolCallName: toolName, + }) + expect(start!.metadata).toMatchObject({ + providerExecuted: true, + anthropic: { + serverToolType: toolName, + resultBlockType: resultType, + result: resultContent, + }, + }) + + const end = chunks.find((c) => c.type === 'TOOL_CALL_END') + expect(end).toMatchObject({ + toolCallId: 'srv_only', + input: { url: 'https://example.com' }, + }) const runFinished = chunks.filter((c) => c.type === 'RUN_FINISHED') expect(runFinished).toHaveLength(1) }, ) + it('round-trips web_search evidence across turns (issue #839)', async () => { + // Turn 1: thinking + server_tool_use(web_search) + result + final text. + const searchResults = [ + { + type: 'web_search_result', + encrypted_content: 'enc-1', + page_age: null, + title: 'Defense Drone Market', + url: 'https://example.com/drones', + }, + ] + const turn1 = (async function* () { + yield { + type: 'content_block_start', + index: 0, + content_block: { type: 'thinking', thinking: '' }, + } + yield { + type: 'content_block_delta', + index: 0, + delta: { type: 'thinking_delta', thinking: 'I should search.' 
}, + } + yield { + type: 'content_block_delta', + index: 0, + delta: { type: 'signature_delta', signature: 'sig-abc' }, + } + yield { type: 'content_block_stop', index: 0 } + yield { + type: 'content_block_start', + index: 1, + content_block: { + type: 'server_tool_use', + id: 'srv_search', + name: 'web_search', + }, + } + yield { + type: 'content_block_delta', + index: 1, + delta: { + type: 'input_json_delta', + partial_json: '{"query":"defense drone market"}', + }, + } + yield { type: 'content_block_stop', index: 1 } + yield { + type: 'content_block_start', + index: 2, + content_block: { + type: 'web_search_tool_result', + tool_use_id: 'srv_search', + content: searchResults, + }, + } + yield { type: 'content_block_stop', index: 2 } + yield { + type: 'content_block_start', + index: 3, + content_block: { type: 'text', text: '' }, + } + yield { + type: 'content_block_delta', + index: 3, + delta: { type: 'text_delta', text: 'Found one source.' }, + } + yield { type: 'content_block_stop', index: 3 } + yield { + type: 'message_delta', + delta: { stop_reason: 'end_turn' }, + usage: { output_tokens: 20 }, + } + yield { type: 'message_stop' } + })() + + mocks.betaMessagesCreate.mockResolvedValueOnce(turn1) + + const adapter = createAdapter('claude-3-7-sonnet') + + const firstMessages = [ + { role: 'user' as const, content: 'Search the drone market.' }, + ] + // Seed the processor with the same (ModelMessage-shaped) messages passed to + // chat() — exactly the pattern from issue #839. The processor must tolerate + // these parts-less entries. 
+ const processor = new StreamProcessor({ + initialMessages: firstMessages as unknown as Array<UIMessage>, + }) + for await (const chunk of chat({ adapter, messages: firstMessages })) { + processor.processChunk(chunk) + } + processor.finalizeStream() + + const afterTurn1 = processor.getMessages() + const assistant = afterTurn1.find((m) => m.role === 'assistant') + // The assistant message carries the server tool as a provider-executed + // tool-call part with the raw result on its metadata. + const serverPart = assistant?.parts.find( + (p) => p.type === 'tool-call' && p.id === 'srv_search', + ) + expect(serverPart).toBeDefined() + expect((serverPart as { metadata?: unknown }).metadata).toMatchObject({ + providerExecuted: true, + anthropic: { + serverToolType: 'web_search', + resultBlockType: 'web_search_tool_result', + result: searchResults, + }, + }) + + // Turn 2: replay prior messages + a new user turn. Assert the request the + // adapter sends preserves the server_tool_use + result blocks. + mocks.betaMessagesCreate.mockResolvedValueOnce(createTextStream('Sources:')) + + for await (const _ of chat({ + adapter, + messages: [ + ...afterTurn1, + { role: 'user', content: 'List the sources you used.' }, + ], + })) { + // consume + } + + expect(mocks.betaMessagesCreate).toHaveBeenCalledTimes(2) + const [secondPayload] = mocks.betaMessagesCreate.mock.calls[1]! 
+ const replayedAssistant = ( + secondPayload.messages as Array<{ + role: string + content: unknown + }> + ).find((m) => m.role === 'assistant') + expect(Array.isArray(replayedAssistant?.content)).toBe(true) + const blocks = replayedAssistant!.content as Array<{ type: string }> + const serverToolUse = blocks.find((b) => b.type === 'server_tool_use') + const resultBlock = blocks.find((b) => b.type === 'web_search_tool_result') + expect(serverToolUse).toMatchObject({ + type: 'server_tool_use', + id: 'srv_search', + name: 'web_search', + input: { query: 'defense drone market' }, + }) + expect(resultBlock).toMatchObject({ + type: 'web_search_tool_result', + tool_use_id: 'srv_search', + content: searchResults, + }) + }) + it('logs an error when a server tool result block carries an error variant', async () => { // A failed web_fetch (e.g. url_not_accessible) is otherwise invisible — // the model just keeps going. Surface it via the debug logger. diff --git a/packages/ai/src/activities/chat/index.ts b/packages/ai/src/activities/chat/index.ts index 9ddb55e6f..e6e508839 100644 --- a/packages/ai/src/activities/chat/index.ts +++ b/packages/ai/src/activities/chat/index.ts @@ -12,6 +12,7 @@ import { streamToText } from '../../stream-to-response.js' import { resolveDebugOption } from '../../logger/resolve' import { EventType } from '../../types' import { normalizeToolResult } from '../../utilities/tool-result' +import { isProviderExecutedToolCall } from '../../utilities/provider-executed' import { LazyToolManager } from './tools/lazy-tool-manager' import { MiddlewareAbortError, @@ -1856,6 +1857,13 @@ class TextEngine< for (const message of this.messages) { if (message.role === 'assistant' && message.toolCalls) { for (const toolCall of message.toolCalls) { + // Provider-executed tool calls (e.g. 
Anthropic `web_search`) were + // already run by the provider; they carry no client result, so they + // would otherwise look "pending" forever and the loop would try (and + // fail) to execute them client-side. Skip them. + if (isProviderExecutedToolCall(toolCall)) { + continue + } if (!completedToolIds.has(toolCall.id)) { pending.push(toolCall) } diff --git a/packages/ai/src/activities/chat/stream/processor.ts b/packages/ai/src/activities/chat/stream/processor.ts index f2c41fb62..b5ac8fe44 100644 --- a/packages/ai/src/activities/chat/stream/processor.ts +++ b/packages/ai/src/activities/chat/stream/processor.ts @@ -23,6 +23,7 @@ import { uiMessageToModelMessages, } from '../messages.js' import { normalizeToolResult } from '../../../utilities/tool-result' +import { isProviderExecutedToolCall } from '../../../utilities/provider-executed' import { defaultJSONParser } from './json-parser' import { appendStructuredOutputDelta, @@ -401,12 +402,15 @@ export class StreamProcessor { // 1. It was approved/denied (approval-responded state) // 2. It has an output field set (client tool completed via addToolResult) // 3. It has a corresponding tool-result part (server tool completed) + // 4. It is provider-executed (e.g. Anthropic web_search) — already run by + // the provider, so there is no client result to wait for. return toolParts.every( (part) => part.state === 'complete' || part.state === 'approval-responded' || (part.output !== undefined && !part.approval) || - toolResultIds.has(part.id), + toolResultIds.has(part.id) || + isProviderExecutedToolCall(part), ) } @@ -1739,8 +1743,12 @@ export class StreamProcessor { * downgrading a failed call back to 'input-complete'. */ private isToolCallPartErrored(toolCallId: string): boolean { + // `initialMessages` may be ModelMessage-shaped (no `parts`) — e.g. the + // common pattern of seeding a processor with the same messages passed to + // `chat()`. Guard the access so iterating them never throws. 
return this.messages.some((msg) => - msg.parts.some( + // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- `parts` is typed as required, but seeded ModelMessage-shaped messages can lack it at runtime. + msg.parts?.some( (part) => part.type === 'tool-call' && part.id === toolCallId && diff --git a/packages/ai/src/index.ts b/packages/ai/src/index.ts index 4a38e2e99..a534e5c91 100644 --- a/packages/ai/src/index.ts +++ b/packages/ai/src/index.ts @@ -252,6 +252,11 @@ export { normalizeToolResult, } from './utilities/tool-result' +export { + getProviderExecutedMetadata, + isProviderExecutedToolCall, +} from './utilities/provider-executed' + // Adapter extension utilities export { createModel, extendAdapter } from './extend-adapter' export type { ExtendedModelDef, ModelCapabilities } from './extend-adapter' diff --git a/packages/ai/src/types.ts b/packages/ai/src/types.ts index 3a7fe0807..6ad879109 100644 --- a/packages/ai/src/types.ts +++ b/packages/ai/src/types.ts @@ -159,6 +159,26 @@ export interface ToolCall { metadata?: TMetadata } +/** + * Convention for tool-call `metadata` that marks a call as **provider-executed** + * — run by the provider's own infrastructure (e.g. Anthropic `web_search` / + * `web_fetch` server tools) rather than by the agent loop. Adapters set + * `providerExecuted: true` so that: + * + * 1. The agent loop never tries to execute the call client-side (see + * {@link isProviderExecutedToolCall} usage in the chat engine), and + * 2. The adapter can stash the raw provider result alongside it so the call — + * and its evidence — round-trips into the next turn's request. + * + * Provider-specific payloads live under a namespaced key (e.g. `anthropic`), + * keeping this convention opaque to the framework core. The index signature + * preserves those per-adapter fields. 
+ */ +export interface ProviderExecutedToolMetadata { + providerExecuted?: boolean + [key: string]: unknown +} + // ============================================================================ // Multimodal Content Types // ============================================================================ @@ -361,7 +381,9 @@ export interface ToolCallPart { /** Tool execution output (for client tools or after approval) */ output?: any /** Provider-specific metadata that round-trips with the tool call. - * Typed per-adapter via `TToolCallMetadata`. */ + * Typed per-adapter via `TToolCallMetadata`. May follow the + * {@link ProviderExecutedToolMetadata} convention to mark provider-executed + * server tools (e.g. Anthropic `web_search`). */ metadata?: TMetadata } diff --git a/packages/ai/src/utilities/provider-executed.ts b/packages/ai/src/utilities/provider-executed.ts new file mode 100644 index 000000000..d5a924d5a --- /dev/null +++ b/packages/ai/src/utilities/provider-executed.ts @@ -0,0 +1,32 @@ +import type { ProviderExecutedToolMetadata } from '../types' + +/** + * Narrow a tool call's opaque `metadata` to the provider-executed convention. + * Returns the typed metadata when the call is provider-executed, else `null`. + * + * @see ProviderExecutedToolMetadata + */ +export function getProviderExecutedMetadata( + toolCall: { metadata?: unknown } | null | undefined, +): ProviderExecutedToolMetadata | null { + const metadata = toolCall?.metadata + if ( + typeof metadata === 'object' && + metadata !== null && + (metadata as ProviderExecutedToolMetadata).providerExecuted === true + ) { + return metadata as ProviderExecutedToolMetadata + } + return null +} + +/** + * True when a tool call was executed by the provider (e.g. Anthropic + * `web_search` / `web_fetch` server tools) rather than the agent loop. Such + * calls must not be routed to client-side execution and are already "complete". 
+ */ +export function isProviderExecutedToolCall( + toolCall: { metadata?: unknown } | null | undefined, +): boolean { + return getProviderExecutedMetadata(toolCall) !== null +} diff --git a/packages/ai/tests/messages.test.ts b/packages/ai/tests/messages.test.ts index 025e33b8a..0abc39825 100644 --- a/packages/ai/tests/messages.test.ts +++ b/packages/ai/tests/messages.test.ts @@ -99,4 +99,43 @@ describe('convertMessagesToModelMessages — AG-UI dedup pre-pass', () => { expect(result[0]?.role).toBe('system') expect(result[0]?.content).toBe('You are helpful') }) + + it('round-trips a provider-executed tool call without emitting a tool result (issue #839)', () => { + const metadata = { + providerExecuted: true, + anthropic: { + serverToolType: 'web_search', + resultBlockType: 'web_search_tool_result', + result: [{ type: 'web_search_result', url: 'https://example.com' }], + }, + } + const messages = [ + { + id: 'a1', + role: 'assistant', + parts: [ + { + type: 'tool-call', + id: 'srv_search', + name: 'web_search', + arguments: '{"query":"drones"}', + state: 'input-complete', + metadata, + }, + { type: 'text', content: 'Found a source.' }, + ], + } as UIMessage, + ] + + const result = convertMessagesToModelMessages(messages) + + // No tool result message — the provider executed the call, there is no + // client output to deliver. + expect(result.some((m) => m.role === 'tool')).toBe(false) + + const assistant = result.find((m) => m.role === 'assistant') + expect(assistant?.toolCalls).toHaveLength(1) + // Metadata round-trips so the adapter can replay the server tool blocks. 
+ expect(assistant?.toolCalls?.[0]?.metadata).toMatchObject(metadata) + }) }) diff --git a/testing/e2e/tests/anthropic-server-tool.spec.ts b/testing/e2e/tests/anthropic-server-tool.spec.ts index 90b5865ea..bd85451a1 100644 --- a/testing/e2e/tests/anthropic-server-tool.spec.ts +++ b/testing/e2e/tests/anthropic-server-tool.spec.ts @@ -41,30 +41,46 @@ test.describe('anthropic — webFetchTool() streaming (#604)', () => { c.type === 'TOOL_CALL_END' && (c as { input?: unknown }).input !== undefined, ) + const byId = ( + list: Array<Record<string, unknown>>, + id: string, + ): Record<string, unknown> | undefined => + list.find((c) => (c as { toolCallId?: string }).toolCallId === id) - // Exactly one client tool call. `web_fetch` is executed by Anthropic - // server-side, not surfaced as a client tool call. - expect(toolCallStarts).toHaveLength(1) - expect(toolCallArgEnds).toHaveLength(1) - expect(toolCallStarts[0]).toMatchObject({ + // Two tool calls surface: the client `lookup_weather` and the + // Anthropic-executed `web_fetch`. The latter now streams as a + // provider-executed tool call (#839) so its result can be replayed into + // later turns — it is NOT routed to client-side execution. + expect(toolCallStarts).toHaveLength(2) + expect(byId(toolCallStarts, 'toolu_client_weather')).toMatchObject({ toolCallId: 'toolu_client_weather', toolName: 'lookup_weather', }) - // Client tool args must be the clean Berlin payload — not the pre-fix - // concatenated `{"location":"Berlin"}{"url":"..."}`. - expect(toolCallArgEnds[0]).toMatchObject({ + // The web_fetch start is flagged provider-executed (in metadata) and + // carries the raw server tool result the adapter replays verbatim. + expect(byId(toolCallStarts, 'srvtoolu_web_fetch')).toMatchObject({ + toolCallId: 'srvtoolu_web_fetch', + toolName: 'web_fetch', + metadata: { + providerExecuted: true, + anthropic: { resultBlockType: 'web_fetch_tool_result' }, + }, + }) + + // Both tools surface a clean parsed `input` on TOOL_CALL_END. 
The client + // tool's args must still be the Berlin payload — not the pre-fix + // concatenated `{"location":"Berlin"}{"url":"..."}` — which is the + // regression this suite guards. + expect(toolCallArgEnds).toHaveLength(2) + expect(byId(toolCallArgEnds, 'toolu_client_weather')).toMatchObject({ toolCallId: 'toolu_client_weather', input: { location: 'Berlin' }, }) - - // No phantom client tool call for the server-side web_fetch. - expect( - toolCallStarts.some( - (c) => - (c as { toolCallId?: string }).toolCallId === 'srvtoolu_web_fetch', - ), - ).toBe(false) + expect(byId(toolCallArgEnds, 'srvtoolu_web_fetch')).toMatchObject({ + toolCallId: 'srvtoolu_web_fetch', + input: { url: 'https://example.com' }, + }) // Run completes cleanly through the agent loop's follow-up turn. expect(chunks.some((c) => c.type === 'RUN_FINISHED')).toBe(true)