diff --git a/.changeset/approval-gate-final-response.md b/.changeset/approval-gate-final-response.md new file mode 100644 index 0000000..87a554d --- /dev/null +++ b/.changeset/approval-gate-final-response.md @@ -0,0 +1,5 @@ +--- +"@openrouter/agent": patch +--- + +Run the approval check before executing pending tool calls on the `allowFinalResponse` path, and evaluate function-form `requireApproval` predicates against schema-normalized arguments so the approval decision matches the executed input. diff --git a/packages/agent/src/lib/conversation-state.ts b/packages/agent/src/lib/conversation-state.ts index d672643..1240c77 100644 --- a/packages/agent/src/lib/conversation-state.ts +++ b/packages/agent/src/lib/conversation-state.ts @@ -1,4 +1,5 @@ import type * as models from '@openrouter/sdk/models'; +import * as z4 from 'zod/v4'; import type { ConversationState, ParsedToolCall, @@ -104,6 +105,21 @@ export function appendToMessages( ]; } +function normalizeToolCallArguments< + TTool extends Exclude< + Tool, + { + _brand: 'server-tool'; + } + >, +>(toolCall: ParsedToolCall, tool: TTool): ParsedToolCall { + const parsed = z4.safeParse(tool.function.inputSchema, toolCall.arguments); + if (parsed.success) { + toolCall.arguments = parsed.data as ParsedToolCall['arguments']; + } + return toolCall; +} + /** * Check if a tool call requires approval * @param toolCall - The tool call to check @@ -120,12 +136,6 @@ export async function toolRequiresApproval( context: TurnContext, ) => boolean | Promise, ): Promise { - // Call-level check takes precedence - if (callLevelCheck) { - return callLevelCheck(toolCall, context); - } - - // Fall back to tool-level setting (server tools never require approval) const tool = tools.find( ( t, @@ -136,6 +146,16 @@ export async function toolRequiresApproval( } > => isClientTool(t) && t.function.name === toolCall.name, ); + if (tool) { + normalizeToolCallArguments(toolCall as ParsedToolCall, tool); + } + + // Call-level check takes precedence + if (callLevelCheck) { + return callLevelCheck(toolCall, context); + } + + // Fall back to tool-level setting (server tools never require approval) if (!tool) { return false; } diff --git a/packages/agent/src/lib/model-result.ts b/packages/agent/src/lib/model-result.ts index 4ca5fc9..28057a3 100644 --- a/packages/agent/src/lib/model-result.ts +++ b/packages/agent/src/lib/model-result.ts @@ -2010,6 +2010,10 @@ export class ModelResult< numberOfTurns: turnNumber, }; + if (await this.handleApprovalCheck(pendingToolCalls, turnNumber, currentResponse)) { + return; + } + await this.options.onTurnStart?.(turnContext); await this.resolveAsyncFunctionsForTurn(turnContext); diff --git a/packages/agent/tests/unit/allow-final-response.test.ts b/packages/agent/tests/unit/allow-final-response.test.ts index b33f116..dd6a8b1 100644 --- a/packages/agent/tests/unit/allow-final-response.test.ts +++ b/packages/agent/tests/unit/allow-final-response.test.ts @@ -323,6 +323,61 @@ describe('allowFinalResponse', () => { expect(fnCallOutput?.output).toContain('"temperature":99'); }); + it('pauses for approval before executing final-response tool calls', async () => { + const executeSpy = vi.fn(async (_p: { location: string }) => ({ + temperature: 99, + })); + const approvalTool = { + type: ToolType.Function, + function: { + name: 'get_weather', + description: 'Get the weather for a location.', + inputSchema: z.object({ + location: z.string(), + }), + outputSchema: z.object({ + temperature: z.number(), + }), + requireApproval: true, + execute: executeSpy, + }, + } as const; + const saved: Array<{ + status?: string; + pendingToolCalls?: unknown[]; + }> = []; + const stateAccessor = { + load: async () => null, + save: async (state: { status?: string; pendingToolCalls?: unknown[] }) => { + saved.push(state); + }, + }; + + mockBetaResponsesSend.mockResolvedValueOnce({ + ok: true, + value: toolCallResponse(), + }); + + const result = callModel(client, { + model: 'test-model', + input: 'What is the weather?', + tools: [ + approvalTool, + ] as const, + stopWhen: stepCountIs(0), + allowFinalResponse: true, + state: stateAccessor as unknown as Parameters[1]['state'], + }); + + const pending = await result.getPendingToolCalls(); + + expect(pending).toHaveLength(1); + expect(pending[0]?.name).toBe('get_weather'); + expect(executeSpy).not.toHaveBeenCalled(); + expect(mockBetaResponsesSend).toHaveBeenCalledTimes(1); + expect(saved.some((state) => state.status === 'awaiting_approval')).toBe(true); + }); + it('does not trigger when allowFinalResponse is false', async () => { mockBetaResponsesSend.mockResolvedValueOnce({ ok: true, diff --git a/packages/agent/tests/unit/conversation-state.test.ts b/packages/agent/tests/unit/conversation-state.test.ts index 81c60e2..3ca56b2 100644 --- a/packages/agent/tests/unit/conversation-state.test.ts +++ b/packages/agent/tests/unit/conversation-state.test.ts @@ -308,6 +308,36 @@ describe('Conversation State Utilities', () => { ).toBe(true); }); + it('should apply schema defaults before function-based approval checks', async () => { + const toolWithDefaultedApproval = tool({ + name: 'defaulted_action', + inputSchema: z.object({ + destructive: z.boolean().default(true), + }), + requireApproval: (params) => params.destructive === true, + execute: async () => ({}), + }); + + const toolCall = { + id: '1', + name: 'defaulted_action', + arguments: {}, + }; + + expect( + await toolRequiresApproval( + toolCall, + [ + toolWithDefaultedApproval, + ], + context, + ), + ).toBe(true); + expect(toolCall.arguments).toEqual({ + destructive: true, + }); + }); + it('should support async function-based tool-level requireApproval', async () => { // Tool with async function-based approval const toolWithAsyncApproval = tool({