diff --git a/api/oss/src/apis/fastapi/tools/models.py b/api/oss/src/apis/fastapi/tools/models.py index 768574f23c..86c62dcec6 100644 --- a/api/oss/src/apis/fastapi/tools/models.py +++ b/api/oss/src/apis/fastapi/tools/models.py @@ -1,6 +1,12 @@ -from typing import List, Optional, Union +from typing import Any, List, Optional, Union -from pydantic import BaseModel +from agenta.sdk.agents.tools import ( + BuiltinToolConfig, + GatewayToolConfig, + ToolConfigurationError, + coerce_tool_configs, +) +from pydantic import BaseModel, Field, field_validator from oss.src.core.tools.dtos import ( # Tool Catalog @@ -98,10 +104,26 @@ class ToolCallResponse(BaseModel): class ToolResolveRequest(BaseModel): - tools: List[AgentToolReference] = [] + tools: List[AgentToolReference] = Field(default_factory=list) + + @field_validator("tools", mode="before") + @classmethod + def _coerce_tools(cls, value: Any) -> List[AgentToolReference]: + try: + configs = coerce_tool_configs(value or []).tool_configs + except ToolConfigurationError as exc: + raise ValueError(str(exc)) from exc + unsupported = [ + config + for config in configs + if not isinstance(config, (BuiltinToolConfig, GatewayToolConfig)) + ] + if unsupported: + raise ValueError("/tools/resolve accepts only builtin and gateway tools") + return configs class ToolResolveResponse(BaseModel): count: int = 0 - builtins: List[str] = [] - custom: List[ResolvedAgentTool] = [] + builtins: List[str] = Field(default_factory=list) + custom: List[ResolvedAgentTool] = Field(default_factory=list) diff --git a/api/oss/src/core/tools/dtos.py b/api/oss/src/core/tools/dtos.py index 3c3f0ec53e..ad4d105bdd 100644 --- a/api/oss/src/core/tools/dtos.py +++ b/api/oss/src/core/tools/dtos.py @@ -1,6 +1,7 @@ from enum import Enum -from typing import Annotated, Any, Dict, List, Literal, Optional, Union +from typing import Any, Dict, List, Optional, Union +from agenta.sdk.agents.tools import BuiltinToolConfig, GatewayToolConfig from agenta.sdk.models.workflows import JsonSchemas from pydantic import BaseModel, Field @@ -250,28 +251,9 @@ class ToolExecutionResponse(BaseModel): # them into model-ready specs at invoke time (see ToolsService.resolve_agent_tools). -class AgentBuiltinTool(BaseModel): - """A Pi built-in tool, referenced by name (e.g. ``read``, ``bash``).""" - - type: Literal["builtin"] = "builtin" - name: str - - -class AgentComposioTool(BaseModel): - """A Composio action, carrying the slug segments ``/tools/call`` parses.""" - - type: Literal["composio"] = "composio" - integration: str - action: str - connection: str - # Function name shown to the model. Defaults to ``{integration}__{action}``. - name: Optional[str] = None - - -AgentToolReference = Annotated[ - Union[AgentBuiltinTool, AgentComposioTool], - Field(discriminator="type"), -] +AgentBuiltinTool = BuiltinToolConfig +AgentComposioTool = GatewayToolConfig +AgentToolReference = Union[BuiltinToolConfig, GatewayToolConfig] class ResolvedAgentTool(BaseModel): @@ -294,5 +276,5 @@ class AgentToolsResolution(BaseModel): ``customTools`` whose ``execute`` routes through ``/tools/call``. """ - builtins: List[str] = [] - custom: List[ResolvedAgentTool] = [] + builtins: List[str] = Field(default_factory=list) + custom: List[ResolvedAgentTool] = Field(default_factory=list) diff --git a/api/oss/tests/pytest/unit/tools/__init__.py b/api/oss/tests/pytest/unit/tools/__init__.py new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/api/oss/tests/pytest/unit/tools/__init__.py @@ -0,0 +1 @@ + diff --git a/api/oss/tests/pytest/unit/tools/test_agent_resolution.py b/api/oss/tests/pytest/unit/tools/test_agent_resolution.py new file mode 100644 index 0000000000..12ad49266a --- /dev/null +++ b/api/oss/tests/pytest/unit/tools/test_agent_resolution.py @@ -0,0 +1,79 @@ +from __future__ import annotations + +from types import SimpleNamespace +from uuid import uuid4 + +import pytest +from pydantic import ValidationError + +from agenta.sdk.agents.tools import BuiltinToolConfig, GatewayToolConfig + +from oss.src.apis.fastapi.tools.models import ToolResolveRequest +from oss.src.core.tools.dtos import AgentBuiltinTool, AgentComposioTool +from oss.src.core.tools.service import ToolsService + + +def test_api_reuses_sdk_tool_config_classes(): + assert AgentBuiltinTool is BuiltinToolConfig + assert AgentComposioTool is GatewayToolConfig + + +def test_resolve_request_coerces_legacy_composio_shape(): + request = ToolResolveRequest( + tools=[ + "read", + { + "type": "composio", + "integration": "github", + "action": "GET_USER", + "connection": "c1", + }, + ] + ) + assert isinstance(request.tools[0], BuiltinToolConfig) + assert isinstance(request.tools[1], GatewayToolConfig) + + +def test_resolve_request_rejects_non_gateway_runtime_tools(): + with pytest.raises(ValidationError, match="only builtin and gateway"): + ToolResolveRequest( + tools=[ + { + "type": "code", + "name": "calc", + "script": "...", + } + ] + ) + + +async def test_api_resolution_returns_stable_call_reference(monkeypatch): + service = object.__new__(ToolsService) + + async def _connection(**_kwargs): + return object() + + async def _action(**_kwargs): + return SimpleNamespace( + description="Get user", + schemas=SimpleNamespace( + inputs={"type": "object", "properties": {}}, + ), + ) + + monkeypatch.setattr(service, "resolve_connection_by_slug", _connection) + monkeypatch.setattr(service, "get_action", _action) + + result = await service.resolve_agent_tools( + project_id=uuid4(), + tools=[ + BuiltinToolConfig(name="read"), + GatewayToolConfig( + integration="github", + action="GET_USER", + connection="c1", + ), + ], + ) + assert result.builtins == ["read"] + assert result.custom[0].call_ref == "tools.composio.github.GET_USER.c1" diff --git a/sdks/python/agenta/sdk/engines/running/interfaces.py b/sdks/python/agenta/sdk/engines/running/interfaces.py index cf587e21fb..514135c90b 100644 --- a/sdks/python/agenta/sdk/engines/running/interfaces.py +++ b/sdks/python/agenta/sdk/engines/running/interfaces.py @@ -529,19 +529,26 @@ def llm_inputs_schema( schemas=dict( # type: ignore parameters=obj( properties={ - "model": scalar( - jtype="string", - default="gpt-5.5", - description="Model the agent runs on.", - ), - "agents_md": scalar( - jtype="string", - default=( - "You are a friendly hello-world agent running on the " - "Agenta agent service.\n\n- Greet the user warmly.\n- " - "Answer the user's message in one or two short sentences." - ), - description="The agent's instructions (AGENTS.md).", + # One composite control for the whole agent config. The field shape lives in + # `AgentConfigSchema` (agenta.sdk.utils.types), registered as the `agent_config` + # catalog type; the playground resolves this ref and renders the AgentConfigControl. + "agent": semantic_field( + x_ag_type_ref="agent_config", + jtype="object", + description="The agent's instructions, model, tools, MCP servers, and runtime.", + default={ + "agents_md": ( + "You are a friendly hello-world agent running on the " + "Agenta agent service.\n\n- Greet the user warmly.\n- " + "Answer the user's message in one or two short sentences." + ), + "model": "gpt-5.5", + "tools": [], + "mcp_servers": [], + "harness": "pi", + "sandbox": "local", + "permission_policy": "auto", + }, ), }, additional_properties=True, diff --git a/sdks/python/agenta/sdk/utils/types.py b/sdks/python/agenta/sdk/utils/types.py index 8e629b92fb..994c781aa4 100644 --- a/sdks/python/agenta/sdk/utils/types.py +++ b/sdks/python/agenta/sdk/utils/types.py @@ -8,6 +8,8 @@ from pydantic import Field, model_validator, AliasChoices +from agenta.sdk.agents.mcp import MCPServerConfig +from agenta.sdk.agents.tools import ToolConfig from agenta.sdk.utils.assets import supported_llm_models, model_metadata from agenta.sdk.utils.helpers import _PLACEHOLDER_RE from agenta.sdk.utils.rendering import ( @@ -1052,6 +1054,81 @@ def _model_catalog_type() -> dict: } +_DEFAULT_AGENT_MODEL = "gpt-5.5" +_DEFAULT_AGENTS_MD = ( + "You are a friendly hello-world agent running on the Agenta agent service.\n\n" + "- Greet the user warmly.\n" + "- Answer the user's message in one or two short sentences." +) + + +class AgentConfigSchema(AgSchemaMixin): + """The playground's editable agent config (the ``agent`` element), as one semantic type. + + This is the schema-generation counterpart to the runtime :class:`agenta.sdk.agents.AgentConfig` + parser: it exists only to emit a rich JSON Schema for the ``agent_config`` control, so the + field shapes live in Pydantic (single source of truth) instead of a hand-written literal. + It deliberately composes the editable fields the control surfaces — the neutral config + (``agents_md``/``model``/``tools``/``mcp_servers``) plus the run selection + (``harness``/``sandbox``/``permission_policy``) — and types ``tools``/``mcp_servers`` with the + real tool-def models so the playground gets typed editors. The runtime ``AgentConfig`` stays + permissive (``List[Any]``) because its job is to coerce the loose shapes the playground emits; + this model is strict because its job is to describe them. + """ + + __ag_type__ = "agent_config" + + agents_md: str = Field( + default=_DEFAULT_AGENTS_MD, + title="Instructions", + description="The agent's system prompt (its AGENTS.md).", + json_schema_extra={"x-ag-type": "textarea"}, + ) + model: str = Field( + default=_DEFAULT_AGENT_MODEL, + title="Model", + description="Model the agent runs on.", + json_schema_extra={"x-parameter": "grouped_choice"}, + ) + tools: List[ToolConfig] = Field( + default_factory=list, + title="Tools", + description=( + "Runnable tools the agent can call: harness built-ins, server-side gateway " + "actions (e.g. Composio), sandboxed code, or client-fulfilled tools." + ), + ) + mcp_servers: List[MCPServerConfig] = Field( + default_factory=list, + title="MCP servers", + description=( + "Declared MCP servers exposed to the agent. The backend resolves each server's " + "secret env from the vault at run time; tokens never live in the config." + ), + ) + harness: Literal["pi", "claude", "agenta"] = Field( + default="pi", + title="Harness", + description=( + "Coding agent to drive: pi, claude, or agenta (pi with Agenta's forced " + "skills, tools, and base instructions)." + ), + ) + sandbox: Literal["local", "daytona"] = Field( + default="local", + title="Sandbox", + description="Where the agent runs: local daemon or a Daytona sandbox.", + ) + permission_policy: Literal["auto", "deny"] = Field( + default="auto", + title="Permission policy", + description=( + "How a permission-gating harness (e.g. Claude Code) handles tool-use prompts " + "in this headless run: auto-approve or deny." + ), + ) + + CATALOG_TYPES = { Message.ag_type(): _dereference_schema(Message.model_json_schema()), Messages.ag_type(): _dereference_schema(Messages.model_json_schema()), @@ -1065,4 +1142,7 @@ def _model_catalog_type() -> dict: AgPermissions.ag_type(): _dereference_schema(AgPermissions.model_json_schema()), AgResponse.ag_type(): _dereference_schema(AgResponse.model_json_schema()), PromptTemplate.ag_type(): _dereference_schema(PromptTemplate.model_json_schema()), + AgentConfigSchema.ag_type(): _dereference_schema( + AgentConfigSchema.model_json_schema() + ), } diff --git a/services/oss/src/agent/app.py b/services/oss/src/agent/app.py index bcb0594aa4..fe3e164788 100644 --- a/services/oss/src/agent/app.py +++ b/services/oss/src/agent/app.py @@ -27,12 +27,13 @@ SessionConfig, make_harness, to_messages, + ui_message_stream, ) from oss.src.agent.config import load_config, wrapper_dir from oss.src.agent.schemas import AGENT_SCHEMAS from oss.src.agent.secrets import resolve_harness_secrets -from oss.src.agent.tools import resolve_tools +from oss.src.agent.tools import resolve_agent_resources from oss.src.agent.tracing import record_usage, trace_context @@ -75,6 +76,7 @@ async def _agent( inputs: Optional[Dict[str, Any]] = None, messages: Optional[List[Any]] = None, parameters: Optional[Dict] = None, + stream: Optional[bool] = None, ): params = parameters or {} @@ -86,22 +88,33 @@ async def _agent( ) msgs = to_messages(messages or (inputs or {}).get("messages") or []) - builtins, custom_tools, tool_callback = await resolve_tools(agent_config.tools) + resources = await resolve_agent_resources( + tools=agent_config.tools, + mcp_servers=agent_config.mcp_servers, + ) session_config = SessionConfig( agent=agent_config, secrets=await resolve_harness_secrets(), permission_policy=selection.permission_policy, trace=trace_context(), - builtin_tools=builtins, - custom_tools=custom_tools, - tool_callback=tool_callback, + builtin_names=resources.tools.builtin_names, + tool_specs=resources.tools.tool_specs, + tool_callback=resources.tools.tool_callback, + mcp_servers=resources.mcp_servers, ) # The harness validates that the chosen backend can drive it; select_backend already # routes a claude harness or a non-local sandbox to rivet, so this never fails in - # practice. setup/cleanup own the backend lifecycle; prompt runs one cold turn. + # practice. setup/cleanup own the backend lifecycle; prompt/stream run one cold turn. harness = make_harness(selection.harness, Environment(select_backend(selection))) + + # The `/messages` SSE path sets `stream`: return the Vercel UI Message Stream as an async + # generator (the normalizer turns it into a streaming response). `/invoke` and the + # `/messages` JSON path leave it unset and take the batch path below. + if stream: + return _agent_stream(harness, session_config, msgs) + await harness.setup() try: result = await harness.prompt(session_config, msgs) @@ -112,13 +125,34 @@ async def _agent( return {"role": "assistant", "content": result.output} +async def _agent_stream(harness, session_config, msgs): + """Run one streaming turn and yield Vercel UI Message Stream parts. + + Owns the environment lifecycle (``setup`` / ``cleanup``); the per-turn session is torn + down by the ``AgentRun``'s own cleanup hook when the stream drains. The ``session_id`` is + stamped onto the stream's ``start`` part by the endpoint, so it is not threaded here. + """ + await harness.setup() + try: + run = await harness.stream(session_config, msgs) + async for part in ui_message_stream(run): + yield part + try: + record_usage(run.result().usage) + except Exception: # result unavailable on a failed/aborted stream + pass + finally: + await harness.cleanup() + + def create_agent_app(): app = ag.create_app() # No builtin URI yet: registering the agent as a first-class workflow type # (`agenta:builtin:agent:v0`) and its interface is WP-6. Here we register the handler # directly, so it gets an auto URI (`user:custom:...`) and runs locally. routed = ag.workflow(schemas=AGENT_SCHEMAS)(_agent) - ag.route("/", app=app, flags={"is_chat": True})(routed) + # is_agent gates the agent-only `/messages` + `/load-session` routes (next to /invoke). + ag.route("/", app=app, flags={"is_chat": True, "is_agent": True})(routed) return app diff --git a/services/oss/src/agent/schemas.py b/services/oss/src/agent/schemas.py index d165938ba2..e696e84500 100644 --- a/services/oss/src/agent/schemas.py +++ b/services/oss/src/agent/schemas.py @@ -35,15 +35,17 @@ } # The agent config element: one composite control the playground renders for the whole -# agent config, instead of reusing `prompt-template` plus loose params. The -# `x-ag-type: agent_config` marker is what the playground dispatches to the AgentConfigControl -# (web/packages/agenta-entity-ui/.../AgentConfigControl.tsx). The schema is inline (not an -# `x-ag-type-ref`), so it needs no `/ag-types` registration; the control reuses the existing -# model selector, tool picker, and enum selects. agent.py reads this value (see inputs.py). +# agent config, instead of reusing `prompt-template` plus loose params. The field shape is +# the `agent_config` catalog type (AgentConfigSchema in agenta.sdk.utils.types), so this is a +# thin `x-ag-type-ref` the playground resolves against `/workflows/catalog/types/agent_config` +# and dispatches to the AgentConfigControl (web/packages/agenta-entity-ui/.../AgentConfigControl.tsx). +# Generating the schema from the model keeps the typed tools/mcp_servers in one place; we only +# carry the default here (what the playground pre-fills). agent.py reads this value (see inputs.py). _DEFAULT_AGENT_CONFIG = { - "instructions": _DEFAULT_AGENTS_MD, + "agents_md": _DEFAULT_AGENTS_MD, "model": _DEFAULT_MODEL, "tools": [], + "mcp_servers": [], "harness": "pi", "sandbox": "local", "permission_policy": "auto", @@ -51,61 +53,9 @@ AGENT_CONFIG_SCHEMA = { "type": "object", - "x-ag-type": "agent_config", + "x-ag-type-ref": "agent_config", "title": "Agent", - "description": "The agent's instructions, model, tools, and runtime.", - "properties": { - "instructions": { - "type": "string", - "x-ag-type": "textarea", - "title": "Instructions", - "description": "The agent's system prompt (its AGENTS.md).", - "default": _DEFAULT_AGENTS_MD, - }, - "model": { - "type": "string", - "x-parameter": "grouped_choice", - "title": "Model", - "default": _DEFAULT_MODEL, - }, - "tools": { - "type": "array", - "title": "Tools", - "description": ( - "Runnable tools the agent can call. Picked from connected providers " - "(e.g. Composio) and run server-side via /tools/call." - ), - "items": {"type": "object", "additionalProperties": True}, - "default": [], - }, - "harness": { - "type": "string", - "title": "Harness", - "enum": ["pi", "claude", "agenta"], - "default": "pi", - "description": ( - "Coding agent to drive: pi, claude, or agenta (pi with Agenta's forced " - "skills, tools, and base instructions)." - ), - }, - "sandbox": { - "type": "string", - "title": "Sandbox", - "enum": ["local", "daytona"], - "default": "local", - "description": "Where the agent runs: local daemon or a Daytona sandbox.", - }, - "permission_policy": { - "type": "string", - "title": "Permission policy", - "enum": ["auto", "deny"], - "default": "auto", - "description": ( - "How a permission-gating harness (e.g. Claude Code) handles tool-use " - "prompts in this headless run: auto-approve or deny." - ), - }, - }, + "description": "The agent's instructions, model, tools, MCP servers, and runtime.", "default": _DEFAULT_AGENT_CONFIG, } diff --git a/services/oss/src/agent/tools.py b/services/oss/src/agent/tools.py deleted file mode 100644 index fee9172f1f..0000000000 --- a/services/oss/src/agent/tools.py +++ /dev/null @@ -1,124 +0,0 @@ -"""Resolve the agent's configured tools through the Agenta backend. - -The playground tool picker emits provider-agnostic references; the backend resolver -(``POST /tools/resolve``) validates Composio connections up front and enriches each action -from the catalog. We turn the result into the customTool specs the wire carries and the -``/tools/call`` callback. The provider key and connection auth stay server-side. -""" - -from typing import Any, Dict, List, Optional, Tuple - -import httpx - -from oss.src.agent.client import ( - TOOLS_TIMEOUT, - agenta_api_base, - request_authorization, -) -from agenta.sdk.agents import ToolCallback - - -def _parse_gateway_slug(slug: Any) -> Optional[Dict[str, Any]]: - """Parse a gateway tool slug into a Composio reference, or ``None``. - - The playground tool picker encodes a Composio action as a function name like - ``tools__composio__github__GET_THE_AUTHENTICATED_USER__github-tvn`` (the same 5-segment - slug ``/tools/call`` parses; ``__`` or ``.`` separated). Anything that is not a - 5-segment ``tools.composio.*`` slug returns ``None`` so the caller can skip it. - """ - if not isinstance(slug, str): - return None - parts = slug.replace("__", ".").split(".") - if len(parts) == 5 and parts[0] == "tools" and parts[1] == "composio": - return { - "type": "composio", - "integration": parts[2], - "action": parts[3], - "connection": parts[4], - } - return None - - -def _normalize_tool_ref(ref: Any) -> Optional[Dict[str, Any]]: - """Coerce a config entry into a discriminated tool reference the resolver parses. - - Handles three shapes: a bare string (or single-key ``{"name": ...}``) is a built-in - tool name; a dict already carrying ``type`` passes through; and the playground picker's - gateway entry (``{"function": {"name": "tools__composio__..."}}``) becomes a - ``composio`` ref. Unsupported picker entries (provider built-ins, inline custom - functions) return ``None`` and are skipped rather than failing the run. - """ - if isinstance(ref, str): - return {"type": "builtin", "name": ref} - if isinstance(ref, dict): - if ref.get("type") in ("builtin", "composio"): - return ref - function = ref.get("function") if isinstance(ref.get("function"), dict) else {} - gateway = _parse_gateway_slug(function.get("name") or ref.get("name")) - if gateway: - return gateway - if "type" not in ref and isinstance(ref.get("name"), str): - return {"type": "builtin", "name": ref["name"]} - return None - return None - - -async def resolve_tools( - tools: List[Any], -) -> Tuple[List[str], List[Dict[str, Any]], Optional[ToolCallback]]: - """Resolve config tool references into built-in names + customTool specs + callback. - - Calls the backend resolver (``POST /tools/resolve``), which validates Composio - connections up front and enriches each action from the catalog. Returns the built-in - tool names, the camelCase customTool specs for the wire, and the ``/tools/call`` - callback. Raises on resolution failure so the invoke fails early with a clear message - rather than the model hitting a runtime tool error. - """ - refs = [ref for ref in (_normalize_tool_ref(t) for t in tools if t) if ref] - if not refs: - return [], [], None - - api_base = agenta_api_base() - if not api_base: - raise RuntimeError( - "Agent has tools configured but the Agenta API base URL is unknown. " - "Set AGENTA_AGENT_TOOLS_API_URL or AGENTA_API_URL." - ) - - authorization = request_authorization() - headers = {"Content-Type": "application/json"} - if authorization: - headers["Authorization"] = authorization - - async with httpx.AsyncClient(timeout=TOOLS_TIMEOUT) as client: - response = await client.post( - f"{api_base}/tools/resolve", - json={"tools": refs}, - headers=headers, - ) - - if response.status_code >= 400: - raise RuntimeError( - f"Tool resolution failed (HTTP {response.status_code}): {response.text[:500]}" - ) - - data = response.json() - builtins = data.get("builtins") or [] - custom = data.get("custom") or [] - - custom_tools = [ - { - "name": spec["name"], - "description": spec.get("description"), - "inputSchema": spec.get("input_schema"), - "callRef": spec["call_ref"], - } - for spec in custom - ] - - callback = ToolCallback( - endpoint=f"{api_base}/tools/call", - authorization=authorization, - ) - - return builtins, custom_tools, callback diff --git a/services/oss/src/agent/tools/__init__.py b/services/oss/src/agent/tools/__init__.py new file mode 100644 index 0000000000..e3b68d6167 --- /dev/null +++ b/services/oss/src/agent/tools/__init__.py @@ -0,0 +1,21 @@ +"""Agent-service composition and adapters for tool resolution.""" + +from .gateway import AgentaGatewayToolResolver, _to_gateway_reference +from .resolver import ( + ResolvedAgentResources, + resolve_agent_resources, + resolve_mcp_servers, + resolve_tools, +) +from .secrets import VaultToolSecretProvider + +_gateway_ref = _to_gateway_reference + +__all__ = [ + "AgentaGatewayToolResolver", + "VaultToolSecretProvider", + "ResolvedAgentResources", + "resolve_agent_resources", + "resolve_tools", + "resolve_mcp_servers", +] diff --git a/services/oss/src/agent/tools/gateway.py b/services/oss/src/agent/tools/gateway.py new file mode 100644 index 0000000000..adbea3465f --- /dev/null +++ b/services/oss/src/agent/tools/gateway.py @@ -0,0 +1,195 @@ +"""Agenta HTTP adapter for server-bound gateway tools.""" + +from __future__ import annotations + +from typing import Any, Dict, Sequence + +import httpx + +from agenta.sdk.agents.tools import ( + CallbackToolSpec, + GatewayToolConfig, + GatewayToolResolution, + GatewayToolResolutionError, + ToolCallback, + UnsupportedToolProviderError, +) +from agenta.sdk.utils.logging import get_module_logger + +from oss.src.agent.client import ( + TOOLS_TIMEOUT, + agenta_api_base, + request_authorization, +) + +log = get_module_logger(__name__) + + +def _normalize_reference(reference: str) -> str: + return reference.replace("__", ".") + + +def _to_gateway_reference(tool_config: GatewayToolConfig) -> Dict[str, Any]: + reference: Dict[str, Any] = { + "type": "gateway", + "provider": tool_config.provider, + "integration": tool_config.integration, + "action": tool_config.action, + "connection": tool_config.connection, + } + if tool_config.name: + reference["name"] = tool_config.name + return reference + + +class AgentaGatewayToolResolver: + async def resolve( + self, + tools: Sequence[GatewayToolConfig], + ) -> GatewayToolResolution: + for tool_config in tools: + if tool_config.provider != "composio": + raise UnsupportedToolProviderError(tool_config.provider) + + api_base = agenta_api_base() + if not api_base: + error = GatewayToolResolutionError( + "Agent has gateway tools configured but the Agenta API base URL " + "is unknown. Set AGENTA_AGENT_TOOLS_API_URL or AGENTA_API_URL." + ) + log.warning("agent: gateway tool resolution failed: %s", error) + raise error + + authorization = request_authorization() + headers = {"Content-Type": "application/json"} + if authorization: + headers["Authorization"] = authorization + + references = [_to_gateway_reference(tool_config) for tool_config in tools] + configs_by_reference: dict[str, GatewayToolConfig] = {} + for tool_config in tools: + reference = _normalize_reference(tool_config.reference) + if reference in configs_by_reference: + error = GatewayToolResolutionError( + f"Duplicate gateway reference: {reference}", + reference=reference, + ) + log.warning("agent: %s", error) + raise error + configs_by_reference[reference] = tool_config + + try: + async with httpx.AsyncClient(timeout=TOOLS_TIMEOUT) as client: + response = await client.post( + f"{api_base}/tools/resolve", + json={"tools": references}, + headers=headers, + ) + except httpx.HTTPError as exc: + log.warning( + "agent: gateway tool resolution request failed for %d tool(s)", + len(tools), + exc_info=True, + ) + raise GatewayToolResolutionError( + "Gateway tool resolution request failed", + ref_count=len(tools), + ) from exc + + if response.status_code >= 400: + error = GatewayToolResolutionError( + f"Gateway tool resolution failed (HTTP {response.status_code})", + status=response.status_code, + ref_count=len(tools), + ) + log.warning("agent: %s", error) + raise error + + try: + payload = response.json() or {} + except ValueError as exc: + log.warning( + "agent: gateway tool resolution returned invalid JSON", + exc_info=True, + ) + raise GatewayToolResolutionError( + "Gateway tool resolution returned invalid JSON", + ref_count=len(tools), + ) from exc + + raw_specs = payload.get("custom") if isinstance(payload, dict) else None + if not isinstance(raw_specs, list): + raw_specs = [] + if len(raw_specs) != len(tools): + error = GatewayToolResolutionError( + f"Gateway tool resolution returned {len(raw_specs)} spec(s) for " + f"{len(tools)} ref(s); expected one per ref.", + ref_count=len(tools), + spec_count=len(raw_specs), + ) + log.warning("agent: %s", error) + raise error + + specs_by_reference: dict[str, dict[str, Any]] = {} + for raw_spec in raw_specs: + if not isinstance(raw_spec, dict): + error = GatewayToolResolutionError( + "Gateway tool resolution returned a non-object spec" + ) + log.warning("agent: %s", error) + raise error + call_ref = raw_spec.get("call_ref") + if not call_ref: + error = GatewayToolResolutionError( + "Gateway tool resolution returned an incomplete spec " + f"(name={raw_spec.get('name')!r}, call_ref={call_ref!r})" + ) + log.warning("agent: %s", error) + raise error + reference = _normalize_reference(str(call_ref)) + if reference in specs_by_reference: + error = GatewayToolResolutionError( + f"Gateway tool resolution returned duplicate ref: {reference}", + reference=reference, + ) + log.warning("agent: %s", error) + raise error + specs_by_reference[reference] = raw_spec + + tool_specs: list[CallbackToolSpec] = [] + for reference, tool_config in configs_by_reference.items(): + raw_spec = specs_by_reference.get(reference) + if raw_spec is None: + error = GatewayToolResolutionError( + f"Gateway tool resolution did not return ref: {reference}", + reference=reference, + ) + log.warning("agent: %s", error) + raise error + name = raw_spec.get("name") + if not name: + error = GatewayToolResolutionError( + f"Gateway tool resolution returned an incomplete spec for {reference}", + reference=reference, + ) + log.warning("agent: %s", error) + raise error + tool_specs.append( + CallbackToolSpec( + name=str(name), + description=raw_spec.get("description") or str(name), + input_schema=raw_spec.get("input_schema") + or {"type": "object", "properties": {}}, + call_ref=str(raw_spec["call_ref"]), + needs_approval=tool_config.needs_approval, + render=tool_config.render, + ) + ) + + return GatewayToolResolution( + tool_specs=tool_specs, + tool_callback=ToolCallback( + endpoint=f"{api_base}/tools/call", + authorization=authorization, + ), + ) diff --git a/services/oss/src/agent/tools/resolver.py b/services/oss/src/agent/tools/resolver.py new file mode 100644 index 0000000000..8d56e92906 --- /dev/null +++ b/services/oss/src/agent/tools/resolver.py @@ -0,0 +1,85 @@ +"""Composition of SDK tool and MCP resolvers for the agent service.""" + +from __future__ import annotations + +import os +from typing import Any, Sequence + +from pydantic import BaseModel, ConfigDict, Field + +from agenta.sdk.agents.mcp import ( + MCPResolver, + ResolvedMCPServer, + parse_mcp_server_configs, +) +from agenta.sdk.agents.tools import ( + MissingSecretPolicy, + ResolvedToolSet, + ToolConfig, + ToolResolver, + coerce_tool_configs, +) +from agenta.sdk.utils.constants import TRUTHY + +from .gateway import AgentaGatewayToolResolver +from .secrets import VaultToolSecretProvider + + +class ResolvedAgentResources(BaseModel): + model_config = ConfigDict(frozen=True) + + tools: ResolvedToolSet = Field(default_factory=ResolvedToolSet) + mcp_servers: list[ResolvedMCPServer] = Field(default_factory=list) + + +def _mcp_enabled() -> bool: + return os.getenv("AGENTA_AGENT_ENABLE_MCP", "").strip().lower() in TRUTHY + + +async def resolve_agent_resources( + *, + tools: Sequence[Any], + mcp_servers: Sequence[Any], +) -> ResolvedAgentResources: + tool_configs: list[ToolConfig] = coerce_tool_configs(tools).tool_configs + secret_provider = VaultToolSecretProvider() + resolved_tools = await ToolResolver( + secret_provider=secret_provider, + gateway_resolver=AgentaGatewayToolResolver(), + missing_secret_policy=MissingSecretPolicy.ERROR, + ).resolve(tool_configs) + + resolved_mcp_servers: list[ResolvedMCPServer] = [] + if _mcp_enabled(): + resolved_mcp_servers = await MCPResolver( + secret_provider=secret_provider, + missing_secret_policy=MissingSecretPolicy.ERROR, + ).resolve(parse_mcp_server_configs(mcp_servers)) + + return ResolvedAgentResources( + tools=resolved_tools, + mcp_servers=resolved_mcp_servers, + ) + + +async def resolve_tools(tools: Sequence[Any]) -> ResolvedToolSet: + """Compatibility wrapper for callers resolving tools without MCP.""" + return ( + await resolve_agent_resources( + tools=tools, + mcp_servers=[], + ) + ).tools + + +async def resolve_mcp_servers( + mcp_servers: Sequence[Any], +) -> list[dict[str, Any]]: + """Compatibility wrapper returning the previous wire-dictionary shape.""" + if not _mcp_enabled(): + return [] + resources = await resolve_agent_resources( + tools=[], + mcp_servers=mcp_servers, + ) + return [server.to_wire() for server in resources.mcp_servers] diff --git a/services/oss/src/agent/tools/secrets.py b/services/oss/src/agent/tools/secrets.py new file mode 100644 index 0000000000..59aa10e9f0 --- /dev/null +++ b/services/oss/src/agent/tools/secrets.py @@ -0,0 +1,65 @@ +"""Vault-backed secret provider for agent tools and MCP servers.""" + +from __future__ import annotations + +from typing import Mapping, Sequence + +import httpx + +from agenta.sdk.utils.logging import get_module_logger + +from oss.src.agent.client import ( + TOOLS_TIMEOUT, + agenta_api_base, + request_authorization, +) + +log = get_module_logger(__name__) + + +async def resolve_named_secrets(names: Sequence[str]) -> dict[str, str]: + """Resolve project vault secrets by name for tool and MCP environments.""" + if not names: + return {} + + api_base = agenta_api_base() + if not api_base: + return {} + + headers = {"Content-Type": "application/json"} + authorization = request_authorization() + if authorization: + headers["Authorization"] = authorization + + try: + async with httpx.AsyncClient(timeout=TOOLS_TIMEOUT) as client: + response = await client.post( + f"{api_base}/secrets/resolve", + json={"names": list(names)}, + headers=headers, + ) + if response.status_code >= 400: + log.warning( + "agent: named-secret resolve HTTP %s for %s", + response.status_code, + names, + ) + return {} + data = response.json() or {} + except Exception: # pylint: disable=broad-except + log.warning("agent: named-secret resolve failed for %s", names, exc_info=True) + return {} + + resolved = data.get("secrets") if isinstance(data, dict) else None + resolved = resolved if isinstance(resolved, dict) else {} + missing = [name for name in names if name not in resolved] + if missing: + log.warning("agent: unresolved named secret(s): %s", missing) + return { + str(key): str(value) for key, value in resolved.items() if value is not None + } + + +class VaultToolSecretProvider: + async def get_many(self, names: Sequence[str]) -> Mapping[str, str]: + return await resolve_named_secrets(names) diff --git a/services/oss/tests/pytest/integration/__init__.py b/services/oss/tests/pytest/integration/__init__.py new file mode 100644 index 0000000000..a78ea06af0 --- /dev/null +++ b/services/oss/tests/pytest/integration/__init__.py @@ -0,0 +1 @@ +# Integration tests package. diff --git a/services/oss/tests/pytest/integration/agent/__init__.py b/services/oss/tests/pytest/integration/agent/__init__.py new file mode 100644 index 0000000000..da89fd87e0 --- /dev/null +++ b/services/oss/tests/pytest/integration/agent/__init__.py @@ -0,0 +1 @@ +# Integration tests for the agent workflow service (httpx boundary mocked, no live backend). diff --git a/services/oss/tests/pytest/integration/agent/conftest.py b/services/oss/tests/pytest/integration/agent/conftest.py new file mode 100644 index 0000000000..dfc223343c --- /dev/null +++ b/services/oss/tests/pytest/integration/agent/conftest.py @@ -0,0 +1,76 @@ +"""Integration fixtures: a fake httpx client for the tool/secret resolvers. + +These tests wire the real resolver code against a mocked HTTP boundary (no live backend, no +respx/pytest-httpx dependency). ``install_http`` patches, in a given resolver module, the two +``client`` helpers (``agenta_api_base`` / ``request_authorization``) plus ``httpx.AsyncClient``, +and returns a ``capture`` dict the test can assert the outgoing request against. +""" + +from __future__ import annotations + +import json +from typing import Any, Dict, Optional + +import pytest + + +class _FakeResponse: + def __init__(self, status_code: int, payload: Any, text: Optional[str]) -> None: + self.status_code = status_code + self._payload = payload if payload is not None else {} + self.text = text if text is not None else json.dumps(self._payload) + + def json(self) -> Any: + return self._payload + + +def _fake_async_client(*, response, raises, capture: Dict[str, Any]): + class _Client: + def __init__(self, *args, **kwargs) -> None: + pass + + async def __aenter__(self): + return self + + async def __aexit__(self, *args): + return False + + async def post(self, url, json=None, headers=None): + capture.update(method="POST", url=url, json=json, headers=headers) + if raises: + raise raises + return response + + async def get(self, url, headers=None): + capture.update(method="GET", url=url, headers=headers) + if raises: + raise raises + return response + + return _Client + + +@pytest.fixture +def install_http(monkeypatch): + def _install( + module, + *, + status: int = 200, + payload: Any = None, + text: Optional[str] = None, + raises: Optional[BaseException] = None, + api_base: Optional[str] = "https://api.x/api", + authorization: Optional[str] = "Access tok", + ) -> Dict[str, Any]: + capture: Dict[str, Any] = {} + monkeypatch.setattr(module, "agenta_api_base", lambda: api_base) + monkeypatch.setattr(module, "request_authorization", lambda: authorization) + response = _FakeResponse(status, payload, text) + monkeypatch.setattr( + module.httpx, + "AsyncClient", + _fake_async_client(response=response, raises=raises, capture=capture), + ) + return capture + + return _install diff --git a/services/oss/tests/pytest/integration/agent/test_resolve_secrets_http.py b/services/oss/tests/pytest/integration/agent/test_resolve_secrets_http.py new file mode 100644 index 0000000000..8eb4f45a01 --- /dev/null +++ b/services/oss/tests/pytest/integration/agent/test_resolve_secrets_http.py @@ -0,0 +1,64 @@ +"""``resolve_harness_secrets`` against a mocked ``GET /secrets/``. + +Best-effort by design: it maps only ``provider_key`` vault entries to env vars, dedupes by +env var, and returns ``{}`` on any error rather than failing the run. +""" + +from __future__ import annotations + +import pytest + +from oss.src.agent import secrets +from oss.src.agent.secrets import resolve_harness_secrets + +pytestmark = pytest.mark.integration + + +async def test_no_api_base_returns_empty(install_http): + install_http(secrets, api_base=None) + assert await resolve_harness_secrets() == {} + + +async def test_maps_only_provider_keys_with_dedupe(install_http): + install_http( + secrets, + status=200, + payload=[ + { + "kind": "provider_key", + "data": {"kind": "openai", "provider": {"key": "sk-1"}}, + }, + # duplicate env var -> first one wins (setdefault). + { + "kind": "provider_key", + "data": {"kind": "openai", "provider": {"key": "sk-2"}}, + }, + { + "kind": "provider_key", + "data": {"kind": "anthropic", "provider": {"key": "sk-ant"}}, + }, + # not a provider key -> ignored. + {"kind": "other", "data": {"kind": "openai", "provider": {"key": "x"}}}, + # unmapped provider -> ignored. + { + "kind": "provider_key", + "data": {"kind": "made_up", "provider": {"key": "y"}}, + }, + # missing key -> ignored. + {"kind": "provider_key", "data": {"kind": "groq", "provider": {}}}, + ], + ) + + env = await resolve_harness_secrets() + + assert env == {"OPENAI_API_KEY": "sk-1", "ANTHROPIC_API_KEY": "sk-ant"} + + +async def test_http_error_returns_empty(install_http): + install_http(secrets, status=400) + assert await resolve_harness_secrets() == {} + + +async def test_network_exception_returns_empty(install_http): + install_http(secrets, raises=RuntimeError("network down")) + assert await resolve_harness_secrets() == {} diff --git a/services/oss/tests/pytest/integration/agent/tools/__init__.py b/services/oss/tests/pytest/integration/agent/tools/__init__.py new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/services/oss/tests/pytest/integration/agent/tools/__init__.py @@ -0,0 +1 @@ + diff --git a/services/oss/tests/pytest/integration/agent/tools/test_gateway_http.py b/services/oss/tests/pytest/integration/agent/tools/test_gateway_http.py new file mode 100644 index 0000000000..1664e8fcfe --- /dev/null +++ b/services/oss/tests/pytest/integration/agent/tools/test_gateway_http.py @@ -0,0 +1,170 @@ +from __future__ import annotations + +import httpx +import pytest + +from agenta.sdk.agents import ( + GatewayToolResolutionError, + ToolCallback, +) + +from oss.src.agent.tools import resolve_tools +from oss.src.agent.tools import gateway + +pytestmark = pytest.mark.integration + +_GATEWAY = { + "type": "gateway", + "provider": "composio", + "integration": "github", + "action": "GET_USER", + "connection": "c1", +} + + +async def test_no_gateway_short_circuits_without_http(install_http): + capture = install_http(gateway, raises=AssertionError("must not call HTTP")) + resolved = await resolve_tools(["read"]) + assert resolved.builtin_names == ["read"] + assert capture == {} + + +async def test_missing_api_base_raises_typed_error(install_http): + install_http(gateway, api_base=None) + with pytest.raises(GatewayToolResolutionError, match="API base URL"): + await resolve_tools([_GATEWAY]) + + +async def test_gateway_metadata_and_description_fallback_are_preserved(install_http): + capture = install_http( + gateway, + payload={ + "custom": [ + { + "name": "get_user", + "description": None, + "input_schema": {"type": "object"}, + "call_ref": "tools.composio.github.GET_USER.c1", + } + ] + }, + ) + resolved = await resolve_tools( + [ + { + **_GATEWAY, + "needs_approval": True, + "render": {"kind": "component", "component": "User"}, + } + ] + ) + spec = resolved.tool_specs[0] + assert spec.description == "get_user" + assert spec.needs_approval is True + assert spec.render == {"kind": "component", "component": "User"} + assert spec.to_wire()["needsApproval"] is True + assert isinstance(resolved.tool_callback, ToolCallback) + assert capture["json"]["tools"][0]["type"] == "gateway" + + +async def test_gateway_specs_are_joined_by_call_ref_not_position(install_http): + install_http( + gateway, + payload={ + "custom": [ + { + "name": "second", + "description": "Second", + "input_schema": {}, + "call_ref": "tools.composio.github.SECOND.c2", + }, + { + "name": "first", + "description": "First", + "input_schema": {}, + "call_ref": "tools.composio.github.FIRST.c1", + }, + ] + }, + ) + resolved = await resolve_tools( + [ + { + **_GATEWAY, + "action": "FIRST", + "connection": "c1", + "needs_approval": True, + }, + { + **_GATEWAY, + "action": "SECOND", + "connection": "c2", + "render": {"kind": "component", "component": "Second"}, + }, + ] + ) + first, second = resolved.tool_specs + assert first.name == "first" + assert first.needs_approval is True + assert first.render is None + assert second.name == "second" + assert second.needs_approval is False + assert second.render == {"kind": "component", "component": "Second"} + + +async def test_transport_failure_is_logged_and_normalized( + install_http, + monkeypatch, +): + warnings = [] + monkeypatch.setattr( + gateway, + "log", + type( + "Log", + (), + {"warning": lambda self, *args, **kwargs: warnings.append(args)}, + )(), + ) + request = httpx.Request("POST", "https://api.x/api/tools/resolve") + install_http(gateway, raises=httpx.ConnectError("offline", request=request)) + with pytest.raises(GatewayToolResolutionError) as caught: + await resolve_tools([_GATEWAY]) + assert isinstance(caught.value.__cause__, httpx.ConnectError) + assert warnings + assert "gateway tool resolution request failed" in warnings[0][0].lower() + + +@pytest.mark.parametrize( + ("payload", "message"), + [ + ({"custom": []}, "expected one per ref"), + ( + { + "custom": [ + { + "name": "get_user", + "description": "x", + "input_schema": {}, + } + ] + }, + "incomplete spec", + ), + ], +) +async def test_invalid_gateway_response_fails_fast( + install_http, + payload, + message, +): + install_http(gateway, payload=payload) + with pytest.raises(GatewayToolResolutionError, match=message): + await resolve_tools([_GATEWAY]) + + +async def test_http_status_failure_is_typed(install_http): + install_http(gateway, status=400, text="bad request") + with pytest.raises(GatewayToolResolutionError) as caught: + await resolve_tools([_GATEWAY]) + assert caught.value.status == 400 diff --git a/services/oss/tests/pytest/integration/agent/tools/test_secrets_http.py b/services/oss/tests/pytest/integration/agent/tools/test_secrets_http.py new file mode 100644 index 0000000000..2aea5678fb --- /dev/null +++ b/services/oss/tests/pytest/integration/agent/tools/test_secrets_http.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +import pytest + +from oss.src.agent.tools import secrets + +pytestmark = pytest.mark.integration + + +async def test_named_secrets_are_resolved_by_tools_adapter(install_http): + capture = install_http( + secrets, + payload={"secrets": {"TOKEN": "value", "EMPTY": None}}, + ) + + resolved = await secrets.resolve_named_secrets(["TOKEN", "EMPTY", "MISSING"]) + + assert resolved == {"TOKEN": "value"} + assert capture == { + "method": "POST", + "url": "https://api.x/api/secrets/resolve", + "json": {"names": ["TOKEN", "EMPTY", "MISSING"]}, + "headers": { + "Content-Type": "application/json", + "Authorization": "Access tok", + }, + } + + +async def test_named_secrets_without_api_base_return_empty(install_http): + install_http(secrets, api_base=None) + + assert await secrets.resolve_named_secrets(["TOKEN"]) == {} + + +async def test_named_secret_http_failure_returns_empty(install_http): + install_http(secrets, status=500) + + assert await secrets.resolve_named_secrets(["TOKEN"]) == {} diff --git a/services/oss/tests/pytest/unit/__init__.py b/services/oss/tests/pytest/unit/__init__.py new file mode 100644 index 0000000000..1a351fabba --- /dev/null +++ b/services/oss/tests/pytest/unit/__init__.py @@ -0,0 +1 @@ +# Unit tests package. diff --git a/services/oss/tests/pytest/unit/agent/conftest.py b/services/oss/tests/pytest/unit/agent/conftest.py index 7664bf9c90..af83b70d93 100644 --- a/services/oss/tests/pytest/unit/agent/conftest.py +++ b/services/oss/tests/pytest/unit/agent/conftest.py @@ -59,7 +59,15 @@ async def destroy(self) -> None: class FakeBackend(Backend): - """Echoes a fixed result, regardless of harness. Records lifecycle for assertions.""" + """Echoes a fixed result, regardless of harness. Records lifecycle for assertions. + + Crucially it also records the *harness-shaped* config each ``create_session`` receives + (the ``PiAgentConfig`` / ``ClaudeAgentConfig`` / ``AgentaAgentConfig`` the harness + produced). This is the backend boundary where per-harness translation surfaces, so a + handler test can assert the response body is identical across harnesses *and* that the + translated configs diverge as designed (Pi keeps built-ins and forces auto; Claude drops + built-ins and honors the policy; Agenta unions forced tools and carries skills). + """ def __init__( self, @@ -75,6 +83,8 @@ def __init__( self._result = result if result is not None else AgentResult(output="echo") self.setup_calls = 0 self.shutdown_calls = 0 + # Every harness-shaped config that reached the backend boundary, in call order. + self.created_configs: list = [] async def setup(self) -> None: self.setup_calls += 1 @@ -88,6 +98,7 @@ async def create_sandbox(self) -> _FakeSandbox: async def create_session( self, sandbox, config, *, harness, secrets=None, trace=None, session_id=None ) -> _FakeSession: + self.created_configs.append(config) return _FakeSession(self._result) diff --git a/services/oss/tests/pytest/unit/agent/test_invoke_handler.py b/services/oss/tests/pytest/unit/agent/test_invoke_handler.py index d32386cd6b..59643e5754 100644 --- a/services/oss/tests/pytest/unit/agent/test_invoke_handler.py +++ b/services/oss/tests/pytest/unit/agent/test_invoke_handler.py @@ -9,23 +9,39 @@ import pytest -from agenta.sdk.agents import AgentConfig, AgentResult +from agenta.sdk.agents import ( + AgentConfig, + AgentResult, + GatewayToolResolutionError, + ResolvedToolSet, +) +from agenta.sdk.agents.adapters.agenta_builtins import AGENTA_FORCED_SKILLS from oss.src.agent import app +from oss.src.agent.tools import ResolvedAgentResources -@pytest.fixture -def patched(monkeypatch, fake_backend): - backend = fake_backend(result=AgentResult(output="echo", usage={"total": 15})) +def _patch_handler(monkeypatch, backend, *, builtins=(), tool_callback=None): + """Stub the network-touching helpers and pin one ``backend`` for the run. + + ``builtins`` are the resolved built-in tool names ``resolve_tools`` hands back, so a turn + can carry a real tool list and the per-harness translation has something to diverge on. + Returns the ``recorded`` dict the usage hook writes into. + """ recorded = {} - async def _no_tools(_tools): - return [], [], None + async def _resources(*, tools, mcp_servers): + return ResolvedAgentResources( + tools=ResolvedToolSet( + builtin_names=list(builtins), + tool_callback=tool_callback, + ) + ) async def _no_secrets(): return {} - monkeypatch.setattr(app, "resolve_tools", _no_tools) + monkeypatch.setattr(app, "resolve_agent_resources", _resources) monkeypatch.setattr(app, "resolve_harness_secrets", _no_secrets) monkeypatch.setattr(app, "trace_context", lambda: None) monkeypatch.setattr( @@ -35,13 +51,20 @@ async def _no_secrets(): monkeypatch.setattr( app, "_default_agent_config", lambda: AgentConfig(instructions="x", model="m") ) + return recorded + + +@pytest.fixture +def patched(monkeypatch, fake_backend): + backend = fake_backend(result=AgentResult(output="echo", usage={"total": 15})) + recorded = _patch_handler(monkeypatch, backend) return backend, recorded -async def _invoke(harness="pi"): +async def _invoke(harness="pi", **agent): return await app._agent( messages=[{"role": "user", "content": "hi"}], - parameters={"agent": {"harness": harness}}, + parameters={"agent": {"harness": harness, **agent}}, ) @@ -62,10 +85,108 @@ async def test_invoke_runs_backend_lifecycle(patched): assert backend.shutdown_calls == 1 # cleanup() tears the backend down -async def test_invoke_body_is_identical_across_harnesses(patched): - # The same turn against the same (echoing) backend must produce a byte-identical body - # whether routed as pi, agenta, or claude. This is the design's cross-harness guarantee. - pi = await _invoke("pi") - agenta = await _invoke("agenta") - claude = await _invoke("claude") - assert pi == agenta == claude +async def test_invoke_cross_harness_same_body_divergent_configs( + monkeypatch, fake_backend +): + """The real cross-harness guarantee, exercised through the handler — not stubbed. + + The earlier version of this test pinned a single echoing backend and asserted + ``pi == agenta == claude`` on the echoed constant. That passes no matter how badly the + per-harness translation diverges, because the translation never ran. Here the same turn + runs as pi / agenta / claude against a backend that records the *harness-shaped* config it + receives, so we can assert two distinct things: + + 1. the response body is byte-identical across the three harnesses (the response-layer + guarantee), and + 2. the config that reached the backend boundary diverged exactly as designed — proving + the handler actually drove ``PiHarness`` / ``ClaudeHarness`` / ``AgentaHarness``, + each producing its own config. + + The turn carries a built-in tool (``web_search``) and a ``deny`` policy so the divergence + is observable: Claude drops Pi built-ins and honors the policy; Pi keeps them and forces + ``auto``; Agenta unions the forced tools and ships skills. + """ + backend = fake_backend(result=AgentResult(output="echo", usage={"total": 15})) + _patch_handler(monkeypatch, backend, builtins=["web_search"]) + + bodies = [ + await _invoke(harness, permission_policy="deny") + for harness in ("pi", "agenta", "claude") + ] + pi_body, agenta_body, claude_body = bodies + + # (1) Response-layer guarantee: identical body regardless of harness. + assert ( + pi_body + == agenta_body + == claude_body + == { + "role": "assistant", + "content": "echo", + } + ) + + # (2) The three harness-shaped configs that reached the backend boundary, in call order. + assert len(backend.created_configs) == 3 + pi_cfg, agenta_cfg, claude_cfg = backend.created_configs + pi_wire = pi_cfg.wire_tools() + agenta_wire = agenta_cfg.wire_tools() + claude_wire = claude_cfg.wire_tools() + + # Pi keeps its built-in tool natively and never gates tool use (policy forced to auto, + # the author's `deny` notwithstanding). + assert pi_wire["tools"] == ["web_search"] + assert pi_wire["permissionPolicy"] == "auto" + assert "skills" not in pi_wire + + # Claude has no Pi built-ins (the `web_search` name is dropped) and honors the policy. + assert claude_wire["tools"] == [] + assert claude_wire["permissionPolicy"] == "deny" + assert "skills" not in claude_wire + + # Agenta is Pi-with-an-opinion: it unions the forced tools onto the author's set, forces + # auto like Pi, and ships the forced skills. + assert agenta_wire["tools"] == ["web_search", "read", "bash"] + assert agenta_wire["permissionPolicy"] == "auto" + assert agenta_wire["skills"] == list(AGENTA_FORCED_SKILLS) + + # The configs genuinely differ at the boundary; the body's sameness is not a tautology. + assert pi_wire != claude_wire + assert agenta_wire != pi_wire + + +async def test_stream_tool_resolution_failure_is_raised_before_backend_setup( + monkeypatch, +): + async def _failure(*, tools, mcp_servers): + raise GatewayToolResolutionError("gateway unavailable") + + monkeypatch.setattr(app, "resolve_agent_resources", _failure) + monkeypatch.setattr( + app, + "_default_agent_config", + lambda: AgentConfig( + tools=[ + { + "type": "gateway", + "integration": "github", + "action": "GET_USER", + "connection": "c1", + } + ] + ), + ) + monkeypatch.setattr( + app, + "select_backend", + lambda _selection: (_ for _ in ()).throw( + AssertionError("backend must not be selected") + ), + ) + + with pytest.raises(GatewayToolResolutionError, match="gateway unavailable"): + await app._agent( + messages=[{"role": "user", "content": "hi"}], + parameters={"agent": {"harness": "pi"}}, + stream=True, + ) diff --git a/services/oss/tests/pytest/unit/agent/test_tool_refs.py b/services/oss/tests/pytest/unit/agent/test_tool_refs.py deleted file mode 100644 index 4f51399c19..0000000000 --- a/services/oss/tests/pytest/unit/agent/test_tool_refs.py +++ /dev/null @@ -1,75 +0,0 @@ -"""Tool-reference normalization: the playground's loose tool entries -> resolver refs. - -``resolve_tools`` posts these refs to the backend resolver. Getting the discrimination wrong -(a Composio action read as a built-in, or vice versa) silently drops or misroutes a tool, so -these pure parsers are worth pinning. -""" - -from __future__ import annotations - -from oss.src.agent.tools import _normalize_tool_ref, _parse_gateway_slug - -_SLUG = "tools__composio__github__GET_THE_AUTHENTICATED_USER__github-tvn" - - -def test_parse_gateway_slug_underscore_form(): - assert _parse_gateway_slug(_SLUG) == { - "type": "composio", - "integration": "github", - "action": "GET_THE_AUTHENTICATED_USER", - "connection": "github-tvn", - } - - -def test_parse_gateway_slug_dot_form(): - assert _parse_gateway_slug("tools.composio.slack.SEND_MESSAGE.conn-1") == { - "type": "composio", - "integration": "slack", - "action": "SEND_MESSAGE", - "connection": "conn-1", - } - - -def test_parse_gateway_slug_rejects_non_matching(): - assert _parse_gateway_slug("tools__composio__too__few") is None # 4 segments - assert _parse_gateway_slug("tools__other__a__b__c") is None # not composio - assert _parse_gateway_slug(123) is None # not a string - assert _parse_gateway_slug(None) is None - - -def test_normalize_bare_string_is_builtin(): - assert _normalize_tool_ref("read") == {"type": "builtin", "name": "read"} - - -def test_normalize_typed_dict_passes_through(): - composio = { - "type": "composio", - "integration": "x", - "action": "y", - "connection": "z", - } - assert _normalize_tool_ref(composio) is composio - builtin = {"type": "builtin", "name": "read"} - assert _normalize_tool_ref(builtin) is builtin - - -def test_normalize_picker_gateway_entry_becomes_composio(): - ref = {"function": {"name": _SLUG}} - assert _normalize_tool_ref(ref) == { - "type": "composio", - "integration": "github", - "action": "GET_THE_AUTHENTICATED_USER", - "connection": "github-tvn", - } - - -def test_normalize_untyped_name_is_builtin_unless_it_is_a_slug(): - assert _normalize_tool_ref({"name": "grep"}) == {"type": "builtin", "name": "grep"} - # A name that is itself a gateway slug resolves to composio. - assert _normalize_tool_ref({"name": _SLUG})["type"] == "composio" - - -def test_normalize_unsupported_entries_are_dropped(): - assert _normalize_tool_ref({"foo": "bar"}) is None # no type, no usable name - assert _normalize_tool_ref(123) is None - assert _normalize_tool_ref(None) is None diff --git a/services/oss/tests/pytest/unit/agent/tools/__init__.py b/services/oss/tests/pytest/unit/agent/tools/__init__.py new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/services/oss/tests/pytest/unit/agent/tools/__init__.py @@ -0,0 +1 @@ + diff --git a/services/oss/tests/pytest/unit/agent/tools/test_gateway_mapping.py b/services/oss/tests/pytest/unit/agent/tools/test_gateway_mapping.py new file mode 100644 index 0000000000..3ca0690980 --- /dev/null +++ b/services/oss/tests/pytest/unit/agent/tools/test_gateway_mapping.py @@ -0,0 +1,19 @@ +from agenta.sdk.agents import GatewayToolConfig + +from oss.src.agent.tools import _to_gateway_reference + + +def test_gateway_reference_uses_canonical_sdk_shape(): + assert _to_gateway_reference( + GatewayToolConfig( + integration="github", + action="GET_USER", + connection="c1", + ) + ) == { + "type": "gateway", + "provider": "composio", + "integration": "github", + "action": "GET_USER", + "connection": "c1", + } diff --git a/services/oss/tests/pytest/unit/agent/tools/test_resolution.py b/services/oss/tests/pytest/unit/agent/tools/test_resolution.py new file mode 100644 index 0000000000..92ef5368d7 --- /dev/null +++ b/services/oss/tests/pytest/unit/agent/tools/test_resolution.py @@ -0,0 +1,86 @@ +from __future__ import annotations + +import pytest + +from agenta.sdk.agents import ( + CodeToolSpec, + MissingMCPSecretError, + MissingToolSecretError, +) + +from oss.src.agent.tools import resolve_mcp_servers, resolve_tools +from oss.src.agent.tools import resolver as resolver_module +from oss.src.agent.tools import secrets as secrets_module + + +async def test_resolve_tools_builds_local_specs_with_scoped_secrets(monkeypatch): + async def _named_secrets(names): + assert names == ["TOKEN"] + return {"TOKEN": "secret"} + + monkeypatch.setattr(secrets_module, "resolve_named_secrets", _named_secrets) + resolved = await resolve_tools( + [ + "read", + { + "type": "code", + "name": "calc", + "script": "...", + "secrets": ["TOKEN"], + }, + { + "type": "client", + "name": "pick", + }, + ] + ) + assert resolved.builtin_names == ["read"] + code = next(spec for spec in resolved.tool_specs if spec.name == "calc") + assert isinstance(code, CodeToolSpec) + assert code.env == {"TOKEN": "secret"} + assert ( + next(spec for spec in resolved.tool_specs if spec.name == "pick").kind + == "client" + ) + + +async def test_missing_tool_secret_is_not_silently_omitted(monkeypatch): + async def _named_secrets(_names): + return {} + + monkeypatch.setattr(secrets_module, "resolve_named_secrets", _named_secrets) + with pytest.raises(MissingToolSecretError): + await resolve_tools( + [ + { + "type": "code", + "name": "calc", + "script": "...", + "secrets": ["TOKEN"], + } + ] + ) + + +async def test_mcp_is_disabled_at_service_composition_by_default(monkeypatch): + monkeypatch.delenv("AGENTA_AGENT_ENABLE_MCP", raising=False) + assert await resolve_mcp_servers([{"name": "github", "command": "npx"}]) == [] + + +async def test_missing_mcp_secret_is_explicit_when_enabled(monkeypatch): + monkeypatch.setattr(resolver_module, "_mcp_enabled", lambda: True) + + async def _named_secrets(_names): + return {} + + monkeypatch.setattr(secrets_module, "resolve_named_secrets", _named_secrets) + with pytest.raises(MissingMCPSecretError): + await resolve_mcp_servers( + [ + { + "name": "github", + "command": "npx", + "secrets": {"GITHUB_TOKEN": "missing"}, + } + ] + ) diff --git a/web/packages/agenta-entity-ui/src/DrillInView/SchemaControls/AgentConfigControl.tsx b/web/packages/agenta-entity-ui/src/DrillInView/SchemaControls/AgentConfigControl.tsx index e2fd6a199d..eeb3de30cb 100644 --- a/web/packages/agenta-entity-ui/src/DrillInView/SchemaControls/AgentConfigControl.tsx +++ b/web/packages/agenta-entity-ui/src/DrillInView/SchemaControls/AgentConfigControl.tsx @@ -2,20 +2,25 @@ * AgentConfigControl * * One composite control for the whole agent config, dispatched from - * `x-ag-type: "agent_config"` (see SchemaPropertyRenderer). It reuses the existing - * controls rather than inventing new ones: the model selector (GroupedChoiceControl), the - * tool picker (ToolSelectorPopover + ToolItemControl), enum selects (harness, sandbox, - * permission policy), and a textarea (instructions). The backend advertises the inline - * schema and reads this value (services/oss/src/agent/schemas.py + inputs.py). + * `x-ag-type: "agent_config"` / `x-ag-type-ref: "agent_config"` (see SchemaPropertyRenderer). + * It reuses the existing controls rather than inventing new ones: the model selector + * (GroupedChoiceControl), the tool picker (ToolSelectorPopover + ToolItemControl), the MCP + * server editor (McpServerItemControl), enum selects (harness, sandbox, permission policy), + * and a textarea (agents_md). The field shape is the `agent_config` catalog type generated + * from the SDK model (AgentConfigSchema in agenta.sdk.utils.types); the agent service ships a + * thin `x-ag-type-ref` the playground resolves and reads back (services/oss/src/agent). */ import {useCallback, useMemo} from "react" import type {SchemaProperty} from "@agenta/entities/shared" import {useDrillInUI} from "@agenta/ui/drill-in" import {cn} from "@agenta/ui/styles" +import {Plus} from "@phosphor-icons/react" +import {Button, Typography} from "antd" import {EnumSelectControl} from "./EnumSelectControl" import {GroupedChoiceControl} from "./GroupedChoiceControl" +import {McpServerItemControl} from "./McpServerItemControl" import {TextInputControl} from "./TextInputControl" import {ToolItemControl} from "./ToolItemControl" import {ToolSelectorPopover, type ToolSelectionMeta} from "./ToolSelectorPopover" @@ -110,14 +115,49 @@ export function AgentConfigControl({ [tools], ) + // MCP servers are a sibling of tools: a flat array on the agent config. Each entry is the + // open McpServer shape (name + stdio command/args/env or remote url, secret names), edited + // as JSON the backend resolver parses identically to `tools`. + const mcpServers = useMemo( + () => (Array.isArray(config.mcp_servers) ? (config.mcp_servers as unknown[]) : []), + [config.mcp_servers], + ) + const setMcpServers = useCallback( + (next: unknown[]) => setField("mcp_servers", next), + [setField], + ) + const handleAddMcpServer = useCallback( + () => setMcpServers([...mcpServers, {name: "", transport: "stdio", command: "", args: []}]), + [mcpServers, setMcpServers], + ) + const handleMcpServerChange = useCallback( + (index: number, next: Record) => { + const updated = [...mcpServers] + updated[index] = next + setMcpServers(updated) + }, + [mcpServers, setMcpServers], + ) + const handleMcpServerDelete = useCallback( + (index: number) => setMcpServers(mcpServers.filter((_, i) => i !== index)), + [mcpServers, setMcpServers], + ) + + // ``agents_md`` is the catalog-schema field; ``instructions`` is read as a fallback so an + // already-stored agent config (the legacy key) still populates the editor. + const agentsMd = + (config.agents_md as string | null | undefined) ?? + (config.instructions as string | null | undefined) ?? + null + return (
setField("instructions", v)} - description={props.instructions?.description as string | undefined} + value={agentsMd} + onChange={(v) => setField("agents_md", v)} + description={props.agents_md?.description as string | undefined} withTooltip={withTooltip} disabled={disabled} multiline @@ -176,6 +216,49 @@ export function AgentConfigControl({ )}
+ {/* MCP servers */} +
+ MCP servers + {mcpServers.length > 0 && ( +
+ {mcpServers.map((server, index) => { + const control = ( + handleMcpServerChange(index, v)} + onDelete={ + disabled ? undefined : () => handleMcpServerDelete(index) + } + disabled={disabled} + /> + ) + return EditorProvider ? ( + + {control} + + ) : ( + control + ) + })} +
+ )} + {!disabled && ( +
+ +
+ )} +
+ ) => void + /** Called when the server should be removed */ + onDelete?: () => void + /** Whether the control is read-only */ + disabled?: boolean + /** Additional CSS classes */ + className?: string +} + +function toServerObj(value: unknown): Record { + try { + if (typeof value === "string") + return value ? (JSON.parse(value) as Record) : {} + if (value && typeof value === "object" && !Array.isArray(value)) + return value as Record + } catch { + // fall through to empty object + } + return {} +} + +export const McpServerItemControl = memo(function McpServerItemControl({ + value, + onChange, + onDelete, + disabled = false, + className, +}: McpServerItemControlProps) { + const {SharedEditor} = useDrillInUI() + const serverObj = toServerObj(value) + const name = + typeof serverObj.name === "string" && serverObj.name ? serverObj.name : "MCP server" + + const [editorText, setEditorText] = useState(() => safeStringify(serverObj ?? {})) + + // Reset the editor text when the value changes from outside (add/remove/reorder). + const lastExternalRef = useRef(safeStringify(serverObj ?? {})) + useEffect(() => { + const next = safeStringify(toServerObj(value) ?? {}) + if (next !== lastExternalRef.current) { + lastExternalRef.current = next + setEditorText(next) + } + }, [value]) + + const handleEditorChange = useCallback( + (text: string) => { + if (disabled) return + setEditorText(text) + try { + const parsed = text ? (JSON.parse(text) as Record) : {} + lastExternalRef.current = safeStringify(parsed) + onChange?.(parsed) + } catch { + // Keep the invalid text in the editor; don't propagate until it parses. + } + }, + [disabled, onChange], + ) + + const header = ( +
+ + {name} + + {!disabled && onDelete && ( + +
+ ) + + if (!SharedEditor) { + return ( +
+ {header} +