From 7ff3f42e49fad6a14101640134a3201b0510819b Mon Sep 17 00:00:00 2001 From: Mahmoud Mabrouk Date: Sat, 20 Jun 2026 21:26:21 +0200 Subject: [PATCH 1/2] test(agent): vitest suite + CI for the agent runner; fix relay error bug Convert the hand-run tsx/node:assert scripts under services/agent/test/ into a vitest suite in tests/unit/ (describe/it, node:assert kept). Add vitest + typescript devDeps, scripts (test/typecheck/coverage), a node-env vitest.config (junit + v8 coverage), and broaden the tsconfig include so typecheck covers tests + config. Add a run-services-node-unit-tests job to 12-check-unit-tests.yml, and a services/agent/AGENTS.md. Testability seam: server.ts exports createAgentServer(run); cli.ts exports runCli(raw, stream, io) with an injectable engine + output sink, so the HTTP and CLI paths are tested with a fake engine (server.test.ts, cli.test.ts). src/entry.ts isEntrypoint keeps the entrypoints inert on import. Add src/version.ts + a richer GET /health; version 0.0.0 -> 0.1.0. Fix a real bug in tools/dispatch.ts: relayToolCall referenced an undefined callRef in its error/timeout messages (would throw ReferenceError and mask the real failure); use toolName, plus a focused relay regression test. Result: 10 test files / 47 tests pass, tsc --noEmit clean. Two tests are deferred because their deps live on sibling branches: skills.test.ts (needs engines/skills.ts from feat/agenta-on-rivet) and wire-contract.test.ts (needs the shared Python golden fixtures). They land when those reach this branch. 
Claude-Session: https://claude.ai/code/session_01GLA8RywSLTGiJvBrDnqZa2 --- .github/workflows/12-check-unit-tests.yml | 71 ++ .gitignore | 2 + .../typescript-structure/README.md | 35 + .../typescript-structure/context.md | 49 + .../typescript-structure/plan.md | 173 ++++ .../typescript-structure/research.md | 193 ++++ .../typescript-structure/status.md | 217 +++++ services/agent/AGENTS.md | 62 ++ services/agent/CLAUDE.md | 1 + services/agent/package.json | 16 +- services/agent/pnpm-lock.yaml | 913 +++++++++++++++++- services/agent/src/cli.ts | 113 ++- services/agent/src/entry.ts | 17 + services/agent/src/server.ts | 132 ++- services/agent/src/tools/dispatch.ts | 4 +- services/agent/src/version.ts | 35 + services/agent/test/code-tool.test.ts | 92 -- services/agent/test/continuation.test.ts | 66 -- services/agent/test/extension-tools.test.ts | 109 --- services/agent/test/mcp-servers.test.ts | 58 -- services/agent/test/responder.test.ts | 84 -- services/agent/test/stream-events.test.ts | 148 --- services/agent/test/tool-bridge.test.ts | 169 ---- services/agent/test/tool-dispatch.test.ts | 85 -- services/agent/tests/unit/cli.test.ts | 66 ++ services/agent/tests/unit/code-tool.test.ts | 89 ++ .../agent/tests/unit/continuation.test.ts | 72 ++ .../agent/tests/unit/extension-tools.test.ts | 108 +++ services/agent/tests/unit/mcp-servers.test.ts | 58 ++ services/agent/tests/unit/responder.test.ts | 92 ++ services/agent/tests/unit/server.test.ts | 109 +++ .../agent/tests/unit/stream-events.test.ts | 146 +++ services/agent/tests/unit/tool-bridge.test.ts | 157 +++ .../agent/tests/unit/tool-dispatch.test.ts | 123 +++ services/agent/tsconfig.json | 2 +- services/agent/vitest.config.ts | 20 + 36 files changed, 2959 insertions(+), 927 deletions(-) create mode 100644 docs/design/agent-workflows/typescript-structure/README.md create mode 100644 docs/design/agent-workflows/typescript-structure/context.md create mode 100644 docs/design/agent-workflows/typescript-structure/plan.md 
create mode 100644 docs/design/agent-workflows/typescript-structure/research.md create mode 100644 docs/design/agent-workflows/typescript-structure/status.md create mode 100644 services/agent/AGENTS.md create mode 120000 services/agent/CLAUDE.md create mode 100644 services/agent/src/entry.ts create mode 100644 services/agent/src/version.ts delete mode 100644 services/agent/test/code-tool.test.ts delete mode 100644 services/agent/test/continuation.test.ts delete mode 100644 services/agent/test/extension-tools.test.ts delete mode 100644 services/agent/test/mcp-servers.test.ts delete mode 100644 services/agent/test/responder.test.ts delete mode 100644 services/agent/test/stream-events.test.ts delete mode 100644 services/agent/test/tool-bridge.test.ts delete mode 100644 services/agent/test/tool-dispatch.test.ts create mode 100644 services/agent/tests/unit/cli.test.ts create mode 100644 services/agent/tests/unit/code-tool.test.ts create mode 100644 services/agent/tests/unit/continuation.test.ts create mode 100644 services/agent/tests/unit/extension-tools.test.ts create mode 100644 services/agent/tests/unit/mcp-servers.test.ts create mode 100644 services/agent/tests/unit/responder.test.ts create mode 100644 services/agent/tests/unit/server.test.ts create mode 100644 services/agent/tests/unit/stream-events.test.ts create mode 100644 services/agent/tests/unit/tool-bridge.test.ts create mode 100644 services/agent/tests/unit/tool-dispatch.test.ts create mode 100644 services/agent/vitest.config.ts diff --git a/.github/workflows/12-check-unit-tests.yml b/.github/workflows/12-check-unit-tests.yml index 157d81e470..c8bc699e65 100644 --- a/.github/workflows/12-check-unit-tests.yml +++ b/.github/workflows/12-check-unit-tests.yml @@ -301,3 +301,74 @@ jobs: files: services/oss/tests/results/junit.xml check_name: Application services Unit Test Results comment_mode: off + + run-services-node-unit-tests: + # The agent runner (services/agent) is a standalone Node/pnpm package, not part 
of the + # Python services suite above. It runs its own vitest unit tests plus a tsc typecheck gate. + # No "has_tests" guard on purpose: this suite is established, so a missing/empty suite must + # FAIL the job (vitest exits non-zero on no test files), not silently skip it. + if: | + github.event_name == 'workflow_dispatch' || + !github.event.pull_request.draft + runs-on: ubuntu-latest + permissions: + checks: write + pull-requests: write + contents: read + env: + AGENTA_LICENSE: oss + steps: + - uses: actions/checkout@v6 + + - name: Skip when package selection excludes services + if: github.event_name == 'workflow_dispatch' && !contains(fromJSON('["all","services-only"]'), inputs.packages) + run: exit 0 + + - name: Set up Node.js + if: github.event_name != 'workflow_dispatch' || contains(fromJSON('["all","services-only"]'), inputs.packages) + uses: actions/setup-node@v4 + with: + node-version: '24' + + - name: Enable Corepack + if: github.event_name != 'workflow_dispatch' || contains(fromJSON('["all","services-only"]'), inputs.packages) + run: corepack enable + + - name: Cache pnpm store + if: github.event_name != 'workflow_dispatch' || contains(fromJSON('["all","services-only"]'), inputs.packages) + uses: actions/cache@v4 + with: + path: ~/.pnpm-store + key: ${{ runner.os }}-services-agent-pnpm-${{ hashFiles('services/agent/pnpm-lock.yaml') }} + restore-keys: | + ${{ runner.os }}-services-agent-pnpm- + + - name: Set up pnpm store + if: github.event_name != 'workflow_dispatch' || contains(fromJSON('["all","services-only"]'), inputs.packages) + working-directory: services/agent + run: pnpm config set store-dir ~/.pnpm-store + + - name: Install dependencies + if: github.event_name != 'workflow_dispatch' || contains(fromJSON('["all","services-only"]'), inputs.packages) + working-directory: services/agent + run: pnpm install --frozen-lockfile + + - name: Typecheck (tsc --noEmit, src + tests + config) + if: github.event_name != 'workflow_dispatch' || 
contains(fromJSON('["all","services-only"]'), inputs.packages) + working-directory: services/agent + run: pnpm run typecheck + + # The code-tool unit test spawns python3 and node end-to-end; both are preinstalled on + # ubuntu runners (node is also set up above), so no setup-python step is needed. + - name: Run agent runner unit tests + if: github.event_name != 'workflow_dispatch' || contains(fromJSON('["all","services-only"]'), inputs.packages) + working-directory: services/agent + run: pnpm run test:unit + + - name: Publish agent runner unit test results + if: always() && (github.event_name != 'workflow_dispatch' || contains(fromJSON('["all","services-only"]'), inputs.packages)) + uses: EnricoMi/publish-unit-test-result-action@v2 + with: + files: services/agent/test-results/junit.xml + check_name: Agent Runner Unit Test Results + comment_mode: off diff --git a/.gitignore b/.gitignore index 6c91758e28..e363be21a7 100644 --- a/.gitignore +++ b/.gitignore @@ -65,6 +65,8 @@ sdks/python/oss/tests/results/ sdks/python/ee/tests/results/ services/oss/tests/results/ services/ee/tests/results/ +services/agent/test-results/ +services/agent/coverage/ .* !**/.gitkeep !.github/ diff --git a/docs/design/agent-workflows/typescript-structure/README.md b/docs/design/agent-workflows/typescript-structure/README.md new file mode 100644 index 0000000000..a86e689022 --- /dev/null +++ b/docs/design/agent-workflows/typescript-structure/README.md @@ -0,0 +1,35 @@ +# TypeScript structure for the agent runner + +Planning workspace for making the new TypeScript code in the agent-workflows project +usable, maintainable, and testable, with tests that run easily and run in CI. + +The new TypeScript lives mostly in one place: `services/agent/` (the Node "agent runner" +sidecar). This folder researches its current shape and proposes how to structure, test, +and gate it the way the rest of the monorepo already handles Python and frontend code. 
+ +## Files + +- [context.md](context.md) — why this work exists, goals, non-goals, who it is for. +- [research.md](research.md) — what is actually in the repo today: where the TS lives, how + it builds, ships, and is (barely) tested; the conventions the repo already standardizes + for TS; a Python-to-TypeScript mental model; the gaps. +- [plan.md](plan.md) — the phased plan to close the gaps, with concrete file changes, + scripts, and CI wiring. +- [status.md](status.md) — source of truth for progress and open decisions. Read this + first to see where things stand. + +## TL;DR + +The runner code is well-organized (clear `engines/`, `tools/`, `tracing/` seams, a single +`protocol.ts` wire contract). The weak spots are tooling, not architecture: + +1. Eight test files exist but there is **no test runner and no `pnpm test`**. Each test is + a hand-run `tsx` script. +2. Those tests run in **no CI workflow**. The Node side is invisible to the unit-test gate. +3. There is **no typecheck gate** even though the code is already `strict: true`. +4. The TS side has **no test asserting the cross-language wire contract**, which is only + pinned from Python today. + +The plan adopts **vitest** (the runner `web/packages/*` already use), wires a Node job into +`12-check-unit-tests.yml`, adds a `tsc --noEmit` gate, and adds a golden-fixture round-trip +test so `protocol.ts` cannot drift from the Python wire silently. diff --git a/docs/design/agent-workflows/typescript-structure/context.md b/docs/design/agent-workflows/typescript-structure/context.md new file mode 100644 index 0000000000..e8e6edce4d --- /dev/null +++ b/docs/design/agent-workflows/typescript-structure/context.md @@ -0,0 +1,49 @@ +# Context + +## Why this work exists + +The agent-workflows project introduced the first substantial server-side TypeScript in a +repo that was Python on the backend and TypeScript only on the frontend. The new code is +the agent runner sidecar at `services/agent/`. 
It drives the agent harnesses (Pi, Claude +Code, rivet's `sandbox-agent`) because those are Node libraries with no Python SDK. The +Python agent service calls into it over one JSON contract. + +This code grew fast during the build-out. It works and it is reasonably well-factored, but +it sits outside the conventions the rest of the monorepo follows. The owner is a Python +developer and wants this TypeScript to feel as routine to maintain and test as the Python +does: a single command to run the tests, the tests running in CI, a typecheck gate, and a +clear place for new code and new tests to go. + +## Goals + +1. **Testable, easily.** One command (`pnpm test`) runs every unit test for the runner. + Watch mode and coverage work. Writing a new test is obvious and low-ceremony. +2. **Tested in CI.** The runner's tests run on every PR that touches it, with results + published the same way the Python and web suites are. +3. **Typechecked.** The `strict` TypeScript already configured produces a CI signal, so a + type error fails the build instead of reaching the dockerized sidecar at runtime. +4. **Contract-safe.** The wire contract between the Python service and the Node runner is + guarded from both sides, not just from Python. +5. **Maintainable and discoverable.** A new contributor (or agent) can find where runner + code and runner tests belong, following the same instruction-layering the repo uses for + `web/` and `api/`. + +## Non-goals + +- Rewriting or re-architecting the runner. The `engines` / `tools` / `tracing` split and + the `protocol.ts` contract stay. This is about tooling and structure, not a redesign. +- Folding `services/agent` into the `web/` pnpm workspace. It is a deployable sidecar with + its own Docker build and its own lockfile; it should stay a standalone package (see + research.md for the trade-off). +- Changing the frontend TypeScript (`web/oss/src/components/AgentChatSlice/`). 
That code + already lives in the web app under established conventions (vitest, package practices). + It is out of scope here. +- End-to-end / live-LLM acceptance tests for the runner. Those depend on real harness + credentials and are tracked separately in the agent-workflows test work. This plan is + about the fast unit/contract layer that can run on every PR with no secrets. + +## Who this is for + +The maintainer (Python-first) and any future contributor or agent touching +`services/agent`. research.md includes a Python-to-TypeScript mental model so the tooling +choices map onto things already familiar from the SDK and API side (uv, ruff, pytest). diff --git a/docs/design/agent-workflows/typescript-structure/plan.md b/docs/design/agent-workflows/typescript-structure/plan.md new file mode 100644 index 0000000000..29ea51f7fe --- /dev/null +++ b/docs/design/agent-workflows/typescript-structure/plan.md @@ -0,0 +1,173 @@ +# Plan + +Five phases, ordered so value lands early and nothing later depends on a refactor. Phases 1 +and 2 are the core ask (easy-to-run tests, tests in CI). Phase 3 protects the contract. +Phase 4 is structure and maintainability, adopted progressively. Phase 5 is a separate release and supportability track. + +Effort estimates assume one developer familiar with the runner. They are deliberate, not +padded. + +## Phase 1 — Make the tests run with one command (~half day) + +Goal: `pnpm test` in `services/agent` runs every unit test, with watch and coverage. + +0. **Fix the latent bug the typecheck will expose.** `src/tools/dispatch.ts` references an + undefined `callRef` at lines 88 and 92 inside `relayToolCall`. Use the in-scope value + (`toolName`, or thread the spec's `callRef` in) so the error path stops throwing + `ReferenceError`. Found by Codex; this is the proof the typecheck gate has teeth. +1. 
Add dev deps to `services/agent/package.json`: `vitest`, `@vitest/coverage-v8`, **and + `typescript`** (currently absent: `node_modules/.bin/tsc` does not exist, so `typecheck` + cannot run without it). Match the versions `web/packages/*` pin (`vitest` `^4.1.x`); align + `@types/node` with Node 24. +2. Add `services/agent/vitest.config.ts`, modeled on `agenta-shared/vitest.config.ts`: + `include: ["tests/unit/**/*.test.ts"]`, `environment: "node"`, + `reporters: ["default", "junit"]` to `test-results/junit.xml`, v8 coverage over `src/`. +3. Add scripts to `package.json`: + + ```jsonc + "test": "pnpm run test:unit", + "test:unit": "vitest run", + "test:watch": "vitest", + "test:coverage": "vitest run --coverage", + "typecheck": "tsc --noEmit" + ``` + +4. Move `test/*.test.ts` to `tests/unit/*.test.ts` and wrap the bare `{ ... }` blocks in + `describe` / `it` so reporting and junit are per-case. **Do not bother rewriting every + `assert` to `expect`** (Codex's point): vitest runs `node:assert` fine, so the conversion + is just adding `describe`/`it` wrappers, not touching assertions. Keep filenames. The + dynamic-import-after-env pattern (e.g. `skills.test.ts`) stays valid; add + `vi.resetModules()` only where a file needs a clean module per case. +5. Update the `Run:` header comment in each test to `pnpm test` (or + `pnpm exec vitest run tests/unit/<name>.test.ts` for a single file). + +Done when: `pnpm test` is green locally and prints a single summary across all files. + +## Phase 2 — Run them in CI (~half day) + +Goal: the runner's tests gate every PR that touches `services/agent`. + +1. Add a `run-services-node-unit-tests` job to `.github/workflows/12-check-unit-tests.yml`, + mirroring the existing `run-web-unit-tests` setup but scoped to the package: + - `actions/setup-node@v4` with `node-version: '24'`, `corepack enable`. + - Cache the pnpm store keyed on `services/agent/pnpm-lock.yaml`. 
+ - `working-directory: services/agent`, `pnpm install --frozen-lockfile`, then + `pnpm run typecheck` and `pnpm run test:unit`. + - **Ensure `python3` is on the runner.** `test/code-tool.test.ts` spawns `python3` (and + `node`) through `runCodeTool`. ubuntu-latest ships python3, but make it explicit, or + split the subprocess code-tool test into an integration test the unit job can skip. + - Publish `services/agent/test-results/junit.xml` with + `EnricoMi/publish-unit-test-result-action@v2`, `check_name: Agent Runner Unit Tests`. +2. Path-filter the job. The workflow already triggers on `services/**`; gate the new job's + steps so it only does work when `services/agent/**` changed (the same `if:` pattern the + other jobs use for their package selection), to avoid installing Node on unrelated PRs. +3. Decide whether `typecheck` failing fails the job. Recommendation: yes. The code is + already `strict`; a type error should not merge. + +Done when: a PR touching `services/agent` shows an "Agent Runner Unit Tests" check, and a +deliberately broken type or assertion turns it red. + +## Phase 3 — Guard the wire contract from the TS side (~half day) + +Goal: a contract change must update Python and TypeScript together, or fail on both. + +**Codex correction (important):** `protocol.ts` is types only, erased at runtime. "Loading +JSON and round-tripping it through an interface" validates nothing at runtime. The contract +test needs real runtime checks, in two layers: + +1. Add `tests/utils/golden.ts` that loads the shared fixtures from + `sdks/python/oss/tests/pytest/unit/agents/golden/` (relative path from the runner, read + at test time). No copying; one source of truth. +2. 
**Runtime validation, not type assertion.** Either (a) introduce a zod (or equivalent) + schema that mirrors `protocol.ts` and `parse()` each golden fixture in + `tests/unit/wire-contract.test.ts`, or (b) write explicit structural assertions (required + keys present, types correct, the `ok` discriminant). Option (a) doubles as a real runtime + guard the server can use on inbound requests; option (b) is lighter but only a test. +3. **Type-level check, separately.** Use vitest's `expectTypeOf` (or a `tsd`-style check) so + a fixture that drifts from `AgentRunRequest` fails `typecheck`, independent of the runtime + assertions. +4. Exercise the pure helpers in `protocol.ts` (`messageText`, `resolvePromptText`, + `resolveRunSessionId`) against fixture-derived inputs. +5. Note in `protocol.ts` and Python `test_wire_contract.py` that the contract is now pinned + from both sides, so future editors look both ways. + +Done when: editing a field name in `protocol.ts` without updating the fixtures (or vice +versa) fails this test, at runtime and at typecheck. + +## Phase 4 — Structure and maintainability (progressive, no big bang) + +Adopt as the runner is touched, not in one sweep. + +1. **Add `services/agent/AGENTS.md`** (with a `CLAUDE.md` symlink, matching `web/`, `api/`). + Keep it short: the package is a standalone pnpm project; how to run/serve/test/typecheck; + where runner code goes (`src/{engines,tools,tracing}`) and where tests go + (`tests/unit`, fixtures in `tests/utils`); the wire contract is mirrored in Python + `wire.py` and pinned by golden fixtures, so change both sides; vitest is the runner. + Add a thin `.claude/rules` / `.cursor/rules` pointer if the repo expects one. +2. **Local typecheck gate (optional).** The root `.husky/pre-commit` already runs prettier + and gitleaks repo-wide. Optionally add `pnpm --dir services/agent typecheck` for changed + TS, or leave the gate to CI to keep commits fast. 
Recommendation: CI is the gate; skip + the local hook unless commits regularly land type errors. +3. **Linting (optional, phase-2 nice-to-have).** There is no eslint outside `web/`. + `prettier` (global hook) covers formatting. A small `typescript-eslint` flat config for + `services/agent` would add real value for async runner code (`no-floating-promises`, + `no-misused-promises`). Treat as optional; `tsc --strict` + prettier is an acceptable + floor. +4. **Extract a testability seam (Codex).** `server.ts` and `cli.ts` wire transport to the + engines inline, so HTTP/CLI behavior can only be tested with a live harness. Export + `createServer(runAgent)` and `runCli(runAgent)` that take the engine as an argument. Then + unit tests inject a fake engine returning a deterministic `AgentRunResult` and cover + `/health`, invalid-JSON handling, `POST /run`, NDJSON record ordering, and CLI exit codes, + with no Pi/Claude/rivet. This is the highest-value structural change for testability. +5. **Decompose the two large files opportunistically.** When next editing `engines/rivet.ts` + or `tracing/otel.ts`, pull a cohesive seam into its own module and unit-test it, the way + `responder.ts` was extracted from `rivet.ts`. Not a scheduled refactor. + +## Phase 5 — Make it a versioned, supportable service (Codex's main gap) + +The review's core point: the plan above makes the runner testable but does not make it a +first-class deployable. These items make the SDK and the sidecar safe to release on their +own cadences. Scope and sequence with the platform/release owner; some are bigger than a +half-day. + +1. **Protocol/version negotiation.** Add a `protocolVersion` (major) to the wire and have + `GET /health` (or a new `/capabilities`) return `runnerVersion`, `protocolVersion`, + supported engines, and harnesses. The Python adapter probes once and refuses an + incompatible major before the first run. Today `/health` returns only `{status:"ok"}` and + `package.json` is `0.0.0`. +2. 
**Release ownership.** Decide whether the sidecar version tracks the Agenta release or is + versioned independently, and stop shipping `0.0.0`. The SDK should pin a compatible runner + *protocol* range, not a package-version equality. +3. **Sidecar image publishing.** No CI publishes the runner image today (only api/web/services + images are built, e.g. in `42-railway-build.yml`). Add a build/publish job so the HTTP + sidecar (the production boundary) is actually distributable. +4. **Local code-tool execution policy.** `runCodeTool` scopes secret env, but a `code` tool + still runs an arbitrary `python3`/`node` process in the sidecar. State the sandbox, + resource, and network policy (it is already sandboxed in Daytona; the local/in-sidecar + path needs an explicit stance), so this is a deliberate posture, not an oversight. +5. **Config hygiene.** `services/oss/src/agent/app.py` reads `AGENTA_AGENT_*` via raw + `os.getenv`. The repo convention (root `AGENTS.md`) is to add config to + `api/oss/src/utils/env.py` and consume the shared `env` object. Align it. +6. **Fix the stale `local.py` docstring.** `sdks/python/.../adapters/local.py` says the Pi + runner is "shipped inside the wheel," which is not true today and is the likely source of + the wheel confusion. Either implement that path deliberately (see the packaging options in + the answer to question 1) or correct the docstring to match reality. + +## Sequencing and ownership + +- Phases 1 to 3 are independent of any runtime change and can land as one small PR or three + tiny ones. They add no production code paths, only tooling and tests. Start here. +- Phase 4 item 1 (`AGENTS.md`) is worth doing alongside Phase 1 so the new test location is + documented the moment it exists. Item 4 (the `createServer`/`runCli` seam) unblocks the + HTTP/CLI tests and is worth pulling forward. +- Phase 5 is a separate track, owned with whoever owns releases and deployment. 
It does not + block Phases 1 to 4, but it is what turns "tested code" into "supportable service." +- None of this blocks ongoing runner feature work; it runs in parallel. + +## What success looks like + +- `cd services/agent && pnpm test` runs the whole suite in one go, green, with a summary. +- A PR touching the runner gets a red/green unit-test + typecheck check automatically. +- `protocol.ts` cannot drift from the Python wire without a test failing. +- A new contributor reads `services/agent/AGENTS.md` and knows where code and tests go and + how to run them, without reading the whole tree. diff --git a/docs/design/agent-workflows/typescript-structure/research.md b/docs/design/agent-workflows/typescript-structure/research.md new file mode 100644 index 0000000000..98f1ce228c --- /dev/null +++ b/docs/design/agent-workflows/typescript-structure/research.md @@ -0,0 +1,193 @@ +# Research + +Findings from reading the repo on 2026-06-20. Everything below is observed in the tree, not +assumed. + +## 1. Where the new TypeScript actually lives + +Server-side TypeScript that did not exist before agent-workflows is concentrated in one +package: + +``` +services/agent/ standalone pnpm package "agenta-agent-pi-wrapper" + package.json ESM, type:module, pnpm 10.30, Node 24 + tsconfig.json strict, noEmit, moduleResolution Bundler + pnpm-lock.yaml its OWN lockfile (not in the web workspace) + src/ + cli.ts (88) entrypoint: stdin JSON in, stdout JSON out + server.ts (155) entrypoint: HTTP sidecar on :8765 (GET /health, POST /run) + protocol.ts (295) the /run wire contract: request, result, events, caps + responder.ts (77) permission/HITL policy seam (extracted from rivet.ts) + engines/ + pi.ts (403) drive the Pi SDK in-process + rivet.ts (1085) drive any harness over ACP via sandbox-agent + skills.ts (50) resolve forced-skill names to dirs on disk + tools/ (7 files) callback, code, dispatch, mcp-bridge, mcp-server, relay, ... 
+ tracing/ + otel.ts (1026) turn a run into OTel spans nested under /invoke + extensions/ + agenta.ts (114) Pi extension, esbuild-bundled into dist/ for Pi to load + test/ (8 files) hand-run tsx scripts (see section 3) + skills/ SKILL.md bundled forced-skills for the Agenta harness + config/ fallback hello-world agent + docker/ Dockerfile (prod) + Dockerfile.dev + scripts/ build-extension.mjs (esbuild bundle of the extension) +``` + +Total runner source is ~4,100 lines. It is the only meaningful server-side TS in the repo. + +Other TypeScript exists but is **not** in scope: + +- `web/oss/src/components/AgentChatSlice/` — frontend, already under web conventions. +- `web/packages/*`, `web/oss`, `web/ee` — the established frontend, vitest + Playwright. +- `docs/`, `examples/` — Docusaurus and sample apps. + +So "TypeScript in different places" is really one homeless package (`services/agent`) plus +frontend code that already has a home. The plan targets the package. + +## 2. How the runner builds, runs, and ships today + +- **No compile step for the app.** It runs through `tsx` (a TS-aware Node loader). Both the + dev image (`tsx watch src/server.ts`) and the prod image (`tsx src/server.ts`) execute + the source directly. `tsconfig.json` is `noEmit: true`; it exists only for typechecking, + and nothing runs that typecheck. +- **One real build:** `scripts/build-extension.mjs` esbuild-bundles `src/extensions/agenta.ts` + into `dist/extensions/agenta.js` so Pi can load it anywhere. Both Dockerfiles run + `pnpm run build:extension`. +- **Two transports, one contract.** Python reaches the runner either over HTTP (the docker + sidecar) or by spawning the CLI as a subprocess. Both carry the same `/run` JSON. See + `sdks/python/agenta/sdk/agents/utils/ts_runner.py` (`deliver_http`, `deliver_subprocess`, + plus the NDJSON streaming variants). +- **Standalone package.** `services/agent` has its own `pnpm-lock.yaml` and is absent from + `web/pnpm-workspace.yaml`. 
That isolation is deliberate and worth keeping: the sidecar + image installs only the runner's deps, with no coupling to the web dependency graph. +- **No TS in the wheel today, but a docstring claims otherwise.** The SDK wheel is pure + Python (`uv_build`, zero `.ts`/`.js`). However `sdks/python/.../adapters/local.py` (the + unimplemented `LocalBackend`) says the Pi runner is "the bundled JS runner ... shipped + inside the wheel." That is aspirational and NOT YET IMPLEMENTED, but it is almost certainly + the source of the "is the TS part of the SDK / wheel" worry. The future-local-backend + question (bundle a built JS runner into the wheel vs require Docker/npm) is real and + undecided; see plan Phase 5 item 6 and the distribution options in status.md. + +Scripts present in `package.json` today: `run:cli`, `serve`, `serve:watch`, +`build:extension`, `login`. There is **no `test`, no `typecheck`, no `lint`, no `format`.** + +## 3. How it is tested today (the gap) + +There are 8 test files under `services/agent/test/`: + +``` +code-tool.test.ts continuation.test.ts mcp-servers.test.ts responder.test.ts +skills.test.ts stream-events.test.ts tool-bridge.test.ts tool-dispatch.test.ts +``` + +They are genuinely good tests in content. The problem is entirely in how they run: + +- Each file is a **standalone script** using `node:assert/strict`, with bare `{ ... }` + blocks for grouping and a `console.log("...: ok")` at the end. The header of each says + `Run: pnpm exec tsx test/<name>.test.ts`. +- There is **no runner and no aggregation.** Running "the test suite" means running eight + commands by hand. A failure is a thrown assertion and a non-zero exit on one file; there + is no summary, no count, no `--watch`, no filtering, no coverage, no junit. +- They run in **no CI workflow.** `12-check-unit-tests.yml` has a `run-services-unit-tests` + job, but it only looks at `services/oss/tests/pytest/unit` (Python) and runs + `uv run python run-tests.py`. 
It never installs Node or touches `services/agent`. Every + vitest mention in CI refers to `web/packages`. So the runner's tests have never gated a + PR. +- There is **no TS-side contract test.** `protocol.ts` says the contract is pinned by + golden fixtures under `sdks/python/oss/tests/pytest/unit/agents/golden/` and checked by + the Python `test_wire_contract.py`. That guards the Python mirror (`wire.py`). Nothing on + the TS side asserts that `protocol.ts` still accepts those fixtures, so the runner can + drift from the contract and only Python would notice. + +## 4. What the repo already standardizes for TypeScript tests + +We do not need to invent a convention. The frontend already has one, and there is a written +spec: + +- **vitest is the repo's TS unit runner.** `web/packages/*` (agenta-shared, entities, + entity-ui, playground, annotation) each ship a `vitest.config.ts` and these scripts: + + ```jsonc + "test": "pnpm run test:unit", + "test:unit": "vitest run", + "test:watch": "vitest", + "test:coverage": "vitest run --coverage", + "typecheck": "tsc --noEmit" + ``` + + Config (from `agenta-shared/vitest.config.ts`): `include: ["tests/unit/**/*.test.ts"]`, + `environment: "node"`, `reporters: ["default", "junit"]` writing `test-results/junit.xml`, + and v8 coverage. This is exactly the shape a Node service wants. + +- **CI runs them generically.** The web job runs `pnpm -r --if-present test:unit` across + workspace packages and publishes `web/packages/*/test-results/junit.xml` via the + `publish-unit-test-result-action`. Any package that defines `test:unit` is picked up; the + rest are skipped. A new package following the same script names slots in for free. + +- **There is a folder-layout spec.** `docs/designs/testing/testing.structure.specs.md` + defines runner-first layout: `<package>/tests/<runner>/{unit,integration,acceptance,utils}` + plus `manual/` and `legacy/`. 
In practice the vitest packages collapse this to + `tests/unit/**/*.test.ts` (one runner, so no `vitest/` level). The agent runner's current + flat `test/` directory matches neither; aligning it to `tests/unit/` matches the closest + precedent (web packages) and the spec. + +## 5. Python-to-TypeScript mental model + +For mapping the tooling onto what the SDK/API side already does: + +| Concern | Python (api/, sdks/) | TypeScript (services/agent) | +|----------------------|---------------------------|------------------------------------| +| Package manager | `uv` | `pnpm` (own lockfile) | +| Run a script | `uv run python x.py` | `pnpm exec tsx x.ts` | +| Test runner | `pytest` | **vitest** (proposed) | +| One command to test | `uv run python run-tests.py` | `pnpm test` (proposed) | +| Type checker | `mypy` / pyright | `tsc --noEmit` (configured, unrun) | +| Formatter | `ruff format` | `prettier` (runs repo-wide in hooks) | +| Linter | `ruff check` | none today (eslint is web-only) | +| Fixtures | `conftest.py` fixtures | `tests/utils/` helper modules | +| CI unit gate | `12-check-unit-tests.yml` Python jobs | new Node job (proposed) | + +The headline: the TS runner has a formatter (via the global pre-commit) but no test runner, +no test gate, and no type gate. The Python side has all three. Closing that is the work. + +## 6. The cross-language contract is the seam that matters most + +`protocol.ts` is the single source of the `/run` types. `sdks/python/.../utils/wire.py` +hand-mirrors them. The contract is pinned by shared golden JSON +(`run_request.pi.json`, `run_request.claude.json`, `run_result.ok.json`, +`run_result.error.json`) and asserted by `test_wire_contract.py` on the Python side only. + +This is the highest-value place to add a TS test. 
A vitest test that loads those same +golden files and round-trips them through `protocol.ts` (parse the request shape, build a +result that matches the result fixture) means a contract change has to update both sides or +fail on both sides. It reuses fixtures that already exist, needs no harness and no network, +and directly protects the Python-to-Node boundary the whole feature rests on. + +## 7. Maintainability observations (not blockers) + +- **Architecture is sound.** Engines are peers behind one contract; tools are split by + concern; the responder seam was already extracted from `rivet.ts` (and is unit-tested). + `protocol.ts` carries thorough doc comments. A Python dev can navigate it. +- **Two large files.** `engines/rivet.ts` (1,085) and `tracing/otel.ts` (1,026) are the + obvious decomposition candidates. The responder extraction is the precedent: pull + cohesive seams out into separately testable units when you next touch them. Not a + big-bang refactor, and not a prerequisite for the test/CI work. +- **No `AGENTS.md` for the package.** The repo pushes area conventions into nested + `AGENTS.md` files (`web/AGENTS.md`, `api/AGENTS.md`) with a `CLAUDE.md` symlink. + `services/agent` has a strong `README.md` but no `AGENTS.md`, so the "where does runner + code/tests go, how do I run them" rules have nowhere to live. Adding one is cheap and + fits the repo's instruction-layering model. +- **Env-at-import-time.** Some modules read env on import (e.g. `skills.ts` reads + `AGENTA_AGENT_SKILLS_DIR`; the test sets it before a dynamic `import()`). vitest isolates + modules per test file, so this keeps working, but new tests touching such modules should + use dynamic import or `vi.resetModules()` rather than top-level import. + +## 8. One real decision to make + +**vitest vs `node:test`.** `node:test` is built in and adds zero dependencies, but it has +no first-class junit reporter or coverage UX and would diverge from the frontend. 
vitest +adds one dev dependency but matches `web/packages` exactly, gives junit + v8 coverage + +watch + filtering out of the box, and lets the CI wiring mirror the web job. Recommendation: +**vitest.** Everything in the plan assumes it; swapping to `node:test` would only change the +runner dependency and config, not the structure. diff --git a/docs/design/agent-workflows/typescript-structure/status.md b/docs/design/agent-workflows/typescript-structure/status.md new file mode 100644 index 0000000000..a3a403fbd7 --- /dev/null +++ b/docs/design/agent-workflows/typescript-structure/status.md @@ -0,0 +1,217 @@ +# Status + +Source of truth for this planning folder. Update as work proceeds. + +## Current state — 2026-06-20 + +Research complete. Plan drafted and then reviewed by Codex (gpt-5.5, xhigh). Plan widened in +response (see plan.md Phases 1, 3, 5). **Phase 1 is implemented and green.** + +### Phase 1 done (2026-06-20) + +- Fixed the `callRef` bug in `src/tools/dispatch.ts` (lines 88, 92 now use `toolName`). +- Added dev deps: `vitest` 4.1.9, `@vitest/coverage-v8` 4.1.9, `typescript` 5.9.3; bumped + `@types/node` to 24.13.2 (matches the Node 24 runtime). `pnpm-lock.yaml` updated. +- Added `vitest.config.ts` (node env, junit to `test-results/junit.xml`, v8 coverage). +- Added scripts: `test`, `test:unit`, `test:watch`, `test:coverage`, `typecheck`. +- Moved `test/*.test.ts` (9 files, including `extension-tools.test.ts` from the + `feat/agent-runner-engines` lane) to `tests/unit/*.test.ts`, wrapped in `describe`/`it`, + kept `node:assert`, fixed import depth to `../../src/`. +- Added `test-results/` and `coverage/` to `.gitignore`. + +Verified: `pnpm typecheck` exits 0 (and a planted type error makes it exit 2, so the gate has +teeth). `pnpm test` = 9 files, 42 tests, all pass, junit written. `pnpm test:coverage` works +(32.6% line coverage; engines are not exercised by unit tests yet, as expected). 
+ +Not mine in the same working tree: `src/engines/pi.ts`, `src/engines/rivet.ts`, the +Dockerfiles, and `src/engines/skills.ts` were already modified/untracked from the parallel +`feat/agent-runner-engines` lane. The combined tree still typechecks and tests green. + +### Phase 2 done (2026-06-20) + +- Added job `run-services-node-unit-tests` to `.github/workflows/12-check-unit-tests.yml`, + mirroring the web (pnpm setup) and python-services (has_tests guard + package-selection + gate) jobs: Node 24 + corepack pnpm, `pnpm install --frozen-lockfile`, `pnpm run typecheck`, + `pnpm run test:unit` (working-directory `services/agent`), then publish + `services/agent/test-results/junit.xml` as "Agent Runner Unit Test Results". +- No `setup-python`: the code-tool test spawns `python3`/`node`, both preinstalled on ubuntu + runners. +- Verified locally: the workflow YAML parses and the job is present; + `pnpm install --frozen-lockfile` succeeds (lockfile matches package.json), so CI will not + fail on a lockfile mismatch. + +### Codex review of Phase 1+2 (xhigh) — all 5 findings fixed (2026-06-20) + +Codex confirmed the `callRef` fix is correct and the test conversion is assertion-faithful, +then found 5 issues. All fixed and verified: + +1. **High — CI could pass while running nothing.** The `has_tests` guard let the job skip + silently. Removed it; vitest exits non-zero on no test files, so a missing suite now fails. +2. **High — the nested `.gitignore` is itself ignored.** Root `.gitignore` line 68 (`.*`) + ignores every nested `.gitignore`, so the `services/agent/.gitignore` artifact rules could + never land. Reverted that edit; added `services/agent/test-results/` and + `services/agent/coverage/` to ROOT `.gitignore` (the repo's convention). Verified with + `git check-ignore`. +3. **Medium — typecheck did not cover tests/config.** Broadened `tsconfig.json` `include` to + `src + tests + vitest.config.ts`. 
Proven: a planted type error in a test file now fails + `pnpm typecheck`. +4. **Medium — brittle env isolation.** `skills.test.ts` now saves/restores + `AGENTA_AGENT_SKILLS_DIR` in `afterAll`; `responder.test.ts` has an `afterEach` that clears + `AGENTA_RIVET_DENY_PERMISSIONS` even if an assertion throws. +5. **Low — the fixed bug had no direct test.** Added two `relayToolCall` tests in + `tool-dispatch.test.ts`: the ok path returns the relayed text, and the empty-error path + asserts `tool relay failed for <toolName>` (this would have thrown `ReferenceError` before + the fix). + +Final state after Phase 1+2: `pnpm typecheck` exits 0 (covers src + tests + config; planted +errors exit 2). `pnpm test` = 9 files / 44 tests pass. `pnpm install --frozen-lockfile` clean. +Workflow YAML valid. + +### Phase 3 done (2026-06-20) + +The TS side of the cross-language wire contract (the "later PR" the Python +`test_wire_contract.py` names). Two layers, per Codex's correction that types are erased: + +- `tests/utils/golden.ts` reads the shared fixtures from + `sdks/python/oss/tests/pytest/unit/agents/golden/` in place via `node:fs` (no copy). +- `tests/unit/wire-contract.test.ts`: + - **Runtime**: loads `run_request.pi.json`, `run_request.claude.json`, `run_result.ok.json`, + `run_result.error.json`; asserts shapes; exercises `resolvePromptText`, + `resolveRunSessionId`, `messageText`; checks the camelCase capability keys and the + trailing untyped event the wire carries. + - **Compile-time**: `KNOWN_REQUEST_KEYS` (mirrored from the Python test) and the capability + keys are assigned to `(keyof AgentRunRequest)[]` / `(keyof HarnessCapabilities)[]`. If + `protocol.ts` renames or drops a field the wire still emits, `tsc` fails. + +Both gates proven: a wire key not on `AgentRunRequest` fails `tsc` (TS2322); clean restores +it. Final: `pnpm test` = **10 files / 51 tests** pass, `pnpm typecheck` exits 0. + +Phases 1, 2, and 3 are implemented, reviewed, and green. 
+ +### Phase 4 done (2026-06-20) + +- `services/agent/AGENTS.md` + `CLAUDE.md` symlink (matches `web/`, `api/`): standalone pnpm + package, commands, where code/tests go, the mirrored wire contract, the testing seams. +- **Testability seam (Codex's #1 structural item):** `server.ts` exports + `createAgentServer(run)` / `createRequestListener(run)`; `cli.ts` exports + `runCli(raw, stream, io)` with an injectable engine and output sink (streaming stays live). + Both entrypoints auto-run only when they are the process entry (`src/entry.ts` + `isEntrypoint`), so importing them in tests is inert. +- New tests: `server.test.ts` (5) drives a real server on an ephemeral port with a fake + engine (/health, /run, 400 invalid JSON, 500 failure, NDJSON order); `cli.test.ts` (4) + drives `runCli` with a fake engine + collecting write (one-shot, invalid JSON, failure, + streaming order). +- Deferred (documented): `typescript-eslint` (tsc --strict + prettier is the floor; risks a + rabbit hole in existing engine code) and decomposing `rivet.ts`/`otel.ts` (opportunistic). + +### Phase 5 partial (2026-06-20) — runner side done; client/release/CI need decisions + +Implemented (self-contained, additive): +- `src/version.ts`: `PROTOCOL_VERSION = 1`, `RUNNER_VERSION` (from package.json), engines, + harnesses. `GET /health` now returns this identity instead of `{status:"ok"}`. Verified + live: `{"status":"ok","runner":"0.1.0","protocol":1,"engines":[...],"harnesses":[...]}`. +- `package.json` version `0.0.0` -> `0.1.0`. +- Fixed the misleading `sdks/python/.../adapters/local.py` docstring (the source of the wheel + worry): the runner is NOT in the wheel; runner-delivery is an open decision. + +Deferred (genuine decisions / other areas / would deepen entanglement): +- Client-side probe: the Python adapter should `GET /health` once and refuse an incompatible + protocol major (SDK `ts_runner.py`/adapters; needs the version-compat policy decided). 
+- Release ownership + SDK pinning a runner protocol range (decision: does the sidecar version + track the Agenta release or version independently?). +- Sidecar image publishing in CI (`42-railway-build.yml` builds only api/web/services today). +- Config hygiene: `services/oss/src/agent/app.py` raw `os.getenv` -> shared `env` object + (that file is modified by another lane right now; editing it would conflict). + +Final after Phases 4+5: `pnpm test` = **12 files / 60 tests** pass, `pnpm typecheck` exits 0. + +### Commit status (2026-06-20) + +Not committable as an independent unit yet. GitButler committed the new files cleanly (tests, +config, CI, docs) but refused to commit the edits to `package.json`, `dispatch.ts`, +`tsconfig.json` and the old-test deletions, because those files are owned by the in-flight +`feat/agent-runner-engines` commits below in the stack. A half-committed lane is broken, so +the lane was rolled back to snapshot `fce735461f`. All work is intact and green on disk. It +should land WITH the agent-runner feature (that lane's owner includes these files, or this +test work stacks cleanly once that feature is actually committed/pushed). + +## Codex review (xhigh) — 2026-06-20 + +Codex's verdict: the plan is directionally right but too narrow. It fixes test ergonomics +but does not yet make the runner a versioned, supportable server component. Verified findings +we accepted: + +- **Real bug (verified):** `services/agent/src/tools/dispatch.ts` references `callRef` at + lines 88 and 92, but that identifier is not defined in `relayToolCall` (only `spec.callRef` + exists elsewhere). On a Daytona relay failure/timeout, the error-message build throws + `ReferenceError` and masks the real error. A `tsc --noEmit` gate catches it. This is the + strongest argument for the typecheck gate, and it is a one-line fix. +- **`typescript` is not a dependency (verified):** `node_modules/.bin/tsc` does not exist. 
+ The `typecheck` script needs `typescript` added; `tsx` does not provide `tsc`. +- **Phase 3 was naive (accepted):** TS interfaces are erased at runtime, so "round-trip the + golden JSON through `protocol.ts`" does nothing at runtime. Use runtime validation (a zod + schema or explicit structural assertions), plus a separate type-level check. +- **Testability seam (accepted):** export `createServer(runAgent)` / `runCli(runAgent)` so + HTTP and CLI paths can be tested with a fake engine, no live Pi/Claude/rivet. +- **CI detail (verified):** `test/code-tool.test.ts` spawns `python3`. The Node CI job needs + Python available, or that test gets split out. +- **Bigger gaps (accepted, now Phase 5):** no protocol/version negotiation, no sidecar image + publishing in CI, no release ownership (`package.json` is `0.0.0`), local code-tool + execution has no stated sandbox/resource policy, and `services/oss/src/agent/app.py` reads + `AGENTA_AGENT_*` via raw `os.getenv` instead of the shared env object. +- **Packaging smoking gun (verified):** `sdks/python/.../adapters/local.py` docstring says a + "bundled JS runner ... shipped inside the wheel," but it is marked NOT YET IMPLEMENTED. + Nothing TS is in the wheel today; the future `LocalBackend` plans to put a bundled JS + runner there. That aspirational note is the likely source of the wheel worry. + +Where Codex was wrong: it claimed 9 test files; there are 8 (`skills.test.ts` was already +counted). Minor. + +## What is true in the repo today + +- `services/agent` is a standalone pnpm package (own lockfile, Node 24, ESM, `tsx` runtime, + `strict` tsconfig with `noEmit`). +- 8 unit tests exist under `services/agent/test/`, written as hand-run `tsx` + `node:assert` + scripts. No `pnpm test`, no runner, no aggregation. +- Those tests run in NO CI workflow. `12-check-unit-tests.yml`'s services job is Python-only + (`services/oss/tests/pytest/unit`). +- No typecheck gate runs anywhere, despite `strict`. 
+- The wire contract is pinned from Python only (`test_wire_contract.py` + golden fixtures); + the TS `protocol.ts` has no test asserting it. +- The repo already standardizes vitest for TS units (`web/packages/*`), with a written + folder spec (`docs/designs/testing/testing.structure.specs.md`). + +## Open decisions + +1. **Runner: vitest vs node:test.** Recommended: vitest (matches `web/packages`, junit + + coverage + watch out of the box). Blocks Phase 1 config only; structure is the same + either way. +2. **Folder layout: move `test/` to `tests/unit/`?** Recommended: yes, to match web packages + and the structure spec. Low-risk mechanical move. +3. **Does `typecheck` failure fail CI?** Recommended: yes. +4. **Add eslint to `services/agent`?** Recommended: defer (optional Phase 4); prettier + + `tsc --strict` is the floor. + +## Progress + +- [x] Inventory the new TS and how it builds/ships +- [x] Confirm the test/CI/typecheck gaps (verified: no CI runs the runner tests) +- [x] Capture the repo's existing TS conventions (vitest, structure spec, CI shape) +- [x] Write context / research / plan +- [x] Phase 1: vitest + scripts + convert tests (green: 42 tests, typecheck gate live) +- [x] Phase 2: CI Node job + junit publish (added to 12-check-unit-tests.yml; YAML + frozen install verified) +- [x] Phase 3: golden-fixture contract test on the TS side (runtime + compile-time guards; both proven) +- [x] Phase 4: `AGENTS.md` + the `createAgentServer`/`runCli` seam + server/cli tests (eslint deferred) +- [~] Phase 5: runner-side version/`/health` + version bump + local.py docstring DONE; client probe, release scheme, image publishing, app.py config hygiene DEFERRED (decisions) +- [ ] Commit: lands with `feat/agent-runner-engines` (shared files block an independent commit) + +## Notes / caveats for the next reader + +- `services/agent` is intentionally NOT in `web/pnpm-workspace.yaml`. 
Keep it standalone so + the sidecar Docker build stays decoupled from the web dependency graph. +- The golden fixtures live under `sdks/python/oss/tests/pytest/unit/agents/golden/`. The TS + contract test should read them in place, not copy them. +- Frontend TS (`web/oss/src/components/AgentChatSlice/`) is out of scope; it already has a + home and conventions. +- Some runner modules read env at import time; new tests should dynamic-import after setting + env (vitest isolates modules per file). diff --git a/services/agent/AGENTS.md b/services/agent/AGENTS.md new file mode 100644 index 0000000000..135532260a --- /dev/null +++ b/services/agent/AGENTS.md @@ -0,0 +1,62 @@ +# Agent runner (TypeScript) conventions + +Scope: everything under `services/agent/`. This is the Node "agent runner" sidecar. It runs +the agent loop and serves one contract: a JSON `/run` request in, a structured result out. +The Python agent service (`services/oss/src/agent/`) decides *what* to run; this package +*runs* it. It lives in Node because the harnesses (Pi, Claude Code, rivet's `sandbox-agent`) +are Node libraries with no Python SDK. The repo-wide rules live in `/AGENTS.md`; the +architecture overview is this folder's `README.md`. + +## This is a standalone pnpm package + +Not part of the `web/` pnpm workspace. It has its OWN `pnpm-lock.yaml`, builds its own Docker +image, and pins Node 24 / pnpm 10.30 / ESM (`"type": "module"`). It runs through `tsx` (no +app compile step); the only build is `pnpm run build:extension` (esbuild-bundles the Pi +extension into `dist/`). Keep it standalone so the sidecar image stays decoupled from the web +dependency graph. 
+ +## Commands + +```bash +pnpm install # from services/agent, with Node 24 on PATH +pnpm run serve # HTTP sidecar on :8765 (GET /health, POST /run) +pnpm run run:cli # one JSON request on stdin -> one result on stdout +pnpm test # vitest: all unit tests +pnpm run test:watch # vitest watch +pnpm run test:coverage # vitest + v8 coverage +pnpm run typecheck # tsc --noEmit (src + tests + vitest.config) +``` + +## Where code and tests go + +- Runtime code: `src/` — `engines/` (one engine per file: `pi`, `rivet`), `tools/`, + `tracing/`, `extensions/`. Entrypoints: `cli.ts`, `server.ts`. The `/run` wire contract is + `protocol.ts`. +- Tests: `tests/unit/**/*.test.ts` (vitest, `node:assert` is fine inside `it`). Shared test + helpers and fixtures live in `tests/utils/`. This mirrors `web/packages/*` and the repo + testing.structure spec. Do not add tests back under a flat `test/` directory. +- Build/test artifacts (`test-results/`, `coverage/`, `dist/`) are git-ignored from the ROOT + `.gitignore` — a nested `services/agent/.gitignore` does NOT take effect (the repo-wide + `.*` rule ignores all nested `.gitignore` files). + +## The wire contract is mirrored — change both sides + +`src/protocol.ts` is the source of the `/run` types. The Python side hand-mirrors them in +`sdks/python/agenta/sdk/agents/utils/wire.py`, and the contract is pinned by shared golden +fixtures in `sdks/python/oss/tests/pytest/unit/agents/golden/`. Both sides assert those +fixtures: Python in `test_wire_contract.py`, TypeScript in `tests/unit/wire-contract.test.ts`. +If you add, rename, or remove a wire field, update the golden, then `protocol.ts` AND +`wire.py` AND both contract tests, deliberately. The TS test has a compile-time key guard, so +a drifted `protocol.ts` fails `tsc`. + +## Testing seams + +`server.ts` and `cli.ts` export `createAgentServer(run)` / `runCli(raw, stream, io)` so the HTTP +and CLI behavior can be tested with a fake engine (no live Pi/Claude/rivet). 
Prefer testing +through those seams over importing the real engines. Engine-internal logic that is pure +(`tracing/otel.ts` state machine, `tools/*`, `engines/skills.ts`) is unit-tested directly. + +## Before committing + +There is no eslint here yet (deferred); `tsc --strict` + the repo-wide prettier hook are the +floor. Run `pnpm test` and `pnpm run typecheck` before pushing. diff --git a/services/agent/CLAUDE.md b/services/agent/CLAUDE.md new file mode 120000 index 0000000000..47dc3e3d86 --- /dev/null +++ b/services/agent/CLAUDE.md @@ -0,0 +1 @@ +AGENTS.md \ No newline at end of file diff --git a/services/agent/package.json b/services/agent/package.json index 231b6ff5f6..e3311615f9 100644 --- a/services/agent/package.json +++ b/services/agent/package.json @@ -1,6 +1,6 @@ { "name": "agenta-agent-pi-wrapper", - "version": "0.0.0", + "version": "0.1.0", "private": true, "type": "module", "packageManager": "pnpm@10.30.0", @@ -10,7 +10,12 @@ "serve": "tsx src/server.ts", "serve:watch": "tsx watch src/server.ts", "build:extension": "node scripts/build-extension.mjs", - "login": "pi" + "login": "pi", + "test": "pnpm run test:unit", + "test:unit": "vitest run", + "test:watch": "vitest", + "test:coverage": "vitest run --coverage", + "typecheck": "tsc --noEmit" }, "dependencies": { "@daytonaio/sdk": "^0.187.0", @@ -26,9 +31,12 @@ "sandbox-agent": "0.4.2" }, "devDependencies": { - "@types/node": "22.10.2", + "@types/node": "^24.0.0", + "@vitest/coverage-v8": "^4.1.4", "esbuild": "0.23.1", - "tsx": "4.19.2" + "tsx": "4.19.2", + "typescript": "^5.9.3", + "vitest": "^4.1.4" }, "pnpm": { "onlyBuiltDependencies": [ diff --git a/services/agent/pnpm-lock.yaml b/services/agent/pnpm-lock.yaml index 7bd7134915..62bde1acb0 100644 --- a/services/agent/pnpm-lock.yaml +++ b/services/agent/pnpm-lock.yaml @@ -43,14 +43,23 @@ importers: version: 0.4.2(@daytonaio/sdk@0.187.0(ws@8.21.0))(zod@4.4.3) devDependencies: '@types/node': - specifier: 22.10.2 - version: 22.10.2 + specifier: ^24.0.0 + 
version: 24.13.2 + '@vitest/coverage-v8': + specifier: ^4.1.4 + version: 4.1.9(vitest@4.1.9) esbuild: specifier: 0.23.1 version: 0.23.1 tsx: specifier: 4.19.2 version: 4.19.2 + typescript: + specifier: ^5.9.3 + version: 5.9.3 + vitest: + specifier: ^4.1.4 + version: 4.1.9(@opentelemetry/api@1.9.0)(@types/node@24.13.2)(@vitest/coverage-v8@4.1.9)(vite@8.0.16(@types/node@24.13.2)(esbuild@0.23.1)(jiti@2.7.0)(tsx@4.19.2)(yaml@2.9.0)) packages: @@ -269,10 +278,31 @@ packages: resolution: {integrity: sha512-iY8yvjE0y651BixKNPgmv1WrQc+GZ142sb0z4gYnChDDY2YqI4P/jsSopBWrKfAt7LOJAkOXt7rC/hms+WclQQ==} engines: {node: '>=18.0.0'} + '@babel/helper-string-parser@7.29.7': + resolution: {integrity: sha512-Pb5ijPrZ89GDH8223L4UP8i6QApWxs04RbPQJTeWDV0/keR2E36MeKnyr6LYmUUvqRRI+Iv87SuF1W6ErINzYw==} + engines: {node: '>=6.9.0'} + + '@babel/helper-validator-identifier@7.29.7': + resolution: {integrity: sha512-qehxGkRj55h/ff8EMaJ+cYhyaKlHIxqYDn682wQD7RNp9UujOQsHog2uS0r2vzr4pW+sXf90NeeayjcNaX3fFg==} + engines: {node: '>=6.9.0'} + + '@babel/parser@7.29.7': + resolution: {integrity: sha512-hnORnjP/1P/zFEndoeX+n+t1RwWRJiJpM/jO7FW32Kn9r5+sJB2JWOdYo4L6k78j15eCwY3Gm/7364B1EMwtNg==} + engines: {node: '>=6.0.0'} + hasBin: true + '@babel/runtime@7.29.7': resolution: {integrity: sha512-Nq8OhGWiZIZGV6hLHoyAKLLcJihP/xFeBMGJoUrxTX2psI8dCifzLhZISFb+VWS3wFMRDmCGw5R+dOySCqPLhw==} engines: {node: '>=6.9.0'} + '@babel/types@7.29.7': + resolution: {integrity: sha512-4zBIxpPzowiZpusoFkyGVwakdRJUyuH5PxQ/PrqghfdFWWasvnCdPfQXHrenDai+gyLARulZjZowCOj6fjT4pA==} + engines: {node: '>=6.9.0'} + + '@bcoe/v8-coverage@1.0.2': + resolution: {integrity: sha512-6zABk/ECA/QYSCQ1NGiVwwbQerUCZ+TQbp64Q3AgmfNvurHH0j8TtXa1qbShXA6qqkpAj4V5W8pP6mLe1mcMqA==} + engines: {node: '>=18'} + '@daytona/api-client@0.187.0': resolution: {integrity: sha512-riKOJ6eSuy67DL6iJlAa3Bfjnm4iQmkOdJk0B5hqrYMZeZmVDsgdiZtYvFpyoa+2KCZFNb0Gs5dQwO1d6NhGCw==} @@ -301,6 +331,15 @@ packages: resolution: {integrity: 
sha512-/ZhfFiHSBMH7AbDrBQIN+UWlJnl9tSEpLYICRGGMzmNfyCqX+30NYacIhyOEaD8R5rS6wJZysAOPU0yNwigbXw==} engines: {node: '>=22.19.0'} + '@emnapi/core@1.10.0': + resolution: {integrity: sha512-yq6OkJ4p82CAfPl0u9mQebQHKPJkY7WrIuk205cTYnYe+k2Z8YBh11FrbRG/H6ihirqcacOgl2BIO8oyMQLeXw==} + + '@emnapi/runtime@1.10.0': + resolution: {integrity: sha512-ewvYlk86xUoGI0zQRNq/mC+16R1QeDlKQy21Ki3oSYXNgLb45GV1P6A0M+/s6nyCuNDqe5VpaY84BzXGwVbwFA==} + + '@emnapi/wasi-threads@1.2.1': + resolution: {integrity: sha512-uTII7OYF+/Mes/MrcIOYp5yOtSMLBWSIoLPpcgwipoiKbli6k322tcoFsxoIIxPDqW01SQGAgko4EzZi2BNv2w==} + '@esbuild/aix-ppc64@0.23.1': resolution: {integrity: sha512-6VhYk1diRqrhBAqpJEdjASR/+WVRtfjpqKuNw11cLiaWpAT/Uu+nokB+UJnevzy/P9C/ty6AOe0dwueMrGh/iQ==} engines: {node: '>=18'} @@ -569,6 +608,16 @@ packages: resolution: {integrity: sha512-wgm9Ehl2jpeqP3zw/7mo3kRHFp5MEDhqAdwy1fTGkHAwnkGOVsgpvQhL8B5n1qlb01jV3n/bI0ZfZp5lWA1k4w==} engines: {node: '>=18.0.0'} + '@jridgewell/resolve-uri@3.1.2': + resolution: {integrity: sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==} + engines: {node: '>=6.0.0'} + + '@jridgewell/sourcemap-codec@1.5.5': + resolution: {integrity: sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==} + + '@jridgewell/trace-mapping@0.3.31': + resolution: {integrity: sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw==} + '@js-sdsl/ordered-map@4.4.2': resolution: {integrity: sha512-iUKgm52T8HOE/makSxjqoWhe95ZJA1/G1sYsGev2JDKUSS14KAgg1LHb+Ba+IPow0xflbnSkOsZcO08C7w1gYw==} @@ -643,6 +692,12 @@ packages: '@mistralai/mistralai@2.2.1': resolution: {integrity: sha512-uKU8CZmL2RzYKmplsU01hii4p3pe4HqJefpWNRWXm1Tcm0Sm4xXfwSLIy4k7ZCPlbETCGcp69E7hZs+WOJ5itQ==} + '@napi-rs/wasm-runtime@1.1.5': + resolution: {integrity: sha512-AWPoBRJ9tsnVhor4sjO7rkni+7p+2IAEFj6cx06UgP10jkQHqay/36uRV/bFkgrh18D9vb4cr8Q0Pthskgzy+Q==} + peerDependencies: + '@emnapi/core': ^1.7.1 + 
'@emnapi/runtime': ^1.7.1 + '@nodable/entities@2.2.0': resolution: {integrity: sha512-9uGyhaQavEUMC8AIddIjau4NsnsXhou+j5sBAGojCM1oxmQpVKTWR/9JxABD6UAv12vpIms55fPZKFQEhG6uBg==} @@ -952,6 +1007,9 @@ packages: resolution: {integrity: sha512-/UhIkaZgPutTFmQ7RnIJGgDXZmtEJ7Dvi86xNTFWcnRxVRNk/aotsqDJYeEvDP+FSMB2SdW+pQzNMcWP0rwuNA==} engines: {node: '>=14'} + '@oxc-project/types@0.133.0': + resolution: {integrity: sha512-KzkdCd6Uxqnf6l3HOw1xfatAlUURA0g14cvBYFyJ5SaNOQbOUvBr9PKArcPcrNIeRsBdgcUzOGrhKveVpvOIGA==} + '@protobufjs/aspromise@1.1.2': resolution: {integrity: sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==} @@ -982,6 +1040,104 @@ packages: '@protobufjs/utf8@1.1.1': resolution: {integrity: sha512-oOAWABowe8EAbMyWKM0tYDKi8Yaox52D+HWZhAIJqQXbqe0xI/GV7FhLWqlEKreMkfDjshR5FKgi3mnle0h6Eg==} + '@rolldown/binding-android-arm64@1.0.3': + resolution: {integrity: sha512-454rs7jHngixp/NMxd5srYD57OnzSlZ/eFTETjORQHLwJG1lRtmNOJcBerZlfu4GjKqeq8aCCIQrMdHyhI51Hw==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm64] + os: [android] + + '@rolldown/binding-darwin-arm64@1.0.3': + resolution: {integrity: sha512-PcAhP+ynjURNyy8SKGl5DQP94aGuB/7JrXJb/t7P+hanXvQVMWzUvRRhBAcg/lNRadBhoUPqSoP4xw5tR/KBEA==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm64] + os: [darwin] + + '@rolldown/binding-darwin-x64@1.0.3': + resolution: {integrity: sha512-9YpfeUvSE2RS7wysJ81uOZkXJz7f7Q55H2Gvp3VEw/EsahqDtrphrZ0EwDLK5vvKOzaCrBsjF8JmnMLcUt78Gg==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [x64] + os: [darwin] + + '@rolldown/binding-freebsd-x64@1.0.3': + resolution: {integrity: sha512-yB1IlAsSNHncV6SCTL27/MVGR5htvQsoGxIv5KMGXALp+Ll1wYsn+x98M9MW7qa+NdSbvrrY7ANI4wLJ0n1e6g==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [x64] + os: [freebsd] + + '@rolldown/binding-linux-arm-gnueabihf@1.0.3': + resolution: {integrity: sha512-Yi30IVAAfLUCy2MseFjbB1jAMDl1VMCAas5StnYp8da9+CKvMd2H2cbEjWcw5NPaPqzvYkVIaF1nNUG+b7u/sw==} + engines: {node: ^20.19.0 || 
>=22.12.0} + cpu: [arm] + os: [linux] + + '@rolldown/binding-linux-arm64-gnu@1.0.3': + resolution: {integrity: sha512-jsO7R8To+AdlYgUmN5sHSCZbfhtMBkO0WUx8iORQnPcMMdgr7qM2DQmMwgabs3GhNztdmoKkMKQFHD6DTMCIQw==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm64] + os: [linux] + libc: [glibc] + + '@rolldown/binding-linux-arm64-musl@1.0.3': + resolution: {integrity: sha512-VWkUHwWriDciit80wleYwKILoR/KMvxh/IdwS/paX+ZgpuRpCrKLUdadJbc0NpBEiyhpYawsJ73j9aCvOH+f7Q==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm64] + os: [linux] + libc: [musl] + + '@rolldown/binding-linux-ppc64-gnu@1.0.3': + resolution: {integrity: sha512-5f1laC0SlIR0yDbFCd8acUhvJIag6N3zC5P7oUPN6wX0aOma+uKJ0wBDH5aq7I1PVI2ttTlhJwzwRIBnLiSGEg==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [ppc64] + os: [linux] + libc: [glibc] + + '@rolldown/binding-linux-s390x-gnu@1.0.3': + resolution: {integrity: sha512-Iq4ko0r4XsgbrF/LunNgHtAGLRRVE2kXonAXQ/MV0mC6jQpMOhW1SvtZja2EhC/kd05++bP78dsqBeIQyYJ6Yg==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [s390x] + os: [linux] + libc: [glibc] + + '@rolldown/binding-linux-x64-gnu@1.0.3': + resolution: {integrity: sha512-B8m6tD5+/N5FeNQFbKlLA/2yVq9ycQP1SeedyEYYKWBNR3ZQbkvIUcNnDNM03lO1l5F2roiiFJGgvoLLyZXtSg==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [x64] + os: [linux] + libc: [glibc] + + '@rolldown/binding-linux-x64-musl@1.0.3': + resolution: {integrity: sha512-pSdpdUJHkuCxun9LE7jvgUB9qsRgaiyNNCX7m/AvHTcq67AiT/Yhoxvw5zPfhrM8k/BfP8ce/hMOpthKDpEUow==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [x64] + os: [linux] + libc: [musl] + + '@rolldown/binding-openharmony-arm64@1.0.3': + resolution: {integrity: sha512-OXXS3RKJgX2uLwM+gYyuH5omcH8fL1LJs96pZGgtetVCahON57+d4SJHzTgZiOjxgGkSnpXpOsWuPDGAKAigEg==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm64] + os: [openharmony] + + '@rolldown/binding-wasm32-wasi@1.0.3': + resolution: {integrity: sha512-JTtb8BWFynicNSoPrehsCzBtOKjZ6jhMiPFEmOiuXg1Fl8dn2KHQob+GuPSGR0dryQa1PQJbzjF3dqO/whhjLg==} + engines: {node: 
^20.19.0 || >=22.12.0} + cpu: [wasm32] + + '@rolldown/binding-win32-arm64-msvc@1.0.3': + resolution: {integrity: sha512-gEdFFEN70A/jxb2svrWsN3aDL7OUtmvlOy+6fa2jxG8K0wQ1ZbdeLGnidov6Yu5/733dI5ySfzFlQ/cb0bSz1g==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [arm64] + os: [win32] + + '@rolldown/binding-win32-x64-msvc@1.0.3': + resolution: {integrity: sha512-eXB7CHuaQdqmJcc3koCNtNPmT/bj2gc999kUFgBxG8Ac0NdgXc4rkCHhqrgrhN3zddvvvrgzj1e90SuSfmyIXA==} + engines: {node: ^20.19.0 || >=22.12.0} + cpu: [x64] + os: [win32] + + '@rolldown/pluginutils@1.0.1': + resolution: {integrity: sha512-2j9bGt5Jh8hj+vPtgzPtl72j0yRxHAyumoo6TNfAjsLB04UtpSvPbPcDcBMxz7n+9CYB0c1GxQFxYRg2jimqGw==} + '@sandbox-agent/cli-darwin-arm64@0.4.2': resolution: {integrity: sha512-+L1O8SI7k/LLhyB4dG0ghmz1cJHa0WtVjuRTrEE2gw/5EbGLWopPBsCVCmQ7snrQ4fPwtaiZDhfExcEj1VI7aw==} cpu: [arm64] @@ -1057,12 +1213,65 @@ packages: resolution: {integrity: sha512-R8Rdn8Hy72KKcebgLiv8jQcQkXoLMOGGv5uI1/k0l+snqkOzQ1R0ChUBCxWMlBsFMekWjq0wRudIweFs7sKT5A==} engines: {node: '>=14.0.0'} - '@types/node@22.10.2': - resolution: {integrity: sha512-Xxr6BBRCAOQixvonOye19wnzyDiUtTeqldOOmj3CkeblonbccA12PFwlufvRdrpjXxqnmUaeiU5EOA+7s5diUQ==} + '@standard-schema/spec@1.1.0': + resolution: {integrity: sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w==} + + '@tybys/wasm-util@0.10.2': + resolution: {integrity: sha512-RoBvJ2X0wuKlWFIjrwffGw1IqZHKQqzIchKaadZZfnNpsAYp2mM0h36JtPCjNDAHGgYez/15uMBpfGwchhiMgg==} + + '@types/chai@5.2.3': + resolution: {integrity: sha512-Mw558oeA9fFbv65/y4mHtXDs9bPnFMZAL/jxdPFUpOHHIXX91mcgEHbS5Lahr+pwZFR8A7GQleRWeI6cGFC2UA==} + + '@types/deep-eql@4.0.2': + resolution: {integrity: sha512-c9h9dVVMigMPc4bwTvC5dxqtqJZwQPePsWjPlpSOnojbor6pGqdk541lfA7AqFQr5pB1BRdq0juY9db81BwyFw==} + + '@types/estree@1.0.9': + resolution: {integrity: sha512-GhdPgy1el4/ImP05X05Uw4cw2/M93BCUmnEvWZNStlCzEKME4Fkk+YpoA5OiHNQmoS7Cafb8Xa3Pya8m1Qrzeg==} + + '@types/node@24.13.2': + resolution: {integrity: 
sha512-fRa09kZTgu8o71KFcDjUFuc7F+dEbZYZmkI0mg5YBTRs0yMKjYHsq/c0urDKeDb+D5qVgXOdFcuu+DZPKOITwA==} '@types/retry@0.12.0': resolution: {integrity: sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA==} + '@vitest/coverage-v8@4.1.9': + resolution: {integrity: sha512-G9/lgqibheLVBDRuya45EbsEXTYcWoSG+TLg7i2axuzx0Eq62eXn+aWXyaVdV5vKvFSWd6ywcX8hA7la9Pvu8g==} + peerDependencies: + '@vitest/browser': 4.1.9 + vitest: 4.1.9 + peerDependenciesMeta: + '@vitest/browser': + optional: true + + '@vitest/expect@4.1.9': + resolution: {integrity: sha512-vl/rYsUKcBr3SnQn166+XR5ZQcgMx3DQhFWdfli/cWpLnLUmbxZvyrJZotLFUryib+LtArYMSTJ5RbQ57ZqrlA==} + + '@vitest/mocker@4.1.9': + resolution: {integrity: sha512-EVkXzBjrPGM+cK8/ANWgBrkUCfJfb38/EfTSO8h7pWvKkyPkpWxvR7BkD2MyItMF62C97zAEoqdpUixwR/e+Rw==} + peerDependencies: + msw: ^2.4.9 + vite: ^6.0.0 || ^7.0.0 || ^8.0.0 + peerDependenciesMeta: + msw: + optional: true + vite: + optional: true + + '@vitest/pretty-format@4.1.9': + resolution: {integrity: sha512-s0iufns3iIFitdgm+YR7g1whCAaGtXz459VS9/PqyKDEEFgYIhsHOQmXgIgDuYCt7DeQmiZT0Qe2OA2p4ZPu5A==} + + '@vitest/runner@4.1.9': + resolution: {integrity: sha512-KXLMDtc7oe70+3mJfGrPUWPesswH+3sTxAMAMl8DG7I8IUQT4XW718dY5ID3vPUcmlu27CcKfY4P3h3I29SLJg==} + + '@vitest/snapshot@4.1.9': + resolution: {integrity: sha512-Jc7RKGNBo8Z28WYIm0Niej4xdSPByRf6mU58VpHQkd6Zh05rlnA+twjbK5HyeIGHxrzsc3mJgS43uM0CZKzaIA==} + + '@vitest/spy@4.1.9': + resolution: {integrity: sha512-fHpsS6mIi+PiEW+vcRVOMkX1oSaPKne3VOclSFICPcGOmfKgXPU5iAah+wcNcj2xPrCCmfq99IDGf+EojhhvhA==} + + '@vitest/utils@4.1.9': + resolution: {integrity: sha512-A51o8ymO5PpqlWNnBP9ZHPXDIpuMtTLlGSjN7la4US+LJzoUMyhwjA5QXlm39JexgwHKW4Xjs8Z2d3dLCXOeuA==} + '@zed-industries/claude-agent-acp@0.23.1': resolution: {integrity: sha512-aQ1gAm1MBalwEgE/VB/m4z6sXw/fRccNOW268pNLXnWV704ZuLbbm0N+oEv8KTmd53dJ6YzMhMpD8p5ig6C+sA==} deprecated: This package has been renamed to @agentclientprotocol/claude-agent-acp. 
Please migrate to continue receiving updates. @@ -1100,6 +1309,13 @@ packages: anynum@1.0.0: resolution: {integrity: sha512-xjR9/zBVnUOP6ztMIIgShjsxui80nQUQH+5xJnvrYLs+90bF25/KJqaAi8mk+B4RDtX1Nspi6fmp4YTEts8SfA==} + assertion-error@2.0.1: + resolution: {integrity: sha512-Izi8RQcffqCeNVgFigKli1ssklIbpHnCYc6AknXGYoB6grJqyeby7jv12JUQgmTAnIDnbck1uxksT4dzN3PWBA==} + engines: {node: '>=12'} + + ast-v8-to-istanbul@1.0.4: + resolution: {integrity: sha512-0bC0/4bTSrnwdhU3IsZDwEdojvuPrSg59OYZfKsLRtJZ0u8VBx9DebfqqG8bRdCC0I7vjgxmPi41P0lpkhJHtA==} + asynckit@0.4.0: resolution: {integrity: sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==} @@ -1141,6 +1357,10 @@ packages: resolution: {integrity: sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==} engines: {node: '>= 0.4'} + chai@6.2.2: + resolution: {integrity: sha512-NUPRluOfOiTKBKvWPtSD4PhFvWCqOi0BGStNWs57X9js7XGTprSmFoz5F0tWhR4WPjNeR9jXqdC7/UpSJTnlRg==} + engines: {node: '>=18'} + chalk@5.6.2: resolution: {integrity: sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA==} engines: {node: ^12.17.0 || ^14.13 || >=16.0.0} @@ -1167,6 +1387,9 @@ packages: resolution: {integrity: sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==} engines: {node: '>= 0.8'} + convert-source-map@2.0.0: + resolution: {integrity: sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==} + cross-spawn@7.0.6: resolution: {integrity: sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==} engines: {node: '>= 8'} @@ -1188,6 +1411,10 @@ packages: resolution: {integrity: sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==} engines: {node: '>=0.4.0'} + detect-libc@2.1.2: + resolution: {integrity: 
sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==} + engines: {node: '>=8'} + diff@8.0.4: resolution: {integrity: sha512-DPi0FmjiSU5EvQV0++GFDOJ9ASQUVFh5kD+OzOnYdi7n3Wpm9hWWGfB/O2blfHcMVTL5WkQXSnRiK9makhrcnw==} engines: {node: '>=0.3.1'} @@ -1214,6 +1441,9 @@ packages: resolution: {integrity: sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==} engines: {node: '>= 0.4'} + es-module-lexer@2.1.0: + resolution: {integrity: sha512-n27zTYMjYu1aj4MjCWzSP7G9r75utsaoc8m61weK+W8JMBGGQybd43GstCXZ3WNmSFtGT9wi59qQTW6mhTR5LQ==} + es-object-atoms@1.1.2: resolution: {integrity: sha512-HWcBoN6NileqtSydK2FqHbS/LoDd2pqrnQHLyJzBj4kOp/ky2MWMN694xOfkK8/SnUsW2DH7EfyVlydKCsm1Zw==} engines: {node: '>= 0.4'} @@ -1231,6 +1461,9 @@ packages: resolution: {integrity: sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==} engines: {node: '>=6'} + estree-walker@3.0.3: + resolution: {integrity: sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==} + events@3.3.0: resolution: {integrity: sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==} engines: {node: '>=0.8.x'} @@ -1239,6 +1472,10 @@ packages: resolution: {integrity: sha512-A5EmesHW6rfnZ9ysHQjPdJRni0SRar0tjtG5MNtm9n5TUvsYU8oozprtRD4AqHxcZWWlVuAmQo2nWKfN9oyjTw==} engines: {node: '>=0.10.0'} + expect-type@1.3.0: + resolution: {integrity: sha512-knvyeauYhqjOYvQ66MznSMs83wmHrCycNEN6Ao+2AeYEfxUIkuiVxdEa1qlGEPK+We3n0THiDciYSsCcgW/DoA==} + engines: {node: '>=12.0.0'} + extend@3.0.2: resolution: {integrity: sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==} @@ -1256,6 +1493,15 @@ packages: fastq@1.20.1: resolution: {integrity: sha512-GGToxJ/w1x32s/D2EKND7kTil4n8OVk/9mycTc4VDza13lOvpUZTGX3mFSCtV9ksdGBVzvsyAVLM6mHFThxXxw==} + fdir@6.5.0: + resolution: {integrity: 
sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==} + engines: {node: '>=12.0.0'} + peerDependencies: + picomatch: ^3 || ^4 + peerDependenciesMeta: + picomatch: + optional: true + fetch-blob@3.2.0: resolution: {integrity: sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==} engines: {node: ^12.20 || >= 14.13} @@ -1342,6 +1588,10 @@ packages: graceful-fs@4.2.11: resolution: {integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==} + has-flag@4.0.0: + resolution: {integrity: sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==} + engines: {node: '>=8'} + has-symbols@1.1.0: resolution: {integrity: sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==} engines: {node: '>= 0.4'} @@ -1365,6 +1615,9 @@ packages: resolution: {integrity: sha512-Hc+ghLoSt6QaYZUv0WBiIvmMDZuZZ7oaDvdH8MbfOO4lOsxdXLEvuC6ePoGs9H1X9oCLyq6+NVN0MKqD+ydxyg==} engines: {node: ^20.17.0 || >=22.9.0} + html-escaper@2.0.2: + resolution: {integrity: sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==} + http-proxy-agent@7.0.2: resolution: {integrity: sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==} engines: {node: '>= 14'} @@ -1415,10 +1668,25 @@ packages: peerDependencies: ws: '*' + istanbul-lib-coverage@3.2.2: + resolution: {integrity: sha512-O8dpsF+r0WV/8MNRKfnmrtCWhuKjxrq2w+jpzBL5UZKTi2LeVWnWOmWRxFlesJONmc+wLAGvKQZEOanko0LFTg==} + engines: {node: '>=8'} + + istanbul-lib-report@3.0.1: + resolution: {integrity: sha512-GCfE1mtsHGOELCU8e/Z7YWzpmybrx/+dSTfLrvY8qRmaY6zXTKWn6WQIjaAFw069icm6GVMNkgu0NzI4iPZUNw==} + engines: {node: '>=10'} + + istanbul-reports@3.2.0: + resolution: {integrity: sha512-HGYWWS/ehqTV3xN10i23tkPkpH46MLCIMFNCaaKNavAXTF1RkqxawEPtnjnGZ6XKSInBKkiOA5BKS+aZiY3AvA==} + engines: 
{node: '>=8'} + jiti@2.7.0: resolution: {integrity: sha512-AC/7JofJvZGrrneWNaEnJeOLUx+JlGt7tNa0wZiRPT4MY1wmfKjt2+6O2p2uz2+skll8OZZmJMNqeke7kKbNgQ==} hasBin: true + js-tokens@10.0.0: + resolution: {integrity: sha512-lM/UBzQmfJRo9ABXbPWemivdCW8V2G8FHaHdypQaIy523snUjog0W71ayWXTjiR+ixeMyVHN2XcpnTd/liPg/Q==} + json-bigint@1.0.0: resolution: {integrity: sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ==} @@ -1432,6 +1700,80 @@ packages: jws@4.0.1: resolution: {integrity: sha512-EKI/M/yqPncGUUh44xz0PxSidXFr/+r0pA70+gIYhjv+et7yxM+s29Y+VGDkovRofQem0fs7Uvf4+YmAdyRduA==} + lightningcss-android-arm64@1.32.0: + resolution: {integrity: sha512-YK7/ClTt4kAK0vo6w3X+Pnm0D2cf2vPHbhOXdoNti1Ga0al1P4TBZhwjATvjNwLEBCnKvjJc2jQgHXH0NEwlAg==} + engines: {node: '>= 12.0.0'} + cpu: [arm64] + os: [android] + + lightningcss-darwin-arm64@1.32.0: + resolution: {integrity: sha512-RzeG9Ju5bag2Bv1/lwlVJvBE3q6TtXskdZLLCyfg5pt+HLz9BqlICO7LZM7VHNTTn/5PRhHFBSjk5lc4cmscPQ==} + engines: {node: '>= 12.0.0'} + cpu: [arm64] + os: [darwin] + + lightningcss-darwin-x64@1.32.0: + resolution: {integrity: sha512-U+QsBp2m/s2wqpUYT/6wnlagdZbtZdndSmut/NJqlCcMLTWp5muCrID+K5UJ6jqD2BFshejCYXniPDbNh73V8w==} + engines: {node: '>= 12.0.0'} + cpu: [x64] + os: [darwin] + + lightningcss-freebsd-x64@1.32.0: + resolution: {integrity: sha512-JCTigedEksZk3tHTTthnMdVfGf61Fky8Ji2E4YjUTEQX14xiy/lTzXnu1vwiZe3bYe0q+SpsSH/CTeDXK6WHig==} + engines: {node: '>= 12.0.0'} + cpu: [x64] + os: [freebsd] + + lightningcss-linux-arm-gnueabihf@1.32.0: + resolution: {integrity: sha512-x6rnnpRa2GL0zQOkt6rts3YDPzduLpWvwAF6EMhXFVZXD4tPrBkEFqzGowzCsIWsPjqSK+tyNEODUBXeeVHSkw==} + engines: {node: '>= 12.0.0'} + cpu: [arm] + os: [linux] + + lightningcss-linux-arm64-gnu@1.32.0: + resolution: {integrity: sha512-0nnMyoyOLRJXfbMOilaSRcLH3Jw5z9HDNGfT/gwCPgaDjnx0i8w7vBzFLFR1f6CMLKF8gVbebmkUN3fa/kQJpQ==} + engines: {node: '>= 12.0.0'} + cpu: [arm64] + os: [linux] + libc: [glibc] + + 
lightningcss-linux-arm64-musl@1.32.0: + resolution: {integrity: sha512-UpQkoenr4UJEzgVIYpI80lDFvRmPVg6oqboNHfoH4CQIfNA+HOrZ7Mo7KZP02dC6LjghPQJeBsvXhJod/wnIBg==} + engines: {node: '>= 12.0.0'} + cpu: [arm64] + os: [linux] + libc: [musl] + + lightningcss-linux-x64-gnu@1.32.0: + resolution: {integrity: sha512-V7Qr52IhZmdKPVr+Vtw8o+WLsQJYCTd8loIfpDaMRWGUZfBOYEJeyJIkqGIDMZPwPx24pUMfwSxxI8phr/MbOA==} + engines: {node: '>= 12.0.0'} + cpu: [x64] + os: [linux] + libc: [glibc] + + lightningcss-linux-x64-musl@1.32.0: + resolution: {integrity: sha512-bYcLp+Vb0awsiXg/80uCRezCYHNg1/l3mt0gzHnWV9XP1W5sKa5/TCdGWaR/zBM2PeF/HbsQv/j2URNOiVuxWg==} + engines: {node: '>= 12.0.0'} + cpu: [x64] + os: [linux] + libc: [musl] + + lightningcss-win32-arm64-msvc@1.32.0: + resolution: {integrity: sha512-8SbC8BR40pS6baCM8sbtYDSwEVQd4JlFTOlaD3gWGHfThTcABnNDBda6eTZeqbofalIJhFx0qKzgHJmcPTnGdw==} + engines: {node: '>= 12.0.0'} + cpu: [arm64] + os: [win32] + + lightningcss-win32-x64-msvc@1.32.0: + resolution: {integrity: sha512-Amq9B/SoZYdDi1kFrojnoqPLxYhQ4Wo5XiL8EVJrVsB8ARoC1PWW6VGtT0WKCemjy8aC+louJnjS7U18x3b06Q==} + engines: {node: '>= 12.0.0'} + cpu: [x64] + os: [win32] + + lightningcss@1.32.0: + resolution: {integrity: sha512-NXYBzinNrblfraPGyrbPoD19C1h9lfI/1mzgWYvXUTe414Gz/X1FD2XBZSZM7rRTrMA8JL3OtAaGifrIKhQ5yQ==} + engines: {node: '>= 12.0.0'} + lodash.camelcase@4.3.0: resolution: {integrity: sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA==} @@ -1442,6 +1784,16 @@ packages: resolution: {integrity: sha512-RPimw/7aMdv2oqRrxKwvZXcPfwBrn/JZ2xYcY9Hus/6LaS3VOAKVWKWgNLCFSiOm1ESXinjsDlidVU7JlnCN2A==} engines: {node: 20 || >=22} + magic-string@0.30.21: + resolution: {integrity: sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==} + + magicast@0.5.3: + resolution: {integrity: sha512-pVKE4UdSQ7DvHzivsCIFx2BJn1mHG6KsyrFcaxFx6tONdneEuThrDx0Cj3AMg58KyN4pzYT+LHOotxDQDjNvkw==} + + make-dir@4.0.0: + resolution: {integrity: 
sha512-hXdUTZYIVOt1Ex//jAQi+wTZZpUpwBj/0QsOzqegb3rGMMeJiSEu5xLHnYfBrRV4RH2+OCSOO95Is/7x1WJ4bw==} + engines: {node: '>=10'} + marked@15.0.12: resolution: {integrity: sha512-8dD6FusOQSrpv9Z1rdNMdlSgQOIP880DHqnohobOmYLElGEqAL/JvxvuxZO16r4HtjTlfPRDC1hbvxC9dPN2nA==} engines: {node: '>= 18'} @@ -1485,6 +1837,11 @@ packages: ms@2.1.3: resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==} + nanoid@3.3.13: + resolution: {integrity: sha512-sPdqC6ByMVVGvF1ynvvMo0/o+oD1VX7DaHhijt1bFgjvBkHBib4t49GoNDhf2NDta4oeUNlaGbSt5K7qjZ955Q==} + engines: {node: ^10 || ^12 || ^13.7 || ^14 || >=15.0.1} + hasBin: true + node-domexception@1.0.0: resolution: {integrity: sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==} engines: {node: '>=10.5.0'} @@ -1494,6 +1851,10 @@ packages: resolution: {integrity: sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==} engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} + obug@2.1.3: + resolution: {integrity: sha512-9miFgM2OFba7hB+pRgvtV84pYTBaoTHohvmIgiRt6dRIzbwEOIaNaP+dIlGs2fNFoB0SeISs0Jz5WFVRid6Xyg==} + engines: {node: '>=12.20.0'} + openai@6.26.0: resolution: {integrity: sha512-zd23dbWTjiJ6sSAX6s0HrCZi41JwTA1bQVs0wLQPZ2/5o2gxOJA5wh7yOAUgwYybfhDXyhwlpeQf7Mlgx8EOCA==} hasBin: true @@ -1537,10 +1898,21 @@ packages: engines: {node: '>=20'} hasBin: true + picocolors@1.1.1: + resolution: {integrity: sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==} + picomatch@2.3.2: resolution: {integrity: sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==} engines: {node: '>=8.6'} + picomatch@4.0.4: + resolution: {integrity: sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==} + engines: {node: '>=12'} + + postcss@8.5.15: + resolution: {integrity: 
sha512-FfR8sjd4em2T6fb3I2MwAJU7HWVMr9zba+enmQeeWFfCbm+UOC/0X4DS8XtpUTMwWMGbjKYP7xjfNekzyGmB3A==} + engines: {node: ^10 || ^12 || >=14} + proper-lockfile@4.1.2: resolution: {integrity: sha512-TjNPblN4BwAWMXU8s9AEz4JmQxnD1NNL7bNOY/AKUzyamc379FWASUhc/K1pL2noVb+XmZKLL68cjzLsiOAMaA==} @@ -1586,6 +1958,11 @@ packages: resolution: {integrity: sha512-g6QUff04oZpHs0eG5p83rFLhHeV00ug/Yf9nZM6fLeUrPguBTkTQOdpAWWspMh55TZfVQDPaN3NQJfbVRAxdIw==} engines: {iojs: '>=1.0.0', node: '>=0.10.0'} + rolldown@1.0.3: + resolution: {integrity: sha512-i00lAJ2ks1BYr7rjNjKC7BcqAS7nVfiT3QX1SI5aY+AFHblCmaUf9OE9dbdzDvW6dJxbi2ZCZiy9v3CcwOiX3g==} + engines: {node: ^20.19.0 || >=22.12.0} + hasBin: true + run-parallel@1.2.0: resolution: {integrity: sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==} @@ -1641,9 +2018,22 @@ packages: resolution: {integrity: sha512-VsC6n6vz1ihYYyZZwX7YZSF5l5x36ca17OC+a69h94YqB7X6XLwf+5MOgynYir2SLFUbl8gIYvBo8K8RoNQ6bQ==} engines: {node: '>= 0.4'} + siginfo@2.0.0: + resolution: {integrity: sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g==} + signal-exit@3.0.7: resolution: {integrity: sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==} + source-map-js@1.2.1: + resolution: {integrity: sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==} + engines: {node: '>=0.10.0'} + + stackback@0.0.2: + resolution: {integrity: sha512-1XMJE5fQo1jGH6Y/7ebnwPOBEkIEnT4QF32d5R1+VXdXveM0IBMJt8zfaxX1P3QhVwrYe+576+jkANtSS2mBbw==} + + std-env@4.1.0: + resolution: {integrity: sha512-Rq7ybcX2RuC55r9oaPVEW7/xu3tj8u4GeBYHBWCychFtzMIr86A7e3PPEBPT37sHStKX3+TiX/Fr/ACmJLVlLQ==} + stream-browserify@3.0.0: resolution: {integrity: sha512-H73RAHsVBapbim0tU2JwwOiXUj+fikfiaoYAKHF3VJfA0pe2BCzkhAHBlLG6REzE+2WNZcxOXjK7lkso+9euLA==} @@ -1665,10 +2055,29 @@ packages: strnum@2.4.0: resolution: {integrity: 
sha512-sHrVyWWdq28RbhjuJdZsA1SnGRJV6NiXbk6AXBxDOsgAcA+lmpUZCYjOdLBxkXMwis6RRe7dlZt4VlIWFVzkmg==} + supports-color@7.2.0: + resolution: {integrity: sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==} + engines: {node: '>=8'} + tar@7.5.16: resolution: {integrity: sha512-56adEpPMouktRlBLXiaYFFzZ/3+JXa8P9n7WbR+ibIjtviN55mEaOkiysCnPnWm+7kkui1Dn8J9l+g6zV8731w==} engines: {node: '>=18'} + tinybench@2.9.0: + resolution: {integrity: sha512-0+DUvqWMValLmha6lr4kD8iAMK1HzV0/aKnCtWb9v9641TnP/MFb7Pc2bxoxQjTXAErryXVgUOfv2YqNllqGeg==} + + tinyexec@1.2.4: + resolution: {integrity: sha512-SHf/r48b7vOrjve9PxJo3MN5v5yuyjHvdUcrQffT3WXMUfnGmHDVbC4k3sHJaJTgZCwpUplIaAo5ANtMyp3YHg==} + engines: {node: '>=18'} + + tinyglobby@0.2.17: + resolution: {integrity: sha512-wXR/dYpcqKmfWpEdZjiKJOwCNFndD0DMnrW/cYjVGttEkBfVgcLFHoNrlj47mjOVic9yyNu65alsgF4NQyTa2g==} + engines: {node: '>=12.0.0'} + + tinyrainbow@3.1.0: + resolution: {integrity: sha512-Bf+ILmBgretUrdJxzXM0SgXLZ3XfiaUuOj/IKQHuTXip+05Xn+uyEYdVg0kYDipTBcLrCVyUzAPz7QmArb0mmw==} + engines: {node: '>=14.0.0'} + to-regex-range@5.0.1: resolution: {integrity: sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==} engines: {node: '>=8.0'} @@ -1687,8 +2096,13 @@ packages: typebox@1.1.38: resolution: {integrity: sha512-pZ0aQPmMmXoUvSbeuWf/Hzsc+avNw/Zd6VeE8CFgkVGWyuHPJvqeJJDeJqLve+K70LvjYIoleGcoJHPT17cWoA==} - undici-types@6.20.0: - resolution: {integrity: sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==} + typescript@5.9.3: + resolution: {integrity: sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==} + engines: {node: '>=14.17'} + hasBin: true + + undici-types@7.18.2: + resolution: {integrity: sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==} undici@8.3.0: resolution: {integrity: 
sha512-TkUDgb6tl7KOGZ+7e8E3d2FYgUQgF6z5YypqjWmixVQSQERFcVrVg0ySADm2LVLRh5ljAaHTCR5Fmz3Q34rB7Q==} @@ -1697,6 +2111,90 @@ packages: util-deprecate@1.0.2: resolution: {integrity: sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==} + vite@8.0.16: + resolution: {integrity: sha512-h9bXPmJichP5fLmVQo3PyaGSDE2n3aPuomeAlVRm0JLmt4rY6zmPKd59HYI4LNW8oTK7tlTsuC7l/m7awx9Jcw==} + engines: {node: ^20.19.0 || >=22.12.0} + hasBin: true + peerDependencies: + '@types/node': ^20.19.0 || >=22.12.0 + '@vitejs/devtools': ^0.1.18 + esbuild: ^0.27.0 || ^0.28.0 + jiti: '>=1.21.0' + less: ^4.0.0 + sass: ^1.70.0 + sass-embedded: ^1.70.0 + stylus: '>=0.54.8' + sugarss: ^5.0.0 + terser: ^5.16.0 + tsx: ^4.8.1 + yaml: ^2.4.2 + peerDependenciesMeta: + '@types/node': + optional: true + '@vitejs/devtools': + optional: true + esbuild: + optional: true + jiti: + optional: true + less: + optional: true + sass: + optional: true + sass-embedded: + optional: true + stylus: + optional: true + sugarss: + optional: true + terser: + optional: true + tsx: + optional: true + yaml: + optional: true + + vitest@4.1.9: + resolution: {integrity: sha512-nE3/LEyc0z87uHYLZebqCUOaJr2hdtuPp7BQ4BosVFnfltxgAvMG08NyrSGlPpOUWvR27c5flSmYFTNr78L9GQ==} + engines: {node: ^20.0.0 || ^22.0.0 || >=24.0.0} + hasBin: true + peerDependencies: + '@edge-runtime/vm': '*' + '@opentelemetry/api': ^1.9.0 + '@types/node': ^20.0.0 || ^22.0.0 || >=24.0.0 + '@vitest/browser-playwright': 4.1.9 + '@vitest/browser-preview': 4.1.9 + '@vitest/browser-webdriverio': 4.1.9 + '@vitest/coverage-istanbul': 4.1.9 + '@vitest/coverage-v8': 4.1.9 + '@vitest/ui': 4.1.9 + happy-dom: '*' + jsdom: '*' + vite: ^6.0.0 || ^7.0.0 || ^8.0.0 + peerDependenciesMeta: + '@edge-runtime/vm': + optional: true + '@opentelemetry/api': + optional: true + '@types/node': + optional: true + '@vitest/browser-playwright': + optional: true + '@vitest/browser-preview': + optional: true + '@vitest/browser-webdriverio': + optional: true + 
'@vitest/coverage-istanbul': + optional: true + '@vitest/coverage-v8': + optional: true + '@vitest/ui': + optional: true + happy-dom: + optional: true + jsdom: + optional: true + web-streams-polyfill@3.3.3: resolution: {integrity: sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==} engines: {node: '>= 8'} @@ -1706,6 +2204,11 @@ packages: engines: {node: '>= 8'} hasBin: true + why-is-node-running@2.3.0: + resolution: {integrity: sha512-hUrmaWBdVDcxvYqnyh09zunKzROWjbZTiNy8dBEjkS7ehEDQibXJ7XvlmtbwuTclUiIyN+CyXQD4Vmko8fNm8w==} + engines: {node: '>=8'} + hasBin: true + wrap-ansi@7.0.0: resolution: {integrity: sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==} engines: {node: '>=10'} @@ -2217,8 +2720,23 @@ snapshots: '@aws/lambda-invoke-store@0.2.4': {} + '@babel/helper-string-parser@7.29.7': {} + + '@babel/helper-validator-identifier@7.29.7': {} + + '@babel/parser@7.29.7': + dependencies: + '@babel/types': 7.29.7 + '@babel/runtime@7.29.7': {} + '@babel/types@7.29.7': + dependencies: + '@babel/helper-string-parser': 7.29.7 + '@babel/helper-validator-identifier': 7.29.7 + + '@bcoe/v8-coverage@1.0.2': {} + '@daytona/api-client@0.187.0': dependencies: axios: 1.18.0 @@ -2332,6 +2850,22 @@ snapshots: get-east-asian-width: 1.6.0 marked: 15.0.12 + '@emnapi/core@1.10.0': + dependencies: + '@emnapi/wasi-threads': 1.2.1 + tslib: 2.8.1 + optional: true + + '@emnapi/runtime@1.10.0': + dependencies: + tslib: 2.8.1 + optional: true + + '@emnapi/wasi-threads@1.2.1': + dependencies: + tslib: 2.8.1 + optional: true + '@esbuild/aix-ppc64@0.23.1': optional: true @@ -2495,6 +3029,15 @@ snapshots: dependencies: minipass: 7.1.3 + '@jridgewell/resolve-uri@3.1.2': {} + + '@jridgewell/sourcemap-codec@1.5.5': {} + + '@jridgewell/trace-mapping@0.3.31': + dependencies: + '@jridgewell/resolve-uri': 3.1.2 + '@jridgewell/sourcemap-codec': 1.5.5 + '@js-sdsl/ordered-map@4.4.2': {} 
'@mariozechner/clipboard-darwin-arm64@0.3.9': @@ -2550,6 +3093,13 @@ snapshots: - bufferutil - utf-8-validate + '@napi-rs/wasm-runtime@1.1.5(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0)': + dependencies: + '@emnapi/core': 1.10.0 + '@emnapi/runtime': 1.10.0 + '@tybys/wasm-util': 0.10.2 + optional: true + '@nodable/entities@2.2.0': {} '@nodelib/fs.scandir@2.1.5': @@ -2937,6 +3487,8 @@ snapshots: '@opentelemetry/semantic-conventions@1.41.1': {} + '@oxc-project/types@0.133.0': {} + '@protobufjs/aspromise@1.1.2': {} '@protobufjs/base64@1.1.2': {} @@ -2959,6 +3511,57 @@ snapshots: '@protobufjs/utf8@1.1.1': {} + '@rolldown/binding-android-arm64@1.0.3': + optional: true + + '@rolldown/binding-darwin-arm64@1.0.3': + optional: true + + '@rolldown/binding-darwin-x64@1.0.3': + optional: true + + '@rolldown/binding-freebsd-x64@1.0.3': + optional: true + + '@rolldown/binding-linux-arm-gnueabihf@1.0.3': + optional: true + + '@rolldown/binding-linux-arm64-gnu@1.0.3': + optional: true + + '@rolldown/binding-linux-arm64-musl@1.0.3': + optional: true + + '@rolldown/binding-linux-ppc64-gnu@1.0.3': + optional: true + + '@rolldown/binding-linux-s390x-gnu@1.0.3': + optional: true + + '@rolldown/binding-linux-x64-gnu@1.0.3': + optional: true + + '@rolldown/binding-linux-x64-musl@1.0.3': + optional: true + + '@rolldown/binding-openharmony-arm64@1.0.3': + optional: true + + '@rolldown/binding-wasm32-wasi@1.0.3': + dependencies: + '@emnapi/core': 1.10.0 + '@emnapi/runtime': 1.10.0 + '@napi-rs/wasm-runtime': 1.1.5(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0) + optional: true + + '@rolldown/binding-win32-arm64-msvc@1.0.3': + optional: true + + '@rolldown/binding-win32-x64-msvc@1.0.3': + optional: true + + '@rolldown/pluginutils@1.0.1': {} + '@sandbox-agent/cli-darwin-arm64@0.4.2': optional: true @@ -3043,12 +3646,83 @@ snapshots: '@smithy/util-buffer-from': 2.2.0 tslib: 2.8.1 - '@types/node@22.10.2': + '@standard-schema/spec@1.1.0': {} + + '@tybys/wasm-util@0.10.2': + dependencies: + tslib: 
2.8.1 + optional: true + + '@types/chai@5.2.3': + dependencies: + '@types/deep-eql': 4.0.2 + assertion-error: 2.0.1 + + '@types/deep-eql@4.0.2': {} + + '@types/estree@1.0.9': {} + + '@types/node@24.13.2': dependencies: - undici-types: 6.20.0 + undici-types: 7.18.2 '@types/retry@0.12.0': {} + '@vitest/coverage-v8@4.1.9(vitest@4.1.9)': + dependencies: + '@bcoe/v8-coverage': 1.0.2 + '@vitest/utils': 4.1.9 + ast-v8-to-istanbul: 1.0.4 + istanbul-lib-coverage: 3.2.2 + istanbul-lib-report: 3.0.1 + istanbul-reports: 3.2.0 + magicast: 0.5.3 + obug: 2.1.3 + std-env: 4.1.0 + tinyrainbow: 3.1.0 + vitest: 4.1.9(@opentelemetry/api@1.9.0)(@types/node@24.13.2)(@vitest/coverage-v8@4.1.9)(vite@8.0.16(@types/node@24.13.2)(esbuild@0.23.1)(jiti@2.7.0)(tsx@4.19.2)(yaml@2.9.0)) + + '@vitest/expect@4.1.9': + dependencies: + '@standard-schema/spec': 1.1.0 + '@types/chai': 5.2.3 + '@vitest/spy': 4.1.9 + '@vitest/utils': 4.1.9 + chai: 6.2.2 + tinyrainbow: 3.1.0 + + '@vitest/mocker@4.1.9(vite@8.0.16(@types/node@24.13.2)(esbuild@0.23.1)(jiti@2.7.0)(tsx@4.19.2)(yaml@2.9.0))': + dependencies: + '@vitest/spy': 4.1.9 + estree-walker: 3.0.3 + magic-string: 0.30.21 + optionalDependencies: + vite: 8.0.16(@types/node@24.13.2)(esbuild@0.23.1)(jiti@2.7.0)(tsx@4.19.2)(yaml@2.9.0) + + '@vitest/pretty-format@4.1.9': + dependencies: + tinyrainbow: 3.1.0 + + '@vitest/runner@4.1.9': + dependencies: + '@vitest/utils': 4.1.9 + pathe: 2.0.3 + + '@vitest/snapshot@4.1.9': + dependencies: + '@vitest/pretty-format': 4.1.9 + '@vitest/utils': 4.1.9 + magic-string: 0.30.21 + pathe: 2.0.3 + + '@vitest/spy@4.1.9': {} + + '@vitest/utils@4.1.9': + dependencies: + '@vitest/pretty-format': 4.1.9 + convert-source-map: 2.0.0 + tinyrainbow: 3.1.0 + '@zed-industries/claude-agent-acp@0.23.1': dependencies: '@agentclientprotocol/sdk': 0.17.0(zod@4.4.3) @@ -3083,6 +3757,14 @@ snapshots: anynum@1.0.0: {} + assertion-error@2.0.1: {} + + ast-v8-to-istanbul@1.0.4: + dependencies: + '@jridgewell/trace-mapping': 0.3.31 + estree-walker: 
3.0.3 + js-tokens: 10.0.0 + asynckit@0.4.0: {} axios@1.18.0: @@ -3127,6 +3809,8 @@ snapshots: es-errors: 1.3.0 function-bind: 1.1.2 + chai@6.2.2: {} + chalk@5.6.2: {} chownr@3.0.0: {} @@ -3149,6 +3833,8 @@ snapshots: dependencies: delayed-stream: 1.0.0 + convert-source-map@2.0.0: {} + cross-spawn@7.0.6: dependencies: path-key: 3.1.1 @@ -3163,6 +3849,8 @@ snapshots: delayed-stream@1.0.0: {} + detect-libc@2.1.2: {} + diff@8.0.4: {} dotenv@17.4.2: {} @@ -3183,6 +3871,8 @@ snapshots: es-errors@1.3.0: {} + es-module-lexer@2.1.0: {} + es-object-atoms@1.1.2: dependencies: es-errors: 1.3.0 @@ -3223,12 +3913,18 @@ snapshots: escalade@3.2.0: {} + estree-walker@3.0.3: + dependencies: + '@types/estree': 1.0.9 + events@3.3.0: {} expand-tilde@2.0.2: dependencies: homedir-polyfill: 1.0.3 + expect-type@1.3.0: {} + extend@3.0.2: {} fast-glob@3.3.3: @@ -3255,6 +3951,10 @@ snapshots: dependencies: reusify: 1.1.0 + fdir@6.5.0(picomatch@4.0.4): + optionalDependencies: + picomatch: 4.0.4 + fetch-blob@3.2.0: dependencies: node-domexception: 1.0.0 @@ -3354,6 +4054,8 @@ snapshots: graceful-fs@4.2.11: {} + has-flag@4.0.0: {} + has-symbols@1.1.0: {} has-tostringtag@1.0.2: @@ -3374,6 +4076,8 @@ snapshots: dependencies: lru-cache: 11.5.1 + html-escaper@2.0.2: {} + http-proxy-agent@7.0.2: dependencies: agent-base: 7.1.4 @@ -3424,8 +4128,23 @@ snapshots: dependencies: ws: 8.21.0 + istanbul-lib-coverage@3.2.2: {} + + istanbul-lib-report@3.0.1: + dependencies: + istanbul-lib-coverage: 3.2.2 + make-dir: 4.0.0 + supports-color: 7.2.0 + + istanbul-reports@3.2.0: + dependencies: + html-escaper: 2.0.2 + istanbul-lib-report: 3.0.1 + jiti@2.7.0: {} + js-tokens@10.0.0: {} + json-bigint@1.0.0: dependencies: bignumber.js: 9.3.1 @@ -3446,12 +4165,75 @@ snapshots: jwa: 2.0.1 safe-buffer: 5.2.1 + lightningcss-android-arm64@1.32.0: + optional: true + + lightningcss-darwin-arm64@1.32.0: + optional: true + + lightningcss-darwin-x64@1.32.0: + optional: true + + lightningcss-freebsd-x64@1.32.0: + optional: true + + 
lightningcss-linux-arm-gnueabihf@1.32.0: + optional: true + + lightningcss-linux-arm64-gnu@1.32.0: + optional: true + + lightningcss-linux-arm64-musl@1.32.0: + optional: true + + lightningcss-linux-x64-gnu@1.32.0: + optional: true + + lightningcss-linux-x64-musl@1.32.0: + optional: true + + lightningcss-win32-arm64-msvc@1.32.0: + optional: true + + lightningcss-win32-x64-msvc@1.32.0: + optional: true + + lightningcss@1.32.0: + dependencies: + detect-libc: 2.1.2 + optionalDependencies: + lightningcss-android-arm64: 1.32.0 + lightningcss-darwin-arm64: 1.32.0 + lightningcss-darwin-x64: 1.32.0 + lightningcss-freebsd-x64: 1.32.0 + lightningcss-linux-arm-gnueabihf: 1.32.0 + lightningcss-linux-arm64-gnu: 1.32.0 + lightningcss-linux-arm64-musl: 1.32.0 + lightningcss-linux-x64-gnu: 1.32.0 + lightningcss-linux-x64-musl: 1.32.0 + lightningcss-win32-arm64-msvc: 1.32.0 + lightningcss-win32-x64-msvc: 1.32.0 + lodash.camelcase@4.3.0: {} long@5.3.2: {} lru-cache@11.5.1: {} + magic-string@0.30.21: + dependencies: + '@jridgewell/sourcemap-codec': 1.5.5 + + magicast@0.5.3: + dependencies: + '@babel/parser': 7.29.7 + '@babel/types': 7.29.7 + source-map-js: 1.2.1 + + make-dir@4.0.0: + dependencies: + semver: 7.8.0 + marked@15.0.12: {} math-intrinsics@1.1.0: {} @@ -3483,6 +4265,8 @@ snapshots: ms@2.1.3: {} + nanoid@3.3.13: {} + node-domexception@1.0.0: {} node-fetch@3.3.2: @@ -3491,6 +4275,8 @@ snapshots: fetch-blob: 3.2.0 formdata-polyfill: 4.0.10 + obug@2.1.3: {} + openai@6.26.0(ws@8.21.0)(zod@4.4.3): optionalDependencies: ws: 8.21.0 @@ -3521,8 +4307,18 @@ snapshots: '@agentclientprotocol/sdk': 0.26.0(zod@3.25.76) zod: 3.25.76 + picocolors@1.1.1: {} + picomatch@2.3.2: {} + picomatch@4.0.4: {} + + postcss@8.5.15: + dependencies: + nanoid: 3.3.13 + picocolors: 1.1.1 + source-map-js: 1.2.1 + proper-lockfile@4.1.2: dependencies: graceful-fs: 4.2.11 @@ -3540,7 +4336,7 @@ snapshots: '@protobufjs/path': 1.1.2 '@protobufjs/pool': 1.1.0 '@protobufjs/utf8': 1.1.1 - '@types/node': 22.10.2 + 
'@types/node': 24.13.2 long: 5.3.2 protobufjs@8.0.1: @@ -3555,7 +4351,7 @@ snapshots: '@protobufjs/path': 1.1.2 '@protobufjs/pool': 1.1.0 '@protobufjs/utf8': 1.1.1 - '@types/node': 22.10.2 + '@types/node': 24.13.2 long: 5.3.2 proxy-from-env@2.1.0: {} @@ -3585,6 +4381,27 @@ snapshots: reusify@1.1.0: {} + rolldown@1.0.3: + dependencies: + '@oxc-project/types': 0.133.0 + '@rolldown/pluginutils': 1.0.1 + optionalDependencies: + '@rolldown/binding-android-arm64': 1.0.3 + '@rolldown/binding-darwin-arm64': 1.0.3 + '@rolldown/binding-darwin-x64': 1.0.3 + '@rolldown/binding-freebsd-x64': 1.0.3 + '@rolldown/binding-linux-arm-gnueabihf': 1.0.3 + '@rolldown/binding-linux-arm64-gnu': 1.0.3 + '@rolldown/binding-linux-arm64-musl': 1.0.3 + '@rolldown/binding-linux-ppc64-gnu': 1.0.3 + '@rolldown/binding-linux-s390x-gnu': 1.0.3 + '@rolldown/binding-linux-x64-gnu': 1.0.3 + '@rolldown/binding-linux-x64-musl': 1.0.3 + '@rolldown/binding-openharmony-arm64': 1.0.3 + '@rolldown/binding-wasm32-wasi': 1.0.3 + '@rolldown/binding-win32-arm64-msvc': 1.0.3 + '@rolldown/binding-win32-x64-msvc': 1.0.3 + run-parallel@1.2.0: dependencies: queue-microtask: 1.2.3 @@ -3611,8 +4428,16 @@ snapshots: shell-quote@1.8.4: {} + siginfo@2.0.0: {} + signal-exit@3.0.7: {} + source-map-js@1.2.1: {} + + stackback@0.0.2: {} + + std-env@4.1.0: {} + stream-browserify@3.0.0: dependencies: inherits: 2.0.4 @@ -3638,6 +4463,10 @@ snapshots: dependencies: anynum: 1.0.0 + supports-color@7.2.0: + dependencies: + has-flag: 4.0.0 + tar@7.5.16: dependencies: '@isaacs/fs-minipass': 4.0.1 @@ -3646,6 +4475,17 @@ snapshots: minizlib: 3.1.0 yallist: 5.0.0 + tinybench@2.9.0: {} + + tinyexec@1.2.4: {} + + tinyglobby@0.2.17: + dependencies: + fdir: 6.5.0(picomatch@4.0.4) + picomatch: 4.0.4 + + tinyrainbow@3.1.0: {} + to-regex-range@5.0.1: dependencies: is-number: 7.0.0 @@ -3663,18 +4503,69 @@ snapshots: typebox@1.1.38: {} - undici-types@6.20.0: {} + typescript@5.9.3: {} + + undici-types@7.18.2: {} undici@8.3.0: {} 
util-deprecate@1.0.2: {} + vite@8.0.16(@types/node@24.13.2)(esbuild@0.23.1)(jiti@2.7.0)(tsx@4.19.2)(yaml@2.9.0): + dependencies: + lightningcss: 1.32.0 + picomatch: 4.0.4 + postcss: 8.5.15 + rolldown: 1.0.3 + tinyglobby: 0.2.17 + optionalDependencies: + '@types/node': 24.13.2 + esbuild: 0.23.1 + fsevents: 2.3.3 + jiti: 2.7.0 + tsx: 4.19.2 + yaml: 2.9.0 + + vitest@4.1.9(@opentelemetry/api@1.9.0)(@types/node@24.13.2)(@vitest/coverage-v8@4.1.9)(vite@8.0.16(@types/node@24.13.2)(esbuild@0.23.1)(jiti@2.7.0)(tsx@4.19.2)(yaml@2.9.0)): + dependencies: + '@vitest/expect': 4.1.9 + '@vitest/mocker': 4.1.9(vite@8.0.16(@types/node@24.13.2)(esbuild@0.23.1)(jiti@2.7.0)(tsx@4.19.2)(yaml@2.9.0)) + '@vitest/pretty-format': 4.1.9 + '@vitest/runner': 4.1.9 + '@vitest/snapshot': 4.1.9 + '@vitest/spy': 4.1.9 + '@vitest/utils': 4.1.9 + es-module-lexer: 2.1.0 + expect-type: 1.3.0 + magic-string: 0.30.21 + obug: 2.1.3 + pathe: 2.0.3 + picomatch: 4.0.4 + std-env: 4.1.0 + tinybench: 2.9.0 + tinyexec: 1.2.4 + tinyglobby: 0.2.17 + tinyrainbow: 3.1.0 + vite: 8.0.16(@types/node@24.13.2)(esbuild@0.23.1)(jiti@2.7.0)(tsx@4.19.2)(yaml@2.9.0) + why-is-node-running: 2.3.0 + optionalDependencies: + '@opentelemetry/api': 1.9.0 + '@types/node': 24.13.2 + '@vitest/coverage-v8': 4.1.9(vitest@4.1.9) + transitivePeerDependencies: + - msw + web-streams-polyfill@3.3.3: {} which@2.0.2: dependencies: isexe: 2.0.0 + why-is-node-running@2.3.0: + dependencies: + siginfo: 2.0.0 + stackback: 0.0.2 + wrap-ansi@7.0.0: dependencies: ansi-styles: 4.3.0 diff --git a/services/agent/src/cli.ts b/services/agent/src/cli.ts index 7f45ebb714..6909992b51 100644 --- a/services/agent/src/cli.ts +++ b/services/agent/src/cli.ts @@ -1,88 +1,109 @@ /** * WP-2 Pi wrapper CLI: the JSON transport for the Harness port. * - * Reads one JSON `AgentRunRequest` from stdin, runs Pi once, and writes one JSON - * `AgentRunResult` to stdout. stdout carries the result and nothing else; logs go - * to stderr. 
This is the one-shot "json adapter" the design doc describes; a - * long-lived RPC adapter can replace it later behind the same Python-side port. + * Reads one JSON `AgentRunRequest` from stdin, runs the agent once, and writes one JSON + * `AgentRunResult` to stdout. stdout carries the result and nothing else; logs go to stderr. + * With `--stream`, writes NDJSON instead: one `{kind:"event"}` line per event the moment it + * is built, then exactly one terminal `{kind:"result"}` line. + * + * `runCli(raw, stream, io)` is the testable seam: it takes the raw stdin string and an + * injectable engine runner + output sink, and returns the exit code. Tests pass a fake engine + * and a collecting `write`, so no stdin/stdout/process.exit mocking is needed; production + * defaults to the real engine and `process.stdout` (which keeps streaming live). */ import type { AgentRunRequest, AgentRunResult, EmitEvent, - StreamRecord, } from "./protocol.ts"; import { runPi } from "./engines/pi.ts"; import { runRivet } from "./engines/rivet.ts"; +import { isEntrypoint } from "./entry.ts"; -// Engine: `rivet` drives a harness over ACP via a rivet daemon; `pi` (default) is the -// legacy in-process Pi path. The request's `backend` wins, then the AGENT_BACKEND env. -function runAgent( +/** Run one request through an engine. Tests inject a fake to avoid a live harness. */ +export type RunAgent = ( request: AgentRunRequest, emit?: EmitEvent, -): Promise { +) => Promise; + +// Engine: `rivet` drives a harness over ACP via a rivet daemon; `pi` (default) is the +// legacy in-process Pi path. The request's `backend` wins, then the AGENT_BACKEND env. +const runAgent: RunAgent = (request, emit) => { const backend = (request.backend ?? process.env.AGENT_BACKEND ?? "pi").toLowerCase(); return backend === "rivet" ? 
runRivet(request, emit) : runPi(request, emit); -} +}; -async function readStdin(): Promise { - const chunks: Buffer[] = []; - for await (const chunk of process.stdin) { - chunks.push(chunk as Buffer); - } - return Buffer.concat(chunks).toString("utf8"); +function errorMessage(err: unknown): string { + return err instanceof Error ? err.stack ?? err.message : String(err); } -// One-shot mode: the whole result as a single JSON document (the `/invoke` contract). -function emitResult(result: AgentRunResult): void { - process.stdout.write(JSON.stringify(result)); +export interface CliIO { + /** Engine runner; defaults to the real backend dispatch. */ + run?: RunAgent; + /** Output sink; defaults to `process.stdout`. Called incrementally so streaming stays live. */ + write?: (chunk: string) => void; } -// Streaming mode (`--stream`): one NDJSON record per line — an `{kind:"event"}` line the -// moment each event is built, then exactly one terminal `{kind:"result"}` line. -function writeRecord(record: StreamRecord): void { - process.stdout.write(JSON.stringify(record) + "\n"); -} - -async function main(): Promise { - const stream = process.argv.includes("--stream"); - const raw = await readStdin(); +/** + * Run one request and return the process exit code (0 = ok, 1 = failure/invalid input). + * Output is delivered through `io.write` as it is produced. + */ +export async function runCli( + raw: string, + stream: boolean, + io: CliIO = {}, +): Promise { + const run = io.run ?? runAgent; + const write = io.write ?? ((chunk: string) => void process.stdout.write(chunk)); let request: AgentRunRequest; try { request = raw.trim() ? (JSON.parse(raw) as AgentRunRequest) : {}; } catch (err) { const failure: AgentRunResult = { ok: false, error: `Invalid JSON on stdin: ${String(err)}` }; - if (stream) writeRecord({ kind: "result", result: failure }); - else emitResult(failure); - process.exit(1); + write(stream ? 
JSON.stringify({ kind: "result", result: failure }) + "\n" : JSON.stringify(failure)); + return 1; } if (!stream) { try { - const result = await runAgent(request); - emitResult(result); - process.exit(result.ok ? 0 : 1); + const result = await run(request); + write(JSON.stringify(result)); + return result.ok ? 0 : 1; } catch (err) { - emitResult({ - ok: false, - error: err instanceof Error ? err.stack ?? err.message : String(err), - }); - process.exit(1); + write(JSON.stringify({ ok: false, error: errorMessage(err) })); + return 1; } - return; } - const emit: EmitEvent = (event) => writeRecord({ kind: "event", event }); + const emit: EmitEvent = (event) => write(JSON.stringify({ kind: "event", event }) + "\n"); let result: AgentRunResult; try { - result = await runAgent(request, emit); + result = await run(request, emit); } catch (err) { - result = { ok: false, error: err instanceof Error ? err.stack ?? err.message : String(err) }; + result = { ok: false, error: errorMessage(err) }; } // Streaming delivered the events live, so don't echo them in the terminal record. - writeRecord({ kind: "result", result: { ...result, events: [] } }); - process.exit(result.ok ? 0 : 1); + write(JSON.stringify({ kind: "result", result: { ...result, events: [] } }) + "\n"); + return result.ok ? 0 : 1; } -main(); +async function readStdin(): Promise { + const chunks: Buffer[] = []; + for await (const chunk of process.stdin) { + chunks.push(chunk as Buffer); + } + return Buffer.concat(chunks).toString("utf8"); +} + +async function main(): Promise { + const stream = process.argv.includes("--stream"); + const raw = await readStdin(); + const code = await runCli(raw, stream); + process.exit(code); +} + +// Only run when this file is the process entry (`tsx src/cli.ts`); importing it is inert. 
+if (isEntrypoint(import.meta.url)) { + void main(); +} diff --git a/services/agent/src/entry.ts b/services/agent/src/entry.ts new file mode 100644 index 0000000000..877aac822e --- /dev/null +++ b/services/agent/src/entry.ts @@ -0,0 +1,17 @@ +/** + * True when `moduleUrl` is the process entry point, so an entrypoint module runs its `main()` + * under `tsx src/x.ts` but stays inert when imported by a test. Compares the resolved real + * paths of `process.argv[1]` and the module's own file. + */ +import { argv } from "node:process"; +import { realpathSync } from "node:fs"; +import { fileURLToPath } from "node:url"; + +export function isEntrypoint(moduleUrl: string): boolean { + if (!argv[1]) return false; + try { + return realpathSync(argv[1]) === realpathSync(fileURLToPath(moduleUrl)); + } catch { + return false; + } +} diff --git a/services/agent/src/server.ts b/services/agent/src/server.ts index aae23c4480..71c95c0111 100644 --- a/services/agent/src/server.ts +++ b/services/agent/src/server.ts @@ -4,13 +4,21 @@ * Same contract as the CLI, exposed over HTTP so the wrapper can run as its own * container (a sidecar) that the Python service calls in-network: * - * GET /health -> { status: "ok" } + * GET /health -> runner identity ({ status, runner, protocol, engines, harnesses }) * POST /run -> body is an AgentRunRequest, response is an AgentRunResult * * Uses Node's built-in http server (no framework dependency). Pi auth comes from * PI_CODING_AGENT_DIR / ~/.pi/agent, mounted into the container. + * + * `createAgentServer(run)` is the testable seam: it builds the server around an injectable + * engine runner so the HTTP behavior can be tested with a fake engine (no live harness). 
*/ -import { createServer, type IncomingMessage, type ServerResponse } from "node:http"; +import { + createServer, + type IncomingMessage, + type Server, + type ServerResponse, +} from "node:http"; import type { AgentRunRequest, @@ -20,6 +28,8 @@ import type { } from "./protocol.ts"; import { runPi } from "./engines/pi.ts"; import { runRivet } from "./engines/rivet.ts"; +import { runnerInfo } from "./version.ts"; +import { isEntrypoint } from "./entry.ts"; const PORT = Number(process.env.PORT ?? 8765); @@ -29,18 +39,21 @@ const PORT = Number(process.env.PORT ?? 8765); // request shape (a rivet request carries `harness`/`sandbox`). const DEFAULT_BACKEND = (process.env.AGENT_BACKEND ?? "auto").toLowerCase(); -function runAgent( +/** Run one request through an engine. Tests inject a fake to avoid a live harness. */ +export type RunAgent = ( request: AgentRunRequest, emit?: EmitEvent, signal?: AbortSignal, -): Promise { +) => Promise; + +const runAgent: RunAgent = (request, emit, signal) => { const backend = (request.backend ?? DEFAULT_BACKEND).toLowerCase(); if (backend === "rivet") return runRivet(request, emit, signal); if (backend === "pi") return runPi(request, emit); return request.harness || request.sandbox ? runRivet(request, emit, signal) : runPi(request, emit); -} +}; /** * Stream a run as NDJSON: one `{kind:"event"}` line per event the moment it is built, then @@ -48,9 +61,10 @@ function runAgent( * with `Accept: application/x-ndjson`; the one-shot `/run` path is left untouched. */ async function runAndStream( - req: IncomingMessage, + _req: IncomingMessage, res: ServerResponse, request: AgentRunRequest, + run: RunAgent, ): Promise { res.writeHead(200, { "content-type": "application/x-ndjson", @@ -75,7 +89,7 @@ async function runAndStream( let result: AgentRunResult; try { - result = await runAgent(request, emit, controller.signal); + result = await run(request, emit, controller.signal); } catch (err) { const message = err instanceof Error ? err.stack ?? 
err.message : String(err); result = { ok: false, error: message }; @@ -102,54 +116,68 @@ async function readBody(req: IncomingMessage): Promise { return Buffer.concat(chunks).toString("utf8"); } -const server = createServer(async (req, res) => { - try { - if (req.method === "GET" && req.url === "/health") { - return send(res, 200, { status: "ok" }); - } - - if (req.method === "POST" && req.url === "/run") { - const raw = await readBody(req); - let request: AgentRunRequest; - try { - request = raw.trim() ? (JSON.parse(raw) as AgentRunRequest) : {}; - } catch (err) { - return send(res, 400, { ok: false, error: `Invalid JSON: ${String(err)}` }); +/** Build the HTTP request listener around a given engine runner (the testable seam). */ +export function createRequestListener( + run: RunAgent, +): (req: IncomingMessage, res: ServerResponse) => Promise { + return async (req, res) => { + try { + if (req.method === "GET" && req.url === "/health") { + return send(res, 200, runnerInfo()); } - const wantsStream = (req.headers["accept"] ?? "").includes( - "application/x-ndjson", - ); - if (wantsStream) { - await runAndStream(req, res, request); - return; + if (req.method === "POST" && req.url === "/run") { + const raw = await readBody(req); + let request: AgentRunRequest; + try { + request = raw.trim() ? (JSON.parse(raw) as AgentRunRequest) : {}; + } catch (err) { + return send(res, 400, { ok: false, error: `Invalid JSON: ${String(err)}` }); + } + + const wantsStream = (req.headers["accept"] ?? "").includes( + "application/x-ndjson", + ); + if (wantsStream) { + await runAndStream(req, res, request, run); + return; + } + + const result = await run(request); + return send(res, result.ok ? 200 : 500, result); } - const result = await runAgent(request); - return send(res, result.ok ? 200 : 500, result); + return send(res, 404, { ok: false, error: "Not found" }); + } catch (err) { + const message = err instanceof Error ? err.stack ?? 
err.message : String(err); + return send(res, 500, { ok: false, error: message }); } + }; +} - return send(res, 404, { ok: false, error: "Not found" }); - } catch (err) { - const message = err instanceof Error ? err.stack ?? err.message : String(err); - return send(res, 500, { ok: false, error: message }); - } -}); - -// The rivet SDK can reject a background promise (e.g. an adapter install or the Daytona -// preview SSE failing) outside any awaited path. Node's default turns that into an -// uncaught exception that kills the whole process — taking every in-flight request with -// it (the caller sees "Server disconnected"). Log and keep serving instead; the failing -// run still returns its own error to its caller. -process.on("unhandledRejection", (reason) => { - process.stderr.write( - `[pi-wrapper] unhandledRejection: ${reason instanceof Error ? (reason.stack ?? reason.message) : String(reason)}\n`, - ); -}); -process.on("uncaughtException", (err) => { - process.stderr.write(`[pi-wrapper] uncaughtException: ${err.stack ?? err.message}\n`); -}); - -server.listen(PORT, () => { - process.stderr.write(`[pi-wrapper] http server listening on :${PORT}\n`); -}); +/** Create the sidecar HTTP server. Defaults to the real engine dispatch; tests pass a fake. */ +export function createAgentServer(run: RunAgent = runAgent): Server { + return createServer(createRequestListener(run)); +} + +// Only run as a server when this file is the process entry (`tsx src/server.ts`); importing +// it (e.g. from a test) is inert. +if (isEntrypoint(import.meta.url)) { + // The rivet SDK can reject a background promise (e.g. an adapter install or the Daytona + // preview SSE failing) outside any awaited path. Node's default turns that into an + // uncaught exception that kills the whole process — taking every in-flight request with + // it (the caller sees "Server disconnected"). Log and keep serving instead; the failing + // run still returns its own error to its caller. 
+ process.on("unhandledRejection", (reason) => { + process.stderr.write( + `[pi-wrapper] unhandledRejection: ${reason instanceof Error ? (reason.stack ?? reason.message) : String(reason)}\n`, + ); + }); + process.on("uncaughtException", (err) => { + process.stderr.write(`[pi-wrapper] uncaughtException: ${err.stack ?? err.message}\n`); + }); + + createAgentServer().listen(PORT, () => { + process.stderr.write(`[pi-wrapper] http server listening on :${PORT}\n`); + }); +} diff --git a/services/agent/src/tools/dispatch.ts b/services/agent/src/tools/dispatch.ts index fd68a87b72..ee9845b9f1 100644 --- a/services/agent/src/tools/dispatch.ts +++ b/services/agent/src/tools/dispatch.ts @@ -85,11 +85,11 @@ export async function relayToolCall( /* best-effort cleanup */ } if (res.ok) return res.text ?? ""; - throw new Error(res.error || `tool relay failed for ${callRef}`); + throw new Error(res.error || `tool relay failed for ${toolName}`); } await sleep(RELAY_POLL_MS); } - throw new Error(`tool relay timed out for ${callRef}`); + throw new Error(`tool relay timed out for ${toolName}`); } /** diff --git a/services/agent/src/version.ts b/services/agent/src/version.ts new file mode 100644 index 0000000000..5c34701fd3 --- /dev/null +++ b/services/agent/src/version.ts @@ -0,0 +1,35 @@ +/** + * Runner identity, surfaced on `GET /health` so a client can detect an incompatible runner + * before the first run (the version-skew guard). + * + * `PROTOCOL_VERSION` is the MAJOR of the `/run` wire contract in `protocol.ts`. Bump it only + * for a change that is not backward compatible; a client that probes `/health` can then + * refuse a runner whose protocol major it does not support. `RUNNER_VERSION` is the package + * version (the build), distinct from the protocol. 
+ */ +import pkg from "../package.json"; + +export const PROTOCOL_VERSION = 1; +export const RUNNER_VERSION: string = pkg.version; +export const ENGINES = ["pi", "rivet"] as const; +export const HARNESSES = ["pi", "claude", "agenta"] as const; + +export interface RunnerInfo { + status: "ok"; + /** Package build version (e.g. "0.1.0"). */ + runner: string; + /** Wire-contract major. A client refuses a major it does not understand. */ + protocol: number; + engines: readonly string[]; + harnesses: readonly string[]; +} + +export function runnerInfo(): RunnerInfo { + return { + status: "ok", + runner: RUNNER_VERSION, + protocol: PROTOCOL_VERSION, + engines: ENGINES, + harnesses: HARNESSES, + }; +} diff --git a/services/agent/test/code-tool.test.ts b/services/agent/test/code-tool.test.ts deleted file mode 100644 index 0711f57b41..0000000000 --- a/services/agent/test/code-tool.test.ts +++ /dev/null @@ -1,92 +0,0 @@ -/** - * Unit test for the code-tool executor (runCodeTool). - * - * Exercises both runtimes end-to-end through real subprocesses: a python tool, node tools - * written as a bare top-level `function main` (the F2 regression) and as an explicit - * `module.exports.main`, an async node `main`, the F3 env-isolation guarantee (provider keys - * do NOT leak in; declared scoped secrets DO), and the non-zero-exit reject path. 
- * - * Run: pnpm exec tsx test/code-tool.test.ts - */ -import assert from "node:assert/strict"; - -import { runCodeTool } from "../src/tools/code.ts"; - -// --- Python: bare `def main(**kw)` ------------------------------------------ -{ - const code = 'def main(**kw):\n return {"sum": kw.get("a", 0) + kw.get("b", 0)}\n'; - const out = await runCodeTool("python", code, undefined, { a: 2, b: 3 }); - assert.deepEqual(JSON.parse(out), { sum: 5 }, "python bare main returns the right JSON"); -} - -// --- Node: bare top-level `function main` (F2 regression) ------------------- -{ - const code = "function main(inputs) { return { got: inputs }; }"; - const out = await runCodeTool("node", code, undefined, { hello: "world" }); - assert.deepEqual( - JSON.parse(out), - { got: { hello: "world" } }, - "node bare function main executes and echoes the input", - ); -} - -// --- Node: explicit `module.exports.main` ----------------------------------- -{ - const code = "module.exports.main = function (inputs) { return { via: 'exports', got: inputs }; };"; - const out = await runCodeTool("node", code, undefined, { x: 1 }); - assert.deepEqual( - JSON.parse(out), - { via: "exports", got: { x: 1 } }, - "node module.exports.main works", - ); -} - -// --- Node: async `main` returning a Promise --------------------------------- -{ - const code = - "async function main(inputs) { await new Promise((r) => setTimeout(r, 5)); return { doubled: inputs.n * 2 }; }"; - const out = await runCodeTool("node", code, undefined, { n: 21 }); - assert.deepEqual(JSON.parse(out), { doubled: 42 }, "node async main resolves"); -} - -// --- F3: provider keys do NOT leak; scoped secrets DO ----------------------- -{ - const hadKey = "OPENAI_API_KEY" in process.env; - const prevKey = process.env.OPENAI_API_KEY; - process.env.OPENAI_API_KEY = "leak-me-xyz"; - try { - // The provider key sits in process.env but must not reach the snippet. 
- const leakCode = "function main() { return { key: process.env.OPENAI_API_KEY ?? 'absent' }; }"; - const leakOut = await runCodeTool("node", leakCode, undefined, {}); - assert.deepEqual( - JSON.parse(leakOut), - { key: "absent" }, - "F3: OPENAI_API_KEY did NOT leak into the snippet env", - ); - - // A secret declared on the tool (passed via the scoped `env` arg) must be visible. - const scopedCode = - "function main() { return { secret: process.env.MY_TOOL_SECRET ?? 'absent' }; }"; - const scopedOut = await runCodeTool("node", scopedCode, { MY_TOOL_SECRET: "ok" }, {}); - assert.deepEqual( - JSON.parse(scopedOut), - { secret: "ok" }, - "F3: scoped MY_TOOL_SECRET IS visible to the snippet", - ); - } finally { - if (hadKey) process.env.OPENAI_API_KEY = prevKey; - else delete process.env.OPENAI_API_KEY; - } -} - -// --- Non-zero exit / throw rejects ------------------------------------------ -{ - const code = "function main() { throw new Error('boom'); }"; - await assert.rejects( - () => runCodeTool("node", code, undefined, {}), - /boom|exited/, - "a throwing snippet rejects", - ); -} - -console.log("code-tool.test.ts: all assertions passed"); diff --git a/services/agent/test/continuation.test.ts b/services/agent/test/continuation.test.ts deleted file mode 100644 index c9f9d4356c..0000000000 --- a/services/agent/test/continuation.test.ts +++ /dev/null @@ -1,66 +0,0 @@ -/** - * Unit tests for the cross-turn HITL continuation substrate. - * - * Under the cold model the harness rebuilds context from the replayed transcript, and ACP - * prompt content blocks cannot carry tool calls/results. So a resolved interaction (an - * approved tool that ran, a client-fulfilled tool) must survive into the replay as text. - * `messageTranscript` encodes tool turns; `buildTurnText` keeps them in the replayed history. 
- * - * Run: pnpm exec tsx test/continuation.test.ts - */ -import assert from "node:assert/strict"; - -import { messageTranscript, buildTurnText } from "../src/engines/rivet.ts"; -import { - resolveRunSessionId, - type AgentRunRequest, - type ContentBlock, -} from "../src/protocol.ts"; - -// --- messageTranscript ------------------------------------------------------- -assert.equal(messageTranscript("hello"), "hello"); -assert.equal(messageTranscript([{ type: "text", text: "a" }, { type: "text", text: "b" }]), "a\nb"); -assert.equal( - messageTranscript([{ type: "tool_call", toolName: "getWeather", input: { city: "Paris" } }]), - '[called getWeather({"city":"Paris"})]', -); -assert.equal( - messageTranscript([{ type: "tool_result", toolName: "getWeather", output: { temp: 24 } }]), - '[getWeather returned: {"temp":24}]', -); -assert.equal( - messageTranscript([{ type: "tool_result", toolName: "send", output: "boom", isError: true }]), - "[send error: boom]", -); - -// --- session id metadata ------------------------------------------------------ -assert.equal( - resolveRunSessionId({ sessionId: "sess_platform" }, "runner-ephemeral"), - "sess_platform", -); -assert.equal(resolveRunSessionId({}, "runner-ephemeral"), "runner-ephemeral"); - -// --- buildTurnText keeps a resolved tool turn in the replay ------------------ -{ - const req: AgentRunRequest = { - messages: [ - { role: "user", content: "weather in Paris?" }, - { - role: "assistant", - content: [{ type: "tool_call", toolName: "getWeather", input: { city: "Paris" } } as ContentBlock], - }, - { - role: "tool", - content: [{ type: "tool_result", toolName: "getWeather", output: { temp: 24 } } as ContentBlock], - }, - { role: "user", content: "and tomorrow?" 
}, - ], - }; - const text = buildTurnText(req); - assert.ok(text.includes("called getWeather"), "tool call survives replay"); - assert.ok(text.includes("getWeather returned"), "tool result survives replay"); - assert.ok(text.includes("and tomorrow?"), "latest user prompt is the live turn"); - assert.ok(text.startsWith("Conversation so far:"), "transcript header present"); -} - -console.log("continuation.test.ts: all assertions passed"); diff --git a/services/agent/test/extension-tools.test.ts b/services/agent/test/extension-tools.test.ts deleted file mode 100644 index 5db5e22177..0000000000 --- a/services/agent/test/extension-tools.test.ts +++ /dev/null @@ -1,109 +0,0 @@ -/** - * Regression: the Agenta Pi extension registers custom tools from AGENTA_TOOL_PUBLIC_SPECS. - * - * Guards QA finding F-005 (docs/design/agent-workflows/qa/findings.md): a build where the - * extension stopped reading AGENTA_TOOL_PUBLIC_SPECS shipped custom tools that the model never - * saw, so it improvised with bash and failed. This pins the contract at the source: given the - * public-spec env the runner sets (buildPiExtensionEnv in engines/rivet.ts), the extension - * factory calls pi.registerTool once per spec, passes the JSON Schema through, and gives each - * tool an execute() that relays to the runner. It is also inert when the env is absent. 
- * - * Run: pnpm exec tsx test/extension-tools.test.ts - */ -import assert from "node:assert/strict"; - -import factory from "../src/extensions/agenta.ts"; - -const TOOL_ENV = [ - "AGENTA_TOOL_PUBLIC_SPECS", - "AGENTA_TOOL_RELAY_DIR", - "AGENTA_TRACEPARENT", - "AGENTA_OTLP_ENDPOINT", - "AGENTA_USAGE_OUT", - "AGENTA_CAPTURE_CONTENT", -]; - -function fakePi() { - const registered: any[] = []; - return { - registered, - registerTool(spec: any) { - registered.push(spec); - }, - on() {}, - }; -} - -function clearEnv() { - for (const key of TOOL_ENV) delete process.env[key]; -} - -// --- registers one tool per public spec, schema passed through -------------- -{ - clearEnv(); - process.env.AGENTA_TOOL_PUBLIC_SPECS = JSON.stringify([ - { - name: "secret_math", - description: "qa math", - inputSchema: { - type: "object", - properties: { x: { type: "integer" } }, - required: ["x"], - }, - }, - { name: "no_schema_tool", description: "no schema" }, - ]); - process.env.AGENTA_TOOL_RELAY_DIR = "/tmp/agenta-relay-test"; - - const pi = fakePi(); - factory(pi as any); - - assert.equal(pi.registered.length, 2, "registers one tool per public spec"); - assert.deepEqual( - pi.registered.map((t) => t.name), - ["secret_math", "no_schema_tool"], - "registers each spec by name", - ); - - const math = pi.registered[0]; - assert.equal(math.description, "qa math", "carries the description"); - assert.ok( - math.parameters && math.parameters.properties && math.parameters.properties.x, - "passes the JSON Schema through to Pi", - ); - assert.equal(typeof math.execute, "function", "each tool has an execute() that relays"); - - const noSchema = pi.registered[1]; - assert.ok( - noSchema.parameters, - "a spec without inputSchema falls back to a schema, never undefined", - ); -} - -// --- inert without the tool env (the F-005 bug shape: never delivered) ------ -{ - clearEnv(); - const pi = fakePi(); - factory(pi as any); - assert.equal( - pi.registered.length, - 0, - "no tool env => registers 
nothing (no silent partial state)", - ); -} - -// --- specs present but relay dir missing => does not register --------------- -{ - clearEnv(); - process.env.AGENTA_TOOL_PUBLIC_SPECS = JSON.stringify([{ name: "x" }]); - const pi = fakePi(); - factory(pi as any); - assert.equal( - pi.registered.length, - 0, - "specs without a relay dir do not register (incomplete wiring is not honored)", - ); -} - -clearEnv(); -console.log("extension-tools.test.ts: all assertions passed"); diff --git a/services/agent/test/mcp-servers.test.ts b/services/agent/test/mcp-servers.test.ts deleted file mode 100644 index 97e821429f..0000000000 --- a/services/agent/test/mcp-servers.test.ts +++ /dev/null @@ -1,58 +0,0 @@ -/** - * Unit tests for the user-declared MCP server conversion (Agent B's Slice 4, wired in rivet). - * - * Agent B's `resolve_mcp_servers` emits the McpServerConfig wire shape - * ({name,transport,command,args,env,url?,tools?}, env as a Record), pinned in the Python - * test_wire_contract. This covers the TS half: converting that to the ACP stdio entry the - * session consumes (env as a {name,value} list), skipping remote/http, and not enforcing the - * per-server tools allowlist over ACP in v1. - * - * Run: pnpm exec tsx test/mcp-servers.test.ts - */ -import assert from "node:assert/strict"; - -import { toAcpMcpServers } from "../src/engines/rivet.ts"; -import type { McpServerConfig } from "../src/protocol.ts"; - -assert.deepEqual(toAcpMcpServers(undefined), [], "undefined -> []"); -assert.deepEqual(toAcpMcpServers([]), [], "[] -> []"); - -// stdio server: env Record -> ACP {name,value} list; defaults applied. 
-{ - const servers: McpServerConfig[] = [ - { - name: "github", - transport: "stdio", - command: "npx", - args: ["-y", "@modelcontextprotocol/server-github"], - env: { GITHUB_PERSONAL_ACCESS_TOKEN: "ghp_x", LOG_LEVEL: "info" }, - tools: ["create_issue"], // allowlist not enforced over ACP v1 (logged), server still delivered - }, - ]; - const out = toAcpMcpServers(servers); - assert.equal(out.length, 1); - assert.equal(out[0].name, "github"); - assert.equal(out[0].command, "npx"); - assert.deepEqual(out[0].args, ["-y", "@modelcontextprotocol/server-github"]); - assert.deepEqual(out[0].env, [ - { name: "GITHUB_PERSONAL_ACCESS_TOKEN", value: "ghp_x" }, - { name: "LOG_LEVEL", value: "info" }, - ]); -} - -// remote/http is skipped (no auth on the wire by design); stdio without command is skipped. -{ - const out = toAcpMcpServers([ - { name: "remote", transport: "http", url: "https://example.com/mcp" }, - { name: "broken", transport: "stdio" }, // no command - ]); - assert.deepEqual(out, [], "http + command-less stdio both skipped"); -} - -// missing env / args default to empty. -{ - const out = toAcpMcpServers([{ name: "fs", transport: "stdio", command: "mcp-fs" }]); - assert.deepEqual(out, [{ name: "fs", command: "mcp-fs", args: [], env: [] }]); -} - -console.log("mcp-servers.test.ts: all assertions passed"); diff --git a/services/agent/test/responder.test.ts b/services/agent/test/responder.test.ts deleted file mode 100644 index e06ae43e00..0000000000 --- a/services/agent/test/responder.test.ts +++ /dev/null @@ -1,84 +0,0 @@ -/** - * Unit tests for the interaction responder seam and the otel `emitEvent` hook. - * - * Covers the behavior parity of the responder (it replaces the old inline auto-approve in - * rivet.ts) and that an out-of-stream event (an `interaction_request`) routed through - * `emitEvent` lands in both the live sink and the batch `events()` log. No harness, no - * network. 
- * - * Run: pnpm exec tsx test/responder.test.ts - */ -import assert from "node:assert/strict"; - -import { createRivetOtel } from "../src/tracing/otel.ts"; -import type { AgentEvent } from "../src/protocol.ts"; -import { - PolicyResponder, - decisionToReply, - policyFromRequest, -} from "../src/responder.ts"; - -// --- policyFromRequest ------------------------------------------------------- -{ - delete process.env.AGENTA_RIVET_DENY_PERMISSIONS; - assert.equal(policyFromRequest(undefined), "auto"); - assert.equal(policyFromRequest("auto"), "auto"); - assert.equal(policyFromRequest("deny"), "deny"); - - process.env.AGENTA_RIVET_DENY_PERMISSIONS = "true"; - assert.equal(policyFromRequest(undefined), "deny", "env forces deny"); - assert.equal(policyFromRequest("auto"), "deny", "env overrides auto"); - delete process.env.AGENTA_RIVET_DENY_PERMISSIONS; -} - -// --- decisionToReply (parity with the old inline mapping) -------------------- -{ - assert.equal(decisionToReply("allow", ["always", "once", "reject"]), "always"); - assert.equal(decisionToReply("allow", ["once", "reject"]), "once"); - assert.equal(decisionToReply("allow", []), "once", "allow falls back to once"); - assert.equal(decisionToReply("deny", ["always", "once", "reject"]), "reject"); - assert.equal(decisionToReply("deny", []), "reject", "deny falls back to reject"); -} - -// --- PolicyResponder --------------------------------------------------------- -{ - const auto = new PolicyResponder("auto"); - const deny = new PolicyResponder("deny"); - const req = { id: "p1", availableReplies: ["once", "reject"] }; - assert.equal(await auto.onPermission(req), "allow"); - assert.equal(await deny.onPermission(req), "deny"); -} - -// --- emitEvent: streaming path (sink + batch) -------------------------------- -{ - const emitted: AgentEvent[] = []; - const run = createRivetOtel({ harness: "claude", model: "anthropic/x", emit: (e) => emitted.push(e) }); - run.start({ prompt: "hi" }); - const interaction: AgentEvent 
= { - type: "interaction_request", - id: "p1", - kind: "permission", - payload: { availableReplies: ["once", "reject"] }, - }; - run.emitEvent(interaction); - - const live = emitted.find((e) => e.type === "interaction_request"); - assert.ok(live, "interaction_request flushed to the live sink"); - assert.equal((live as any).id, "p1"); - assert.ok( - run.events().some((e) => e.type === "interaction_request"), - "interaction_request also recorded in the batch log", - ); -} - -// --- emitEvent: one-shot path (batch only) ----------------------------------- -{ - const run = createRivetOtel({ harness: "claude", model: "anthropic/x" }); - run.start({ prompt: "hi" }); - run.emitEvent({ type: "data", name: "weather", data: { temp: 24 } }); - const ev = run.events().find((e) => e.type === "data"); - assert.ok(ev, "data event recorded with no live sink"); - assert.equal((ev as any).name, "weather"); -} - -console.log("responder.test.ts: all assertions passed"); diff --git a/services/agent/test/stream-events.test.ts b/services/agent/test/stream-events.test.ts deleted file mode 100644 index f27e31fc23..0000000000 --- a/services/agent/test/stream-events.test.ts +++ /dev/null @@ -1,148 +0,0 @@ -/** - * Unit test for the createRivetOtel delta/lifecycle state machine. - * - * Drives `handleUpdate` with a hand-built ACP `session/update` sequence (Claude-style - * cumulative text snapshots, a tool call between two text runs, a reasoning run) and asserts - * the streaming and one-shot event shapes. No harness, no network: spans are built offline - * and never flushed. 
- * - * Run: pnpm exec tsx test/stream-events.test.ts - */ -import assert from "node:assert/strict"; - -import { createRivetOtel } from "../src/tracing/otel.ts"; -import type { AgentEvent } from "../src/protocol.ts"; - -const textChunk = (text: string) => ({ - sessionUpdate: "agent_message_chunk", - content: { type: "text", text }, -}); -const thoughtChunk = (text: string) => ({ - sessionUpdate: "agent_thought_chunk", - content: { type: "text", text }, -}); -const toolCall = (id: string, title: string, rawInput: unknown) => ({ - sessionUpdate: "tool_call", - toolCallId: id, - title, - rawInput, -}); -const toolDone = (id: string, text: string) => ({ - sessionUpdate: "tool_call_update", - toolCallId: id, - status: "completed", - content: [{ content: { type: "text", text } }], -}); -const usage = () => ({ sessionUpdate: "usage_update", used: 100, cost: { amount: 0.01 } }); - -// The same ACP sequence drives both modes: two text runs around a tool call, then reasoning. -function drive(run: ReturnType): void { - run.start({ prompt: "weather in Paris?" }); - run.handleUpdate(textChunk("Hello ")); // pure delta - run.handleUpdate(textChunk("Hello world")); // cumulative snapshot (Claude-style) - run.handleUpdate(toolCall("call_1", "getWeather", { city: "Paris" })); - run.handleUpdate(toolDone("call_1", "sunny")); - run.handleUpdate(textChunk("Hello world It is sunny.")); // resumes after the tool - run.handleUpdate(thoughtChunk("thinking...")); - run.handleUpdate(usage()); -} - -const types = (events: AgentEvent[]) => events.map((e) => e.type); -const ofType = (events: AgentEvent[], t: T) => - events.filter((e) => e.type === t) as Extract[]; - -// --- Scenario 1: streaming (emit set) --------------------------------------- -{ - const emitted: AgentEvent[] = []; - const run = createRivetOtel({ harness: "claude", model: "anthropic/x", emit: (e) => emitted.push(e) }); - drive(run); - const finalText = run.finish(); - - // No coalesced text events on the streaming path. 
- assert.equal(ofType(emitted, "message").length, 0, "no coalesced message when streaming"); - assert.equal(ofType(emitted, "thought").length, 0, "no coalesced thought when streaming"); - - // Exactly one terminal done. - assert.equal(ofType(emitted, "done").length, 1, "exactly one done"); - - // Two text blocks (split by the tool call), one reasoning block, balanced start/end. - const mStart = ofType(emitted, "message_start"); - const mEnd = ofType(emitted, "message_end"); - assert.equal(mStart.length, 2, "two message_start"); - assert.equal(mEnd.length, 2, "two message_end"); - assert.deepEqual(mStart.map((e) => e.id), ["msg-0", "msg-1"], "stable monotonic text ids"); - const rStart = ofType(emitted, "reasoning_start"); - const rEnd = ofType(emitted, "reasoning_end"); - assert.equal(rStart.length, 1, "one reasoning_start"); - assert.equal(rEnd.length, 1, "one reasoning_end"); - - // Deltas are pure and reconstruct the full text, with no overlap/repeat. - const text = ofType(emitted, "message_delta").map((e) => e.delta).join(""); - assert.equal(text, "Hello world It is sunny.", "concatenated deltas == full text"); - assert.equal(text, finalText, "deltas match finish() output"); - const reasoning = ofType(emitted, "reasoning_delta").map((e) => e.delta).join(""); - assert.equal(reasoning, "thinking...", "concatenated reasoning deltas"); - - // Ordering invariant: each block's start precedes its deltas precede its end; tool result - // lands before the second text block opens. - const seq = types(emitted); - assert.ok(seq.indexOf("message_end") < seq.indexOf("tool_call"), "first text block closes before the tool call"); - assert.ok(seq.indexOf("tool_result") < seq.lastIndexOf("message_start"), "tool result precedes the second text block"); - for (const id of ["msg-0", "msg-1", "reason-2"]) { - const idxs = emitted - .map((e, i) => ((e as any).id === id ? 
{ i, t: e.type } : null)) - .filter(Boolean) as { i: number; t: string }[]; - assert.ok(idxs[0].t.endsWith("_start"), `${id} starts with *_start`); - assert.ok(idxs[idxs.length - 1].t.endsWith("_end"), `${id} ends with *_end`); - } -} - -// --- Scenario 2: one-shot (no emit) ----------------------------------------- -{ - const run = createRivetOtel({ harness: "claude", model: "anthropic/x" }); - drive(run); - const finalText = run.finish(); - const events = run.events(); - - // Coalesced text/thought, no delta lifecycle events. - const messages = ofType(events, "message"); - assert.equal(messages.length, 1, "one coalesced message"); - assert.equal(messages[0].text, "Hello world It is sunny.", "coalesced text == final"); - assert.equal(messages[0].text, finalText); - assert.equal(ofType(events, "thought").length, 1, "one coalesced thought"); - for (const t of ["message_start", "message_delta", "message_end", "reasoning_start", "reasoning_delta", "reasoning_end"]) { - assert.equal(events.filter((e) => e.type === t).length, 0, `no ${t} on the one-shot path`); - } - - // The structured tool/usage events are still present, with exactly one done. 
- assert.equal(ofType(events, "tool_call").length, 1, "tool_call present"); - assert.equal(ofType(events, "tool_result").length, 1, "tool_result present"); - assert.equal(ofType(events, "usage").length, 1, "usage present"); - assert.equal(ofType(events, "done").length, 1, "exactly one done"); -} - -// --- Scenario 3: span-less mode still records ACP events --------------------- -{ - const run = createRivetOtel({ harness: "pi", model: "openai-codex/x", emitSpans: false }); - drive(run); - run.setUsage({ input: 4, output: 6, total: 10, cost: 0.02 }); - const finalText = run.finish(); - const events = run.events(); - - assert.equal(finalText, "Hello world It is sunny."); - assert.equal(ofType(events, "message").length, 1, "message present without spans"); - assert.equal(ofType(events, "thought").length, 1, "thought present without spans"); - assert.equal(ofType(events, "tool_call").length, 1, "tool_call present without spans"); - assert.equal(ofType(events, "tool_result").length, 1, "tool_result present without spans"); - const usageEvents = ofType(events, "usage"); - assert.equal(usageEvents.length, 1, "usage present without spans"); - assert.deepEqual( - usageEvents[0], - { type: "usage", input: 4, output: 6, total: 10, cost: 0.02 }, - "final usage replaces stream-only usage before done", - ); - assert.equal(ofType(events, "done").length, 1, "exactly one done without spans"); - assert.ok(types(events).indexOf("usage") < types(events).indexOf("done"), "usage precedes done"); -} - -console.log("stream-events.test.ts: all assertions passed"); diff --git a/services/agent/test/tool-bridge.test.ts b/services/agent/test/tool-bridge.test.ts deleted file mode 100644 index 4dac2b3f9d..0000000000 --- a/services/agent/test/tool-bridge.test.ts +++ /dev/null @@ -1,169 +0,0 @@ -/** - * Unit tests for buildToolMcpServers (the tool MCP bridge attachment decision). - * - * Regression cover for F4: attachment must be decided per tool kind, not on the callback - * endpoint alone. 
A `code` tool runs locally in mcp-server.ts and needs no endpoint, so a run - * whose tools are all `code` must still attach the `agenta-tools` server. Only `callback`-kind - * tools require AGENTA_TOOL_CALLBACK_ENDPOINT; missing it must degrade those tools, not drop the - * whole server. `client` tools are browser-fulfilled and never justify attaching the bridge. - * - * Run: pnpm exec tsx test/tool-bridge.test.ts - */ -import assert from "node:assert/strict"; - -import { buildToolMcpServers } from "../src/tools/mcp-bridge.ts"; -import type { ResolvedToolSpec, ToolCallbackContext } from "../src/protocol.ts"; - -/** Look up an env var value by name in the ACP {name,value} list (undefined if absent). */ -function envValue( - env: { name: string; value: string }[], - name: string, -): string | undefined { - return env.find((e) => e.name === name)?.value; -} - -const relayDir = "/tmp/agenta-tools"; - -// code-only specs + no callback -> one server, with public specs and relay dir. -{ - const specs: ResolvedToolSpec[] = [ - { - name: "adder", - description: "Add numbers", - kind: "code", - runtime: "python", - code: "def main(**k): return 1", - env: { PRIVATE: "secret" }, - }, - ]; - const out = buildToolMcpServers(specs, relayDir); - assert.equal(out.length, 1, "code-only run still attaches the server"); - assert.equal(out[0].name, "agenta-tools"); - assert.ok( - envValue(out[0].env, "AGENTA_TOOL_PUBLIC_SPECS") !== undefined, - "AGENTA_TOOL_PUBLIC_SPECS is set", - ); - assert.equal( - envValue(out[0].env, "AGENTA_TOOL_CALLBACK_ENDPOINT"), - undefined, - "no endpoint env for code-only run", - ); - assert.equal(envValue(out[0].env, "AGENTA_TOOL_RELAY_DIR"), relayDir); - assert.equal(envValue(out[0].env, "AGENTA_TOOL_CALLBACK_AUTH"), undefined); - assert.equal(envValue(out[0].env, "AGENTA_TOOL_SPECS"), undefined); - // Only public metadata round-trips; private executor fields stay runner-side. 
- assert.deepEqual(JSON.parse(envValue(out[0].env, "AGENTA_TOOL_PUBLIC_SPECS")!), [ - { name: "adder", description: "Add numbers" }, - ]); -} - -// callback specs + a callback with endpoint -> still no endpoint/auth in child env. -{ - const specs: ResolvedToolSpec[] = [ - { name: "search", kind: "callback", callRef: "composio.search" }, - ]; - const callback: ToolCallbackContext = { - endpoint: "https://agenta.example/tools/call", - authorization: "Bearer tok", - }; - const out = buildToolMcpServers(specs, callback, relayDir); - assert.equal(out.length, 1); - assert.equal( - envValue(out[0].env, "AGENTA_TOOL_CALLBACK_ENDPOINT"), - undefined, - "endpoint env is never exposed to the bridge", - ); - assert.equal( - envValue(out[0].env, "AGENTA_TOOL_CALLBACK_AUTH"), - undefined, - "auth env is never exposed to the bridge", - ); - assert.equal(envValue(out[0].env, "AGENTA_TOOL_RELAY_DIR"), relayDir); -} - -// callback spec + endpoint but no authorization -> still only public metadata + relay dir. -{ - const specs: ResolvedToolSpec[] = [ - { name: "search", kind: "callback", callRef: "composio.search" }, - ]; - const out = buildToolMcpServers(specs, { endpoint: "https://agenta.example/tools/call" }, relayDir); - assert.equal(out.length, 1); - assert.equal( - envValue(out[0].env, "AGENTA_TOOL_CALLBACK_ENDPOINT"), - undefined, - ); - assert.equal( - envValue(out[0].env, "AGENTA_TOOL_CALLBACK_AUTH"), - undefined, - "no AUTH env when authorization absent", - ); -} - -// absent kind defaults to callback (back-compat): endpoint still wired when present. 
-{ - const specs: ResolvedToolSpec[] = [{ name: "legacy", callRef: "composio.legacy" }]; - const out = buildToolMcpServers(specs, { endpoint: "https://agenta.example/tools/call" }, relayDir); - assert.equal(out.length, 1, "back-compat (no kind) attaches as a callback tool"); - assert.equal( - envValue(out[0].env, "AGENTA_TOOL_CALLBACK_ENDPOINT"), - undefined, - ); -} - -// mixed code+callback specs + NO endpoint -> still one server (so code works), endpoint omitted. -{ - const specs: ResolvedToolSpec[] = [ - { name: "adder", kind: "code", runtime: "python", code: "def main(**k): return 1" }, - { name: "search", kind: "callback", callRef: "composio.search" }, - ]; - const out = buildToolMcpServers(specs, relayDir); - assert.notDeepEqual(out, [], "mixed run with no endpoint must not return []"); - assert.equal(out.length, 1, "still attaches the server so the code tool works"); - assert.equal( - envValue(out[0].env, "AGENTA_TOOL_CALLBACK_ENDPOINT"), - undefined, - "endpoint env omitted when missing", - ); - // Both executable specs are advertised, but only as public metadata. - assert.deepEqual(JSON.parse(envValue(out[0].env, "AGENTA_TOOL_PUBLIC_SPECS")!), [ - { name: "adder" }, - { name: "search" }, - ]); -} - -// empty specs -> []. -assert.deepEqual(buildToolMcpServers([], undefined), [], "empty specs -> []"); - -// client-only specs -> [] (no executable tools; the bridge does not advertise client tools). -{ - const specs: ResolvedToolSpec[] = [{ name: "confirm", kind: "client" }]; - assert.deepEqual( - buildToolMcpServers(specs, undefined), - [], - "client-only -> [] (nothing executable here)", - ); - // Even with an endpoint, client-only stays empty. - assert.deepEqual( - buildToolMcpServers(specs, { endpoint: "https://agenta.example/tools/call" }, relayDir), - [], - "client-only -> [] even with an endpoint", - ); -} - -// client tools alongside an executable one are dropped from AGENTA_TOOL_SPECS, server attaches. 
-{ - const specs: ResolvedToolSpec[] = [ - { name: "confirm", kind: "client" }, - { name: "adder", kind: "code", runtime: "python", code: "def main(**k): return 1" }, - ]; - const out = buildToolMcpServers(specs, relayDir); - assert.equal(out.length, 1, "executable spec attaches the server"); - const passed: ResolvedToolSpec[] = JSON.parse(envValue(out[0].env, "AGENTA_TOOL_PUBLIC_SPECS")!); - assert.deepEqual( - passed.map((s) => s.name), - ["adder"], - "client spec excluded from the executable list passed to the bridge", - ); -} - -console.log("tool-bridge.test.ts: all assertions passed"); diff --git a/services/agent/test/tool-dispatch.test.ts b/services/agent/test/tool-dispatch.test.ts deleted file mode 100644 index 8ec779d396..0000000000 --- a/services/agent/test/tool-dispatch.test.ts +++ /dev/null @@ -1,85 +0,0 @@ -/** - * Unit tests for the shared tool-dispatch module (tools/dispatch.ts) and its routing. - * - * The kind-dispatch ("branch on spec.kind to execute a resolved tool") used to be duplicated - * across engines/pi.ts, extensions/agenta.ts, and tools/mcp-server.ts. It now lives once in - * `runResolvedTool`. These tests cover both the routing into that function and the call-site - * advertising behavior that stays per-site: - * - buildCustomTools (pi.ts) skips `client` specs, builds a tool per `code`/`callback` spec, - * and skips a `callback` spec with no callback endpoint. - * - runResolvedTool runs a real `code` snippet end-to-end (python) and throws for `client`. - * - * No network and no harness: the `code` path shells out to python3 (available locally); the - * `callback`/relay paths are not exercised here (they need a live /tools/call or a relay dir). 
- * - * Run: pnpm exec tsx test/tool-dispatch.test.ts - */ -import assert from "node:assert/strict"; - -import { buildCustomTools } from "../src/engines/pi.ts"; -import { runResolvedTool } from "../src/tools/dispatch.ts"; -import type { ResolvedToolSpec, ToolCallbackContext } from "../src/protocol.ts"; - -const callback: ToolCallbackContext = { endpoint: "https://agenta.test/tools/call" }; - -const clientSpec: ResolvedToolSpec = { name: "client_tool", kind: "client" }; -const codeSpec: ResolvedToolSpec = { - name: "code_tool", - kind: "code", - runtime: "python", - code: 'def main(**kw):\n return {"echo": kw}\n', -}; -const callbackSpec: ResolvedToolSpec = { - name: "callback_tool", - kind: "callback", - callRef: "composio.SOME_ACTION", -}; - -// --- buildCustomTools routing ----------------------------------------------- -{ - const tools = buildCustomTools([clientSpec, codeSpec, callbackSpec], callback); - const names = tools.map((t) => t.name); - - // `client` is browser-fulfilled, so it is never registered in-process. - assert.ok(!names.includes("client_tool"), "client spec is skipped"); - // `code` and `callback` each produce exactly one tool with the spec's name. - assert.ok(names.includes("code_tool"), "code spec produces a tool"); - assert.ok(names.includes("callback_tool"), "callback spec produces a tool"); - assert.equal(tools.length, 2, "only the two executable specs produce tools"); -} - -// A `callback` spec with no callback endpoint is skipped (logged), but a sibling `code` -// spec still registers (code never needs the endpoint). 
-{ - const tools = buildCustomTools([codeSpec, callbackSpec], undefined); - const names = tools.map((t) => t.name); - assert.ok(names.includes("code_tool"), "code spec still registers without an endpoint"); - assert.ok( - !names.includes("callback_tool"), - "callback spec is skipped when no callback endpoint", - ); - assert.equal(tools.length, 1, "only the code spec registers without an endpoint"); -} - -// --- runResolvedTool: code executes; client throws -------------------------- -{ - const text = await runResolvedTool(codeSpec, { greeting: "hi", n: 3 }, { - toolCallId: "call-1", - }); - const parsed = JSON.parse(text); - assert.deepEqual( - parsed, - { echo: { greeting: "hi", n: 3 } }, - "code tool runs the snippet and returns its JSON output containing the input", - ); -} - -{ - await assert.rejects( - () => runResolvedTool(clientSpec, {}, { toolCallId: "call-2" }), - /browser-fulfilled/, - "client tool throws (never executed in-sandbox)", - ); -} - -console.log("tool-dispatch.test.ts: all assertions passed"); diff --git a/services/agent/tests/unit/cli.test.ts b/services/agent/tests/unit/cli.test.ts new file mode 100644 index 0000000000..2481b6895a --- /dev/null +++ b/services/agent/tests/unit/cli.test.ts @@ -0,0 +1,66 @@ +/** + * Unit tests for the stdin/stdout CLI transport via the `runCli(raw, stream, io)` seam. + * + * Injects a FAKE engine and a collecting `write`, so no stdin/stdout/process.exit mocking is + * needed. Covers the one-shot happy path, invalid JSON, a failing result, and the streaming + * order (event lines then exactly one terminal result line). No harness, no process exit. 
+ * + * Run: pnpm test (or: pnpm exec vitest run tests/unit/cli.test.ts) + */ +import { describe, it } from "vitest"; +import assert from "node:assert/strict"; + +import { runCli, type RunAgent } from "../../src/cli.ts"; + +const okRun: RunAgent = async () => ({ ok: true, output: "hi" }); + +function collector() { + const chunks: string[] = []; + return { chunks, write: (s: string) => chunks.push(s), text: () => chunks.join("") }; +} + +describe("runCli", () => { + it("one-shot: writes the result JSON and returns exit 0", async () => { + const out = collector(); + const code = await runCli(JSON.stringify({ backend: "pi" }), false, { run: okRun, write: out.write }); + assert.equal(code, 0); + assert.deepEqual(JSON.parse(out.text()), { ok: true, output: "hi" }); + }); + + it("invalid JSON: returns exit 1 with an error result", async () => { + const out = collector(); + const code = await runCli("{not json", false, { run: okRun, write: out.write }); + assert.equal(code, 1); + const res = JSON.parse(out.text()) as { ok: boolean; error: string }; + assert.equal(res.ok, false); + assert.match(res.error, /Invalid JSON on stdin/); + }); + + it("a failing result returns exit 1", async () => { + const out = collector(); + const code = await runCli("{}", false, { + run: async () => ({ ok: false, error: "boom" }), + write: out.write, + }); + assert.equal(code, 1); + assert.equal((JSON.parse(out.text()) as { error: string }).error, "boom"); + }); + + it("stream: event lines then exactly one terminal result line", async () => { + const out = collector(); + const streamRun: RunAgent = async (_req, emit) => { + emit?.({ type: "message", text: "a" }); + emit?.({ type: "message", text: "b" }); + return { ok: true, output: "ab", events: [{ type: "message", text: "a" }] }; + }; + const code = await runCli("{}", true, { run: streamRun, write: out.write }); + assert.equal(code, 0); + const records = out + .text() + .trim() + .split("\n") + .map((line) => JSON.parse(line) as { kind: 
string; result?: { events: unknown[] } }); + assert.deepEqual(records.map((r) => r.kind), ["event", "event", "result"]); + assert.deepEqual(records[2].result!.events, [], "terminal result does not echo events"); + }); +}); diff --git a/services/agent/tests/unit/code-tool.test.ts b/services/agent/tests/unit/code-tool.test.ts new file mode 100644 index 0000000000..5a3566614d --- /dev/null +++ b/services/agent/tests/unit/code-tool.test.ts @@ -0,0 +1,89 @@ +/** + * Unit test for the code-tool executor (runCodeTool). + * + * Exercises both runtimes end-to-end through real subprocesses: a python tool, node tools + * written as a bare top-level `function main` (the F2 regression) and as an explicit + * `module.exports.main`, an async node `main`, the F3 env-isolation guarantee (provider keys + * do NOT leak in; declared scoped secrets DO), and the non-zero-exit reject path. + * + * Needs `python3` and `node` on PATH (both present locally and on ubuntu CI runners). + * + * Run: pnpm test (or: pnpm exec vitest run tests/unit/code-tool.test.ts) + */ +import { describe, it } from "vitest"; +import assert from "node:assert/strict"; + +import { runCodeTool } from "../../src/tools/code.ts"; + +describe("runCodeTool", () => { + it("runs a python bare `def main(**kw)`", async () => { + const code = 'def main(**kw):\n return {"sum": kw.get("a", 0) + kw.get("b", 0)}\n'; + const out = await runCodeTool("python", code, undefined, { a: 2, b: 3 }); + assert.deepEqual(JSON.parse(out), { sum: 5 }, "python bare main returns the right JSON"); + }); + + it("runs a node bare top-level `function main` (F2 regression)", async () => { + const code = "function main(inputs) { return { got: inputs }; }"; + const out = await runCodeTool("node", code, undefined, { hello: "world" }); + assert.deepEqual( + JSON.parse(out), + { got: { hello: "world" } }, + "node bare function main executes and echoes the input", + ); + }); + + it("runs a node explicit `module.exports.main`", async () => { + const code = 
"module.exports.main = function (inputs) { return { via: 'exports', got: inputs }; };"; + const out = await runCodeTool("node", code, undefined, { x: 1 }); + assert.deepEqual( + JSON.parse(out), + { via: "exports", got: { x: 1 } }, + "node module.exports.main works", + ); + }); + + it("runs an async node `main` returning a Promise", async () => { + const code = + "async function main(inputs) { await new Promise((r) => setTimeout(r, 5)); return { doubled: inputs.n * 2 }; }"; + const out = await runCodeTool("node", code, undefined, { n: 21 }); + assert.deepEqual(JSON.parse(out), { doubled: 42 }, "node async main resolves"); + }); + + it("F3: provider keys do NOT leak; scoped secrets DO", async () => { + const hadKey = "OPENAI_API_KEY" in process.env; + const prevKey = process.env.OPENAI_API_KEY; + process.env.OPENAI_API_KEY = "leak-me-xyz"; + try { + // The provider key sits in process.env but must not reach the snippet. + const leakCode = "function main() { return { key: process.env.OPENAI_API_KEY ?? 'absent' }; }"; + const leakOut = await runCodeTool("node", leakCode, undefined, {}); + assert.deepEqual( + JSON.parse(leakOut), + { key: "absent" }, + "F3: OPENAI_API_KEY did NOT leak into the snippet env", + ); + + // A secret declared on the tool (passed via the scoped `env` arg) must be visible. + const scopedCode = + "function main() { return { secret: process.env.MY_TOOL_SECRET ?? 
'absent' }; }"; + const scopedOut = await runCodeTool("node", scopedCode, { MY_TOOL_SECRET: "ok" }, {}); + assert.deepEqual( + JSON.parse(scopedOut), + { secret: "ok" }, + "F3: scoped MY_TOOL_SECRET IS visible to the snippet", + ); + } finally { + if (hadKey) process.env.OPENAI_API_KEY = prevKey; + else delete process.env.OPENAI_API_KEY; + } + }); + + it("rejects when the snippet throws / exits non-zero", async () => { + const code = "function main() { throw new Error('boom'); }"; + await assert.rejects( + () => runCodeTool("node", code, undefined, {}), + /boom|exited/, + "a throwing snippet rejects", + ); + }); +}); diff --git a/services/agent/tests/unit/continuation.test.ts b/services/agent/tests/unit/continuation.test.ts new file mode 100644 index 0000000000..9d7215ebec --- /dev/null +++ b/services/agent/tests/unit/continuation.test.ts @@ -0,0 +1,72 @@ +/** + * Unit tests for the cross-turn HITL continuation substrate. + * + * Under the cold model the harness rebuilds context from the replayed transcript, and ACP + * prompt content blocks cannot carry tool calls/results. So a resolved interaction (an + * approved tool that ran, a client-fulfilled tool) must survive into the replay as text. + * `messageTranscript` encodes tool turns; `buildTurnText` keeps them in the replayed history. 
+ * + * Run: pnpm test (or: pnpm exec vitest run tests/unit/continuation.test.ts) + */ +import { describe, it } from "vitest"; +import assert from "node:assert/strict"; + +import { messageTranscript, buildTurnText } from "../../src/engines/rivet.ts"; +import { + resolveRunSessionId, + type AgentRunRequest, + type ContentBlock, +} from "../../src/protocol.ts"; + +describe("messageTranscript", () => { + it("encodes plain text, content blocks, and tool turns", () => { + assert.equal(messageTranscript("hello"), "hello"); + assert.equal(messageTranscript([{ type: "text", text: "a" }, { type: "text", text: "b" }]), "a\nb"); + assert.equal( + messageTranscript([{ type: "tool_call", toolName: "getWeather", input: { city: "Paris" } }]), + '[called getWeather({"city":"Paris"})]', + ); + assert.equal( + messageTranscript([{ type: "tool_result", toolName: "getWeather", output: { temp: 24 } }]), + '[getWeather returned: {"temp":24}]', + ); + assert.equal( + messageTranscript([{ type: "tool_result", toolName: "send", output: "boom", isError: true }]), + "[send error: boom]", + ); + }); +}); + +describe("resolveRunSessionId", () => { + it("prefers the platform session id, falling back to the ephemeral one", () => { + assert.equal( + resolveRunSessionId({ sessionId: "sess_platform" }, "runner-ephemeral"), + "sess_platform", + ); + assert.equal(resolveRunSessionId({}, "runner-ephemeral"), "runner-ephemeral"); + }); +}); + +describe("buildTurnText", () => { + it("keeps a resolved tool turn in the replay", () => { + const req: AgentRunRequest = { + messages: [ + { role: "user", content: "weather in Paris?" }, + { + role: "assistant", + content: [{ type: "tool_call", toolName: "getWeather", input: { city: "Paris" } } as ContentBlock], + }, + { + role: "tool", + content: [{ type: "tool_result", toolName: "getWeather", output: { temp: 24 } } as ContentBlock], + }, + { role: "user", content: "and tomorrow?" 
}, + ], + }; + const text = buildTurnText(req); + assert.ok(text.includes("called getWeather"), "tool call survives replay"); + assert.ok(text.includes("getWeather returned"), "tool result survives replay"); + assert.ok(text.includes("and tomorrow?"), "latest user prompt is the live turn"); + assert.ok(text.startsWith("Conversation so far:"), "transcript header present"); + }); +}); diff --git a/services/agent/tests/unit/extension-tools.test.ts b/services/agent/tests/unit/extension-tools.test.ts new file mode 100644 index 0000000000..674dad09a6 --- /dev/null +++ b/services/agent/tests/unit/extension-tools.test.ts @@ -0,0 +1,108 @@ +/** + * Regression: the Agenta Pi extension registers custom tools from AGENTA_TOOL_PUBLIC_SPECS. + * + * Guards QA finding F-005 (docs/design/agent-workflows/qa/findings.md): a build where the + * extension stopped reading AGENTA_TOOL_PUBLIC_SPECS shipped custom tools that the model never + * saw, so it improvised with bash and failed. This pins the contract at the source: given the + * public-spec env the runner sets (buildPiExtensionEnv in engines/rivet.ts), the extension + * factory calls pi.registerTool once per spec, passes the JSON Schema through, and gives each + * tool an execute() that relays to the runner. It is also inert when the env is absent. 
+ * + * Run: pnpm test (or: pnpm exec vitest run tests/unit/extension-tools.test.ts) + */ +import { afterEach, describe, it } from "vitest"; +import assert from "node:assert/strict"; + +import factory from "../../src/extensions/agenta.ts"; + +const TOOL_ENV = [ + "AGENTA_TOOL_PUBLIC_SPECS", + "AGENTA_TOOL_RELAY_DIR", + "AGENTA_TRACEPARENT", + "AGENTA_OTLP_ENDPOINT", + "AGENTA_USAGE_OUT", + "AGENTA_CAPTURE_CONTENT", +]; + +function fakePi() { + const registered: any[] = []; + return { + registered, + registerTool(spec: any) { + registered.push(spec); + }, + on() {}, + }; +} + +function clearEnv() { + for (const key of TOOL_ENV) delete process.env[key]; +} + +afterEach(clearEnv); + +describe("agenta extension tool registration", () => { + it("registers one tool per public spec, schema passed through", () => { + clearEnv(); + process.env.AGENTA_TOOL_PUBLIC_SPECS = JSON.stringify([ + { + name: "secret_math", + description: "qa math", + inputSchema: { + type: "object", + properties: { x: { type: "integer" } }, + required: ["x"], + }, + }, + { name: "no_schema_tool", description: "no schema" }, + ]); + process.env.AGENTA_TOOL_RELAY_DIR = "/tmp/agenta-relay-test"; + + const pi = fakePi(); + factory(pi as any); + + assert.equal(pi.registered.length, 2, "registers one tool per public spec"); + assert.deepEqual( + pi.registered.map((t) => t.name), + ["secret_math", "no_schema_tool"], + "registers each spec by name", + ); + + const math = pi.registered[0]; + assert.equal(math.description, "qa math", "carries the description"); + assert.ok( + math.parameters && math.parameters.properties && math.parameters.properties.x, + "passes the JSON Schema through to Pi", + ); + assert.equal(typeof math.execute, "function", "each tool has an execute() that relays"); + + const noSchema = pi.registered[1]; + assert.ok( + noSchema.parameters, + "a spec without inputSchema falls back to a schema, never undefined", + ); + }); + + it("is inert without the tool env (the F-005 bug shape: never 
delivered)", () => { + clearEnv(); + const pi = fakePi(); + factory(pi as any); + assert.equal( + pi.registered.length, + 0, + "no tool env => registers nothing (no silent partial state)", + ); + }); + + it("does not register when specs are present but the relay dir is missing", () => { + clearEnv(); + process.env.AGENTA_TOOL_PUBLIC_SPECS = JSON.stringify([{ name: "x" }]); + const pi = fakePi(); + factory(pi as any); + assert.equal( + pi.registered.length, + 0, + "specs without a relay dir do not register (incomplete wiring is not honored)", + ); + }); +}); diff --git a/services/agent/tests/unit/mcp-servers.test.ts b/services/agent/tests/unit/mcp-servers.test.ts new file mode 100644 index 0000000000..d77e63297b --- /dev/null +++ b/services/agent/tests/unit/mcp-servers.test.ts @@ -0,0 +1,58 @@ +/** + * Unit tests for the user-declared MCP server conversion (Agent B's Slice 4, wired in rivet). + * + * Agent B's `resolve_mcp_servers` emits the McpServerConfig wire shape + * ({name,transport,command,args,env,url?,tools?}, env as a Record), pinned in the Python + * test_wire_contract. This covers the TS half: converting that to the ACP stdio entry the + * session consumes (env as a {name,value} list), skipping remote/http, and not enforcing the + * per-server tools allowlist over ACP in v1. 
+ * + * Run: pnpm test (or: pnpm exec vitest run tests/unit/mcp-servers.test.ts) + */ +import { describe, it } from "vitest"; +import assert from "node:assert/strict"; + +import { toAcpMcpServers } from "../../src/engines/rivet.ts"; +import type { McpServerConfig } from "../../src/protocol.ts"; + +describe("toAcpMcpServers", () => { + it("maps empty input to []", () => { + assert.deepEqual(toAcpMcpServers(undefined), [], "undefined -> []"); + assert.deepEqual(toAcpMcpServers([]), [], "[] -> []"); + }); + + it("converts a stdio server's env Record to an ACP {name,value} list", () => { + const servers: McpServerConfig[] = [ + { + name: "github", + transport: "stdio", + command: "npx", + args: ["-y", "@modelcontextprotocol/server-github"], + env: { GITHUB_PERSONAL_ACCESS_TOKEN: "ghp_x", LOG_LEVEL: "info" }, + tools: ["create_issue"], // allowlist not enforced over ACP v1 (logged), server still delivered + }, + ]; + const out = toAcpMcpServers(servers); + assert.equal(out.length, 1); + assert.equal(out[0].name, "github"); + assert.equal(out[0].command, "npx"); + assert.deepEqual(out[0].args, ["-y", "@modelcontextprotocol/server-github"]); + assert.deepEqual(out[0].env, [ + { name: "GITHUB_PERSONAL_ACCESS_TOKEN", value: "ghp_x" }, + { name: "LOG_LEVEL", value: "info" }, + ]); + }); + + it("skips remote/http and command-less stdio servers", () => { + const out = toAcpMcpServers([ + { name: "remote", transport: "http", url: "https://example.com/mcp" }, + { name: "broken", transport: "stdio" }, // no command + ]); + assert.deepEqual(out, [], "http + command-less stdio both skipped"); + }); + + it("defaults missing env / args to empty", () => { + const out = toAcpMcpServers([{ name: "fs", transport: "stdio", command: "mcp-fs" }]); + assert.deepEqual(out, [{ name: "fs", command: "mcp-fs", args: [], env: [] }]); + }); +}); diff --git a/services/agent/tests/unit/responder.test.ts b/services/agent/tests/unit/responder.test.ts new file mode 100644 index 0000000000..ebe4eb0412 
--- /dev/null +++ b/services/agent/tests/unit/responder.test.ts @@ -0,0 +1,92 @@ +/** + * Unit tests for the interaction responder seam and the otel `emitEvent` hook. + * + * Covers the behavior parity of the responder (it replaces the old inline auto-approve in + * rivet.ts) and that an out-of-stream event (an `interaction_request`) routed through + * `emitEvent` lands in both the live sink and the batch `events()` log. No harness, no + * network. + * + * Run: pnpm test (or: pnpm exec vitest run tests/unit/responder.test.ts) + */ +import { afterEach, describe, it } from "vitest"; +import assert from "node:assert/strict"; + +import { createRivetOtel } from "../../src/tracing/otel.ts"; +import type { AgentEvent } from "../../src/protocol.ts"; +import { + PolicyResponder, + decisionToReply, + policyFromRequest, +} from "../../src/responder.ts"; + +// Defensive cleanup: policyFromRequest reads this env var; never let it leak past a test +// (e.g. if an assertion throws mid-test, before the inline delete runs). 
+afterEach(() => { + delete process.env.AGENTA_RIVET_DENY_PERMISSIONS; +}); + +describe("policyFromRequest", () => { + it("honors the arg and the env override", () => { + delete process.env.AGENTA_RIVET_DENY_PERMISSIONS; + assert.equal(policyFromRequest(undefined), "auto"); + assert.equal(policyFromRequest("auto"), "auto"); + assert.equal(policyFromRequest("deny"), "deny"); + + process.env.AGENTA_RIVET_DENY_PERMISSIONS = "true"; + assert.equal(policyFromRequest(undefined), "deny", "env forces deny"); + assert.equal(policyFromRequest("auto"), "deny", "env overrides auto"); + delete process.env.AGENTA_RIVET_DENY_PERMISSIONS; + }); +}); + +describe("decisionToReply (parity with the old inline mapping)", () => { + it("maps allow/deny onto the available replies", () => { + assert.equal(decisionToReply("allow", ["always", "once", "reject"]), "always"); + assert.equal(decisionToReply("allow", ["once", "reject"]), "once"); + assert.equal(decisionToReply("allow", []), "once", "allow falls back to once"); + assert.equal(decisionToReply("deny", ["always", "once", "reject"]), "reject"); + assert.equal(decisionToReply("deny", []), "reject", "deny falls back to reject"); + }); +}); + +describe("PolicyResponder", () => { + it("auto allows and deny denies", async () => { + const auto = new PolicyResponder("auto"); + const deny = new PolicyResponder("deny"); + const req = { id: "p1", availableReplies: ["once", "reject"] }; + assert.equal(await auto.onPermission(req), "allow"); + assert.equal(await deny.onPermission(req), "deny"); + }); +}); + +describe("emitEvent", () => { + it("streaming path: flushes to the live sink and the batch log", () => { + const emitted: AgentEvent[] = []; + const run = createRivetOtel({ harness: "claude", model: "anthropic/x", emit: (e) => emitted.push(e) }); + run.start({ prompt: "hi" }); + const interaction: AgentEvent = { + type: "interaction_request", + id: "p1", + kind: "permission", + payload: { availableReplies: ["once", "reject"] }, + }; + 
run.emitEvent(interaction); + + const live = emitted.find((e) => e.type === "interaction_request"); + assert.ok(live, "interaction_request flushed to the live sink"); + assert.equal((live as any).id, "p1"); + assert.ok( + run.events().some((e) => e.type === "interaction_request"), + "interaction_request also recorded in the batch log", + ); + }); + + it("one-shot path: records in the batch log only", () => { + const run = createRivetOtel({ harness: "claude", model: "anthropic/x" }); + run.start({ prompt: "hi" }); + run.emitEvent({ type: "data", name: "weather", data: { temp: 24 } }); + const ev = run.events().find((e) => e.type === "data"); + assert.ok(ev, "data event recorded with no live sink"); + assert.equal((ev as any).name, "weather"); + }); +}); diff --git a/services/agent/tests/unit/server.test.ts b/services/agent/tests/unit/server.test.ts new file mode 100644 index 0000000000..badf61db2c --- /dev/null +++ b/services/agent/tests/unit/server.test.ts @@ -0,0 +1,109 @@ +/** + * Unit tests for the HTTP transport via the `createAgentServer(run)` seam. + * + * Starts a real server on an ephemeral port with a FAKE engine (no Pi/Claude/rivet) and makes + * real requests. Covers /health, the /run happy path, invalid JSON (400), a failing result + * (500), and the NDJSON streaming order (events first, then exactly one terminal result). 
+ * + * Run: pnpm test (or: pnpm exec vitest run tests/unit/server.test.ts) + */ +import { describe, it } from "vitest"; +import assert from "node:assert/strict"; +import type { AddressInfo } from "node:net"; + +import { createAgentServer, type RunAgent } from "../../src/server.ts"; + +async function listen(run: RunAgent): Promise<{ url: string; close: () => Promise<void> }> { + const server = createAgentServer(run); + await new Promise<void>((resolve) => server.listen(0, "127.0.0.1", resolve)); + const { port } = server.address() as AddressInfo; + return { + url: `http://127.0.0.1:${port}`, + close: () => new Promise<void>((resolve) => server.close(() => resolve())), + }; +} + +const okRun: RunAgent = async () => ({ ok: true, output: "hi", events: [] }); + +describe("createAgentServer", () => { + it("GET /health returns runner identity", async () => { + const s = await listen(okRun); + try { + const res = await fetch(`${s.url}/health`); + assert.equal(res.status, 200); + const body = (await res.json()) as Record<string, unknown>; + assert.equal(body.status, "ok"); + assert.equal(typeof body.runner, "string"); + assert.equal(typeof body.protocol, "number"); + assert.ok(Array.isArray(body.engines) && (body.engines as unknown[]).includes("pi")); + assert.ok(Array.isArray(body.harnesses)); + } finally { + await s.close(); + } + }); + + it("POST /run returns the engine result (200)", async () => { + const s = await listen(okRun); + try { + const res = await fetch(`${s.url}/run`, { method: "POST", body: JSON.stringify({ backend: "pi" }) }); + assert.equal(res.status, 200); + const body = (await res.json()) as { ok: boolean; output: string }; + assert.equal(body.ok, true); + assert.equal(body.output, "hi"); + } finally { + await s.close(); + } + }); + + it("POST /run with invalid JSON returns 400", async () => { + const s = await listen(okRun); + try { + const res = await fetch(`${s.url}/run`, { method: "POST", body: "{not json" }); + assert.equal(res.status, 400); + const body = (await res.json()) as { 
ok: boolean; error: string }; + assert.equal(body.ok, false); + assert.match(body.error, /Invalid JSON/); + } finally { + await s.close(); + } + }); + + it("a failing result returns 500", async () => { + const failRun: RunAgent = async () => ({ ok: false, error: "boom" }); + const s = await listen(failRun); + try { + const res = await fetch(`${s.url}/run`, { method: "POST", body: "{}" }); + assert.equal(res.status, 500); + const body = (await res.json()) as { ok: boolean; error: string }; + assert.equal(body.ok, false); + assert.equal(body.error, "boom"); + } finally { + await s.close(); + } + }); + + it("NDJSON stream: events first, then exactly one terminal result with no echoed events", async () => { + const streamRun: RunAgent = async (_req, emit) => { + emit?.({ type: "message", text: "a" }); + emit?.({ type: "message", text: "b" }); + return { ok: true, output: "ab", events: [{ type: "message", text: "a" }] }; + }; + const s = await listen(streamRun); + try { + const res = await fetch(`${s.url}/run`, { + method: "POST", + headers: { accept: "application/x-ndjson" }, + body: "{}", + }); + assert.equal(res.status, 200); + const records = (await res.text()) + .trim() + .split("\n") + .map((line) => JSON.parse(line) as { kind: string; result?: { events: unknown[] } }); + assert.deepEqual(records.map((r) => r.kind), ["event", "event", "result"]); + assert.deepEqual(records[2].result!.events, [], "terminal result does not echo events"); + } finally { + await s.close(); + } + }); +}); diff --git a/services/agent/tests/unit/stream-events.test.ts b/services/agent/tests/unit/stream-events.test.ts new file mode 100644 index 0000000000..ff9bd1437b --- /dev/null +++ b/services/agent/tests/unit/stream-events.test.ts @@ -0,0 +1,146 @@ +/** + * Unit test for the createRivetOtel delta/lifecycle state machine. 
+ * + * Drives `handleUpdate` with a hand-built ACP `session/update` sequence (Claude-style + * cumulative text snapshots, a tool call between two text runs, a reasoning run) and asserts + * the streaming and one-shot event shapes. No harness, no network: spans are built offline + * and never flushed. + * + * Run: pnpm test (or: pnpm exec vitest run tests/unit/stream-events.test.ts) + */ +import { describe, it } from "vitest"; +import assert from "node:assert/strict"; + +import { createRivetOtel } from "../../src/tracing/otel.ts"; +import type { AgentEvent } from "../../src/protocol.ts"; + +const textChunk = (text: string) => ({ + sessionUpdate: "agent_message_chunk", + content: { type: "text", text }, +}); +const thoughtChunk = (text: string) => ({ + sessionUpdate: "agent_thought_chunk", + content: { type: "text", text }, +}); +const toolCall = (id: string, title: string, rawInput: unknown) => ({ + sessionUpdate: "tool_call", + toolCallId: id, + title, + rawInput, +}); +const toolDone = (id: string, text: string) => ({ + sessionUpdate: "tool_call_update", + toolCallId: id, + status: "completed", + content: [{ content: { type: "text", text } }], +}); +const usage = () => ({ sessionUpdate: "usage_update", used: 100, cost: { amount: 0.01 } }); + +// The same ACP sequence drives both modes: two text runs around a tool call, then reasoning. +function drive(run: ReturnType<typeof createRivetOtel>): void { + run.start({ prompt: "weather in Paris?" 
}); + run.handleUpdate(textChunk("Hello ")); // pure delta + run.handleUpdate(textChunk("Hello world")); // cumulative snapshot (Claude-style) + run.handleUpdate(toolCall("call_1", "getWeather", { city: "Paris" })); + run.handleUpdate(toolDone("call_1", "sunny")); + run.handleUpdate(textChunk("Hello world It is sunny.")); // resumes after the tool + run.handleUpdate(thoughtChunk("thinking...")); + run.handleUpdate(usage()); +} + +const types = (events: AgentEvent[]) => events.map((e) => e.type); +const ofType = <T extends AgentEvent["type"]>(events: AgentEvent[], t: T) => + events.filter((e) => e.type === t) as Extract<AgentEvent, { type: T }>[]; + +describe("createRivetOtel state machine", () => { + it("scenario 1: streaming (emit set) yields pure deltas and balanced lifecycle", () => { + const emitted: AgentEvent[] = []; + const run = createRivetOtel({ harness: "claude", model: "anthropic/x", emit: (e) => emitted.push(e) }); + drive(run); + const finalText = run.finish(); + + // No coalesced text events on the streaming path. + assert.equal(ofType(emitted, "message").length, 0, "no coalesced message when streaming"); + assert.equal(ofType(emitted, "thought").length, 0, "no coalesced thought when streaming"); + + // Exactly one terminal done. + assert.equal(ofType(emitted, "done").length, 1, "exactly one done"); + + // Two text blocks (split by the tool call), one reasoning block, balanced start/end. + const mStart = ofType(emitted, "message_start"); + const mEnd = ofType(emitted, "message_end"); + assert.equal(mStart.length, 2, "two message_start"); + assert.equal(mEnd.length, 2, "two message_end"); + assert.deepEqual(mStart.map((e) => e.id), ["msg-0", "msg-1"], "stable monotonic text ids"); + const rStart = ofType(emitted, "reasoning_start"); + const rEnd = ofType(emitted, "reasoning_end"); + assert.equal(rStart.length, 1, "one reasoning_start"); + assert.equal(rEnd.length, 1, "one reasoning_end"); + + // Deltas are pure and reconstruct the full text, with no overlap/repeat. 
+ const text = ofType(emitted, "message_delta").map((e) => e.delta).join(""); + assert.equal(text, "Hello world It is sunny.", "concatenated deltas == full text"); + assert.equal(text, finalText, "deltas match finish() output"); + const reasoning = ofType(emitted, "reasoning_delta").map((e) => e.delta).join(""); + assert.equal(reasoning, "thinking...", "concatenated reasoning deltas"); + + // Ordering invariant: each block's start precedes its deltas precede its end; tool result + // lands before the second text block opens. + const seq = types(emitted); + assert.ok(seq.indexOf("message_end") < seq.indexOf("tool_call"), "first text block closes before the tool call"); + assert.ok(seq.indexOf("tool_result") < seq.lastIndexOf("message_start"), "tool result precedes the second text block"); + for (const id of ["msg-0", "msg-1", "reason-2"]) { + const idxs = emitted + .map((e, i) => ((e as any).id === id ? { i, t: e.type } : null)) + .filter(Boolean) as { i: number; t: string }[]; + assert.ok(idxs[0].t.endsWith("_start"), `${id} starts with *_start`); + assert.ok(idxs[idxs.length - 1].t.endsWith("_end"), `${id} ends with *_end`); + } + }); + + it("scenario 2: one-shot (no emit) coalesces text/thought and keeps structured events", () => { + const run = createRivetOtel({ harness: "claude", model: "anthropic/x" }); + drive(run); + const finalText = run.finish(); + const events = run.events(); + + // Coalesced text/thought, no delta lifecycle events. 
+ const messages = ofType(events, "message"); + assert.equal(messages.length, 1, "one coalesced message"); + assert.equal(messages[0].text, "Hello world It is sunny.", "coalesced text == final"); + assert.equal(messages[0].text, finalText); + assert.equal(ofType(events, "thought").length, 1, "one coalesced thought"); + for (const t of ["message_start", "message_delta", "message_end", "reasoning_start", "reasoning_delta", "reasoning_end"]) { + assert.equal(events.filter((e) => e.type === t).length, 0, `no ${t} on the one-shot path`); + } + + // The structured tool/usage events are still present, with exactly one done. + assert.equal(ofType(events, "tool_call").length, 1, "tool_call present"); + assert.equal(ofType(events, "tool_result").length, 1, "tool_result present"); + assert.equal(ofType(events, "usage").length, 1, "usage present"); + assert.equal(ofType(events, "done").length, 1, "exactly one done"); + }); + + it("scenario 3: span-less mode still records ACP events and final usage", () => { + const run = createRivetOtel({ harness: "pi", model: "openai-codex/x", emitSpans: false }); + drive(run); + run.setUsage({ input: 4, output: 6, total: 10, cost: 0.02 }); + const finalText = run.finish(); + const events = run.events(); + + assert.equal(finalText, "Hello world It is sunny."); + assert.equal(ofType(events, "message").length, 1, "message present without spans"); + assert.equal(ofType(events, "thought").length, 1, "thought present without spans"); + assert.equal(ofType(events, "tool_call").length, 1, "tool_call present without spans"); + assert.equal(ofType(events, "tool_result").length, 1, "tool_result present without spans"); + const usageEvents = ofType(events, "usage"); + assert.equal(usageEvents.length, 1, "usage present without spans"); + assert.deepEqual( + usageEvents[0], + { type: "usage", input: 4, output: 6, total: 10, cost: 0.02 }, + "final usage replaces stream-only usage before done", + ); + assert.equal(ofType(events, "done").length, 1, "exactly 
one done without spans"); + assert.ok(types(events).indexOf("usage") < types(events).indexOf("done"), "usage precedes done"); + }); +}); diff --git a/services/agent/tests/unit/tool-bridge.test.ts b/services/agent/tests/unit/tool-bridge.test.ts new file mode 100644 index 0000000000..fcd7eb6a13 --- /dev/null +++ b/services/agent/tests/unit/tool-bridge.test.ts @@ -0,0 +1,157 @@ +/** + * Unit tests for buildToolMcpServers (the tool MCP bridge attachment decision). + * + * Regression cover for F4: attachment must be decided per tool kind, not on the callback + * endpoint alone. A `code` tool runs locally in mcp-server.ts and needs no endpoint, so a run + * whose tools are all `code` must still attach the `agenta-tools` server. Only `callback`-kind + * tools require AGENTA_TOOL_CALLBACK_ENDPOINT; missing it must degrade those tools, not drop the + * whole server. `client` tools are browser-fulfilled and never justify attaching the bridge. + * + * Run: pnpm test (or: pnpm exec vitest run tests/unit/tool-bridge.test.ts) + */ +import { describe, it } from "vitest"; +import assert from "node:assert/strict"; + +import { buildToolMcpServers } from "../../src/tools/mcp-bridge.ts"; +import type { ResolvedToolSpec, ToolCallbackContext } from "../../src/protocol.ts"; + +/** Look up an env var value by name in the ACP {name,value} list (undefined if absent). 
*/ +function envValue( + env: { name: string; value: string }[], + name: string, +): string | undefined { + return env.find((e) => e.name === name)?.value; +} + +const relayDir = "/tmp/agenta-tools"; + +describe("buildToolMcpServers", () => { + it("attaches the server for a code-only run, with public specs and relay dir", () => { + const specs: ResolvedToolSpec[] = [ + { + name: "adder", + description: "Add numbers", + kind: "code", + runtime: "python", + code: "def main(**k): return 1", + env: { PRIVATE: "secret" }, + }, + ]; + const out = buildToolMcpServers(specs, relayDir); + assert.equal(out.length, 1, "code-only run still attaches the server"); + assert.equal(out[0].name, "agenta-tools"); + assert.ok( + envValue(out[0].env, "AGENTA_TOOL_PUBLIC_SPECS") !== undefined, + "AGENTA_TOOL_PUBLIC_SPECS is set", + ); + assert.equal( + envValue(out[0].env, "AGENTA_TOOL_CALLBACK_ENDPOINT"), + undefined, + "no endpoint env for code-only run", + ); + assert.equal(envValue(out[0].env, "AGENTA_TOOL_RELAY_DIR"), relayDir); + assert.equal(envValue(out[0].env, "AGENTA_TOOL_CALLBACK_AUTH"), undefined); + assert.equal(envValue(out[0].env, "AGENTA_TOOL_SPECS"), undefined); + // Only public metadata round-trips; private executor fields stay runner-side. 
+ assert.deepEqual(JSON.parse(envValue(out[0].env, "AGENTA_TOOL_PUBLIC_SPECS")!), [ + { name: "adder", description: "Add numbers" }, + ]); + }); + + it("never exposes endpoint/auth env to the bridge child (callback + full callback)", () => { + const specs: ResolvedToolSpec[] = [ + { name: "search", kind: "callback", callRef: "composio.search" }, + ]; + const callback: ToolCallbackContext = { + endpoint: "https://agenta.example/tools/call", + authorization: "Bearer tok", + }; + const out = buildToolMcpServers(specs, callback, relayDir); + assert.equal(out.length, 1); + assert.equal( + envValue(out[0].env, "AGENTA_TOOL_CALLBACK_ENDPOINT"), + undefined, + "endpoint env is never exposed to the bridge", + ); + assert.equal( + envValue(out[0].env, "AGENTA_TOOL_CALLBACK_AUTH"), + undefined, + "auth env is never exposed to the bridge", + ); + assert.equal(envValue(out[0].env, "AGENTA_TOOL_RELAY_DIR"), relayDir); + }); + + it("omits AUTH env when authorization is absent (endpoint but no auth)", () => { + const specs: ResolvedToolSpec[] = [ + { name: "search", kind: "callback", callRef: "composio.search" }, + ]; + const out = buildToolMcpServers(specs, { endpoint: "https://agenta.example/tools/call" }, relayDir); + assert.equal(out.length, 1); + assert.equal(envValue(out[0].env, "AGENTA_TOOL_CALLBACK_ENDPOINT"), undefined); + assert.equal( + envValue(out[0].env, "AGENTA_TOOL_CALLBACK_AUTH"), + undefined, + "no AUTH env when authorization absent", + ); + }); + + it("treats an absent kind as callback (back-compat)", () => { + const specs: ResolvedToolSpec[] = [{ name: "legacy", callRef: "composio.legacy" }]; + const out = buildToolMcpServers(specs, { endpoint: "https://agenta.example/tools/call" }, relayDir); + assert.equal(out.length, 1, "back-compat (no kind) attaches as a callback tool"); + assert.equal(envValue(out[0].env, "AGENTA_TOOL_CALLBACK_ENDPOINT"), undefined); + }); + + it("attaches one server for a mixed code+callback run with no endpoint", () => { + const specs: 
ResolvedToolSpec[] = [ + { name: "adder", kind: "code", runtime: "python", code: "def main(**k): return 1" }, + { name: "search", kind: "callback", callRef: "composio.search" }, + ]; + const out = buildToolMcpServers(specs, relayDir); + assert.notDeepEqual(out, [], "mixed run with no endpoint must not return []"); + assert.equal(out.length, 1, "still attaches the server so the code tool works"); + assert.equal( + envValue(out[0].env, "AGENTA_TOOL_CALLBACK_ENDPOINT"), + undefined, + "endpoint env omitted when missing", + ); + // Both executable specs are advertised, but only as public metadata. + assert.deepEqual(JSON.parse(envValue(out[0].env, "AGENTA_TOOL_PUBLIC_SPECS")!), [ + { name: "adder" }, + { name: "search" }, + ]); + }); + + it("returns [] for empty specs", () => { + assert.deepEqual(buildToolMcpServers([], undefined), [], "empty specs -> []"); + }); + + it("returns [] for client-only specs (nothing executable, even with an endpoint)", () => { + const specs: ResolvedToolSpec[] = [{ name: "confirm", kind: "client" }]; + assert.deepEqual( + buildToolMcpServers(specs, undefined), + [], + "client-only -> [] (nothing executable here)", + ); + assert.deepEqual( + buildToolMcpServers(specs, { endpoint: "https://agenta.example/tools/call" }, relayDir), + [], + "client-only -> [] even with an endpoint", + ); + }); + + it("drops client tools from the advertised list but still attaches for an executable sibling", () => { + const specs: ResolvedToolSpec[] = [ + { name: "confirm", kind: "client" }, + { name: "adder", kind: "code", runtime: "python", code: "def main(**k): return 1" }, + ]; + const out = buildToolMcpServers(specs, relayDir); + assert.equal(out.length, 1, "executable spec attaches the server"); + const passed: ResolvedToolSpec[] = JSON.parse(envValue(out[0].env, "AGENTA_TOOL_PUBLIC_SPECS")!); + assert.deepEqual( + passed.map((s) => s.name), + ["adder"], + "client spec excluded from the executable list passed to the bridge", + ); + }); +}); diff --git 
a/services/agent/tests/unit/tool-dispatch.test.ts b/services/agent/tests/unit/tool-dispatch.test.ts new file mode 100644 index 0000000000..af27dc991f --- /dev/null +++ b/services/agent/tests/unit/tool-dispatch.test.ts @@ -0,0 +1,123 @@ +/** + * Unit tests for the shared tool-dispatch module (tools/dispatch.ts) and its routing. + * + * The kind-dispatch ("branch on spec.kind to execute a resolved tool") used to be duplicated + * across engines/pi.ts, extensions/agenta.ts, and tools/mcp-server.ts. It now lives once in + * `runResolvedTool`. These tests cover both the routing into that function and the call-site + * advertising behavior that stays per-site: + * - buildCustomTools (pi.ts) skips `client` specs, builds a tool per `code`/`callback` spec, + * and skips a `callback` spec with no callback endpoint. + * - runResolvedTool runs a real `code` snippet end-to-end (python) and throws for `client`. + * + * No network and no harness: the `code` path shells out to python3 (available locally); the + * `callback`/relay paths are not exercised here (they need a live /tools/call or a relay dir). 
+ * + * Run: pnpm test (or: pnpm exec vitest run tests/unit/tool-dispatch.test.ts) + */ +import { describe, it } from "vitest"; +import assert from "node:assert/strict"; +import { mkdtempSync, rmSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { buildCustomTools } from "../../src/engines/pi.ts"; +import { relayToolCall, runResolvedTool } from "../../src/tools/dispatch.ts"; +import { RELAY_RES_SUFFIX, sanitizeRelayId } from "../../src/tools/relay.ts"; +import type { ResolvedToolSpec, ToolCallbackContext } from "../../src/protocol.ts"; + +const callback: ToolCallbackContext = { endpoint: "https://agenta.test/tools/call" }; + +const clientSpec: ResolvedToolSpec = { name: "client_tool", kind: "client" }; +const codeSpec: ResolvedToolSpec = { + name: "code_tool", + kind: "code", + runtime: "python", + code: 'def main(**kw):\n return {"echo": kw}\n', +}; +const callbackSpec: ResolvedToolSpec = { + name: "callback_tool", + kind: "callback", + callRef: "composio.SOME_ACTION", +}; + +describe("buildCustomTools routing", () => { + it("skips client specs and builds one tool per code/callback spec", () => { + const tools = buildCustomTools([clientSpec, codeSpec, callbackSpec], callback); + const names = tools.map((t) => t.name); + + // `client` is browser-fulfilled, so it is never registered in-process. + assert.ok(!names.includes("client_tool"), "client spec is skipped"); + // `code` and `callback` each produce exactly one tool with the spec's name. 
+ assert.ok(names.includes("code_tool"), "code spec produces a tool"); + assert.ok(names.includes("callback_tool"), "callback spec produces a tool"); + assert.equal(tools.length, 2, "only the two executable specs produce tools"); + }); + + it("skips a callback spec with no endpoint but keeps a sibling code spec", () => { + const tools = buildCustomTools([codeSpec, callbackSpec], undefined); + const names = tools.map((t) => t.name); + assert.ok(names.includes("code_tool"), "code spec still registers without an endpoint"); + assert.ok( + !names.includes("callback_tool"), + "callback spec is skipped when no callback endpoint", + ); + assert.equal(tools.length, 1, "only the code spec registers without an endpoint"); + }); +}); + +describe("runResolvedTool", () => { + it("runs a code spec end-to-end (python)", async () => { + const text = await runResolvedTool(codeSpec, { greeting: "hi", n: 3 }, { + toolCallId: "call-1", + }); + const parsed = JSON.parse(text); + assert.deepEqual( + parsed, + { echo: { greeting: "hi", n: 3 } }, + "code tool runs the snippet and returns its JSON output containing the input", + ); + }); + + it("throws for a client spec (never executed in-sandbox)", async () => { + await assert.rejects( + () => runResolvedTool(clientSpec, {}, { toolCallId: "call-2" }), + /browser-fulfilled/, + "client tool throws (never executed in-sandbox)", + ); + }); +}); + +// Directly exercises the Daytona file-relay path (the code site of the fixed `callRef` bug): +// pre-write the response file the runner watches for, then call relayToolCall and read it back. 
+describe("relayToolCall (Daytona file relay)", () => { + it("returns the relayed text when the response is ok", async () => { + const dir = mkdtempSync(join(tmpdir(), "agenta-relay-test-")); + try { + const toolCallId = "call-ok"; + const resPath = join(dir, sanitizeRelayId(toolCallId) + RELAY_RES_SUFFIX); + writeFileSync(resPath, JSON.stringify({ ok: true, text: "relayed-ok" })); + const out = await relayToolCall(dir, "myTool", toolCallId, { a: 1 }); + assert.equal(out, "relayed-ok"); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); + + it("reports the tool name on an empty relay error (regression for the callRef bug)", async () => { + const dir = mkdtempSync(join(tmpdir(), "agenta-relay-test-")); + try { + const toolCallId = "call-err"; + const resPath = join(dir, sanitizeRelayId(toolCallId) + RELAY_RES_SUFFIX); + // ok:false with an empty error string forces the fallback message, which referenced the + // undefined `callRef` before the fix and would have thrown a ReferenceError instead. 
+ writeFileSync(resPath, JSON.stringify({ ok: false, error: "" })); + await assert.rejects( + () => relayToolCall(dir, "myTool", toolCallId, {}), + /tool relay failed for myTool/, + "the error message uses toolName, not an undefined callRef", + ); + } finally { + rmSync(dir, { recursive: true, force: true }); + } + }); +}); diff --git a/services/agent/tsconfig.json b/services/agent/tsconfig.json index b8314675f3..be7c8f733b 100644 --- a/services/agent/tsconfig.json +++ b/services/agent/tsconfig.json @@ -12,5 +12,5 @@ "resolveJsonModule": true, "allowImportingTsExtensions": true }, - "include": ["src/**/*.ts"] + "include": ["src/**/*.ts", "tests/**/*.ts", "vitest.config.ts"] } diff --git a/services/agent/vitest.config.ts b/services/agent/vitest.config.ts new file mode 100644 index 0000000000..f11ad12029 --- /dev/null +++ b/services/agent/vitest.config.ts @@ -0,0 +1,20 @@ +import { defineConfig } from "vitest/config"; + +// Mirrors the web/packages/* convention: node env, junit for CI publishing, v8 coverage +// over src/. Unit tests live in tests/unit/**; the runner code stays in src/. 
+export default defineConfig({ + test: { + include: ["tests/unit/**/*.test.ts"], + environment: "node", + reporters: ["default", "junit"], + outputFile: { + junit: "./test-results/junit.xml", + }, + coverage: { + provider: "v8", + include: ["src/**/*.ts"], + reporter: ["text", "lcov", "json-summary"], + reportsDirectory: "./coverage", + }, + }, +}); From 6eb063c0e07b2f32111afe889a4d5a35a6d60c8e Mon Sep 17 00:00:00 2001 From: Mahmoud Mabrouk Date: Sun, 21 Jun 2026 01:14:51 +0200 Subject: [PATCH 2/2] Update .github/workflows/12-check-unit-tests.yml Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- .github/workflows/12-check-unit-tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/12-check-unit-tests.yml b/.github/workflows/12-check-unit-tests.yml index c8bc699e65..6f066a9817 100644 --- a/.github/workflows/12-check-unit-tests.yml +++ b/.github/workflows/12-check-unit-tests.yml @@ -319,6 +319,8 @@ jobs: AGENTA_LICENSE: oss steps: - uses: actions/checkout@v6 + with: + persist-credentials: false - name: Skip when package selection excludes services if: github.event_name == 'workflow_dispatch' && !contains(fromJSON('["all","services-only"]'), inputs.packages)