From 100d061764d029fc27e3ca50dc54b18f5f7ebf7a Mon Sep 17 00:00:00 2001
From: Willie Ruemmele <willieruemmele@gmail.com>
Date: Thu, 23 Apr 2026 17:08:07 -0600
Subject: [PATCH 01/11] feat: update for NGT commands, first pass

---
 messages/shared.md                 |  8 ++++++
 package.json                       |  2 +-
 src/agentTestCache.ts              |  9 ++++---
 src/commands/agent/test/results.ts | 11 +++++---
 src/commands/agent/test/resume.ts  | 31 ++++++++++++++++++----
 src/commands/agent/test/run.ts     | 30 +++++++++++++++++-----
 src/flags.ts                       |  6 +++++
 src/handleTestResults.ts           | 35 +++++++++++++++++++++++--
 src/testRunnerFactory.ts           | 41 ++++++++++++++++++++++++++++++
 src/testStages.ts                  |  7 ++---
 yarn.lock                          |  6 ++---
 11 files changed, 158 insertions(+), 28 deletions(-)
 create mode 100644 src/testRunnerFactory.ts
diff --git a/messages/shared.md b/messages/shared.md
index e007fadc..0ee0904b 100644
--- a/messages/shared.md
+++ b/messages/shared.md
@@ -20,6 +20,14 @@ When enabled, includes detailed generated data (such as invoked actions) in the
 
 The generated data is in JSON format and includes the Apex classes or Flows that were invoked, the Salesforce objects that were touched, and so on. Use the JSON structure of this information to build the test case JSONPath expression when using custom evaluations.
 
+# flags.test-runner-type.summary
+
+Explicitly specify which test runner to use (NGT or legacy).
+
+# flags.test-runner-type.description
+
+By default, the command automatically detects which test runner to use based on the test definition metadata type in your org. Use this flag to explicitly specify the runner type. 'ngt' uses AiTestSuiteDefinition metadata and the /einstein/ai-testing/runs endpoints. 'legacy' uses AiEvaluationDefinition metadata and the /einstein/ai-evaluations/runs endpoints.
+
 # error.invalidAgentType
 
 agentType must be either "customer" or "internal". Found: [%s]
diff --git a/package.json b/package.json
index 3b921081..16fb3e5c 100644
--- a/package.json
+++ b/package.json
@@ -12,7 +12,7 @@
     "@inquirer/prompts": "^7.10.1",
     "@oclif/core": "^4",
     "@oclif/multi-stage-output": "^0.8.36",
-    "@salesforce/agents": "^1.1.2",
+    "@salesforce/agents": "file:../agents",
     "@salesforce/core": "^8.28.3",
     "@salesforce/kit": "^3.2.6",
     "@salesforce/sf-plugins-core": "^12.2.6",
diff --git a/src/agentTestCache.ts b/src/agentTestCache.ts
index bdb17c1d..64104799 100644
--- a/src/agentTestCache.ts
+++ b/src/agentTestCache.ts
@@ -18,12 +18,14 @@ import { Global, SfError, TTLConfig } from '@salesforce/core';
 import { Duration } from '@salesforce/kit';
 
 type ResultFormat = 'json' | 'human' | 'junit' | 'tap';
+type TestRunnerType = 'ngt' | 'legacy';
 
 type CacheContents = {
   runId: string;
   name: string;
   outputDir?: string;
   resultFormat?: ResultFormat;
+  runnerType?: TestRunnerType;
 };
 
 export class AgentTestCache extends TTLConfig<TTLConfig.Options, CacheContents> {
@@ -45,11 +47,12 @@ export class AgentTestCache extends TTLConfig<TTLConfig.Options, CacheContents>
     runId: string,
     name: string,
     outputDir?: string,
-    resultFormat?: ResultFormat
+    resultFormat?: ResultFormat,
+    runnerType?: TestRunnerType
   ): Promise<void> {
     if (!runId) throw new SfError('runId is required to create a cache entry');
 
-    this.set(runId, { runId, name, outputDir, resultFormat });
+    this.set(runId, { runId, name, outputDir, resultFormat, runnerType });
     await this.write();
   }
 
@@ -70,7 +73,7 @@ export class AgentTestCache extends TTLConfig<TTLConfig.Options, CacheContents>
   public useIdOrMostRecent(
     runId: string | undefined,
     useMostRecent: boolean
-  ): { runId: string; name?: string; outputDir?: string; resultFormat?: ResultFormat } {
+  ): { runId: string; name?: string; outputDir?: string; resultFormat?: ResultFormat; runnerType?: TestRunnerType } {
     if (runId && useMostRecent) {
       throw new SfError('Cannot specify both a runId and use most recent flag');
     }
diff --git a/src/commands/agent/test/results.ts b/src/commands/agent/test/results.ts
index 69cfe77e..521d920e 100644
--- a/src/commands/agent/test/results.ts
+++ b/src/commands/agent/test/results.ts
@@ -16,14 +16,15 @@
 
 import { SfCommand, Flags, toHelpSection } from '@salesforce/sf-plugins-core';
 import { EnvironmentVariable, Messages, SfError } from '@salesforce/core';
-import { AgentTester, AgentTestResultsResponse } from '@salesforce/agents';
-import { resultFormatFlag, testOutputDirFlag, verboseFlag } from '../../../flags.js';
+import { AgentTestResultsResponse, AgentTestNGTResultsResponse } from '@salesforce/agents';
+import { resultFormatFlag, testOutputDirFlag, testRunnerTypeFlag, verboseFlag } from '../../../flags.js';
 import { handleTestResults } from '../../../handleTestResults.js';
+import { createTestRunner } from '../../../testRunnerFactory.js';
 
 Messages.importMessagesDirectoryFromMetaUrl(import.meta.url);
 const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.test.results');
 
-export type AgentTestResultsResult = AgentTestResultsResponse;
+export type AgentTestResultsResult = AgentTestResultsResponse | AgentTestNGTResultsResponse;
 
 export default class AgentTestResults extends SfCommand<AgentTestResultsResult> {
   public static readonly summary = messages.getMessage('summary');
@@ -51,13 +52,15 @@ export default class AgentTestResults extends SfCommand<AgentTestResultsResult>
     }),
     'result-format': resultFormatFlag(),
     'output-dir': testOutputDirFlag(),
+    'test-runner-type': testRunnerTypeFlag,
     verbose: verboseFlag,
   };
 
   public async run(): Promise<AgentTestResultsResult> {
     const { flags } = await this.parse(AgentTestResults);
 
-    const agentTester = new AgentTester(flags['target-org'].getConnection(flags['api-version']));
+    const connection = flags['target-org'].getConnection(flags['api-version']);
+    const { runner: agentTester } = await createTestRunner(connection, flags['test-runner-type']);
 
     let response;
     try {
diff --git a/src/commands/agent/test/resume.ts b/src/commands/agent/test/resume.ts
index bca01e77..7c2da6b0 100644
--- a/src/commands/agent/test/resume.ts
+++ b/src/commands/agent/test/resume.ts
@@ -16,12 +16,18 @@
 
 import { SfCommand, Flags, toHelpSection } from '@salesforce/sf-plugins-core';
 import { EnvironmentVariable, Messages, SfError } from '@salesforce/core';
-import { AgentTester } from '@salesforce/agents';
 import { CLIError } from '@oclif/core/errors';
 import { AgentTestCache } from '../../../agentTestCache.js';
 import { TestStages } from '../../../testStages.js';
-import { AgentTestRunResult, resultFormatFlag, testOutputDirFlag, verboseFlag } from '../../../flags.js';
+import {
+  AgentTestRunResult,
+  resultFormatFlag,
+  testOutputDirFlag,
+  testRunnerTypeFlag,
+  verboseFlag,
+} from '../../../flags.js';
 import { handleTestResults } from '../../../handleTestResults.js';
+import { createTestRunner } from '../../../testRunnerFactory.js';
 
 Messages.importMessagesDirectoryFromMetaUrl(import.meta.url);
 const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.test.resume');
@@ -65,6 +71,7 @@ export default class AgentTestResume extends SfCommand<AgentTestRunResult> {
     }),
     'result-format': resultFormatFlag(),
     'output-dir': testOutputDirFlag(),
+    'test-runner-type': testRunnerTypeFlag,
     verbose: verboseFlag,
   };
 
@@ -78,6 +85,7 @@ export default class AgentTestResume extends SfCommand<AgentTestRunResult> {
     let runId;
     let outputDir;
     let resultFormat;
+    let cachedRunnerType;
 
     try {
       const cacheEntry = agentTestCache.useIdOrMostRecent(flags['job-id'], flags['use-most-recent']);
@@ -85,6 +93,7 @@ export default class AgentTestResume extends SfCommand<AgentTestRunResult> {
       runId = cacheEntry.runId;
       outputDir = cacheEntry.outputDir;
       resultFormat = cacheEntry.resultFormat;
+      cachedRunnerType = cacheEntry.runnerType;
     } catch (e) {
       const wrapped = SfError.wrap(e);
 
@@ -105,7 +114,14 @@ export default class AgentTestResume extends SfCommand<AgentTestRunResult> {
       jsonEnabled: this.jsonEnabled(),
     });
     this.mso.start({ id: runId });
-    const agentTester = new AgentTester(flags['target-org'].getConnection(flags['api-version']));
+
+    // Use cached runner type if available, otherwise use explicit flag or detect
+    const connection = flags['target-org'].getConnection(flags['api-version']);
+    const { runner: agentTester } = await createTestRunner(
+      connection,
+      flags['test-runner-type'] ?? cachedRunnerType,
+      name
+    );
 
     let completed;
     let response;
@@ -139,12 +155,17 @@ export default class AgentTestResume extends SfCommand<AgentTestRunResult> {
 
     // Set exit code to 1 only for execution errors (tests couldn't run properly)
     // Test assertion failures are business logic and should not affect exit code
-    if (response?.testCases.some((tc) => tc.status === 'ERROR')) {
+    // Only applicable to legacy responses (NGT doesn't have test case status)
+    if (
+      response &&
+      'subjectName' in response &&
+      response.testCases.some((tc) => 'status' in tc && tc.status === 'ERROR')
+    ) {
       process.exitCode = 1;
     }
 
     // eslint-disable-next-line @typescript-eslint/no-unnecessary-type-assertion
-    return { ...response!, runId, status: 'COMPLETED' };
+    return { ...response!, runId, status: 'COMPLETED' } as AgentTestRunResult;
   }
 
   protected catch(error: Error | SfError | CLIError): Promise<never> {
diff --git a/src/commands/agent/test/run.ts b/src/commands/agent/test/run.ts
index 330223c6..51497743 100644
--- a/src/commands/agent/test/run.ts
+++ b/src/commands/agent/test/run.ts
@@ -16,7 +16,7 @@
 
 import { SfCommand, Flags, toHelpSection } from '@salesforce/sf-plugins-core';
 import { EnvironmentVariable, Messages, SfError } from '@salesforce/core';
-import { AgentTester, AgentTestStartResponse } from '@salesforce/agents';
+import { AgentTestStartResponse, AgentTestNGTStartResponse } from '@salesforce/agents';
 import { colorize } from '@oclif/core/ux';
 import { CLIError } from '@oclif/core/errors';
 import {
@@ -26,11 +26,13 @@ import {
   promptForAiEvaluationDefinitionApiName,
   resultFormatFlag,
   testOutputDirFlag,
+  testRunnerTypeFlag,
   verboseFlag,
 } from '../../../flags.js';
 import { AgentTestCache } from '../../../agentTestCache.js';
 import { TestStages } from '../../../testStages.js';
 import { handleTestResults } from '../../../handleTestResults.js';
+import { createTestRunner } from '../../../testRunnerFactory.js';
 
 Messages.importMessagesDirectoryFromMetaUrl(import.meta.url);
 const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.test.run');
@@ -88,6 +90,7 @@ export default class AgentTestRun extends SfCommand<AgentTestRunResult> {
     }),
     'result-format': resultFormatFlag(),
     'output-dir': testOutputDirFlag(),
+    'test-runner-type': testRunnerTypeFlag,
     verbose: verboseFlag,
   };
 
@@ -107,8 +110,12 @@ export default class AgentTestRun extends SfCommand<AgentTestRunResult> {
     this.mso = new TestStages({ title: `Agent Test Run: ${apiName}`, jsonEnabled: this.jsonEnabled() });
     this.mso.start();
 
-    const agentTester = new AgentTester(connection);
-    let response: AgentTestStartResponse;
+    // Determine which test runner to use (NGT or legacy)
+    const result = await createTestRunner(connection, flags['test-runner-type'], apiName);
+    const agentTester = result.runner;
+    const runnerType = result.type;
+
+    let response: AgentTestStartResponse | AgentTestNGTStartResponse;
     try {
       response = await agentTester.start(apiName);
     } catch (e) {
@@ -135,7 +142,13 @@ export default class AgentTestRun extends SfCommand<AgentTestRunResult> {
     this.mso.update({ id: response.runId });
 
     const agentTestCache = await AgentTestCache.create();
-    await agentTestCache.createCacheEntry(response.runId, apiName, flags['output-dir'], flags['result-format']);
+    await agentTestCache.createCacheEntry(
+      response.runId,
+      apiName,
+      flags['output-dir'],
+      flags['result-format'],
+      runnerType
+    );
 
     if (flags.wait?.minutes) {
       let completed;
@@ -170,12 +183,17 @@ export default class AgentTestRun extends SfCommand<AgentTestRunResult> {
 
       // Set exit code to 1 only for execution errors (tests couldn't run properly)
       // Test assertion failures are business logic and should not affect exit code
-      if (detailsResponse?.testCases.some((tc) => tc.status === 'ERROR')) {
+      // Only applicable to legacy responses (NGT doesn't have test case status)
+      if (
+        detailsResponse &&
+        'subjectName' in detailsResponse &&
+        detailsResponse.testCases.some((tc) => 'status' in tc && tc.status === 'ERROR')
+      ) {
         process.exitCode = 1;
       }
 
       // eslint-disable-next-line @typescript-eslint/no-unnecessary-type-assertion
-      return { ...detailsResponse!, status: 'COMPLETED', runId: response.runId };
+      return { ...detailsResponse!, status: 'COMPLETED', runId: response.runId } as AgentTestRunResult;
     } else {
       this.mso.stop();
       this.log(
diff --git a/src/flags.ts b/src/flags.ts
index bc627118..6deb37d6 100644
--- a/src/flags.ts
+++ b/src/flags.ts
@@ -68,6 +68,12 @@ export const verboseFlag = Flags.boolean({
   description: messages.getMessage('flags.verbose.description'),
 });
 
+export const testRunnerTypeFlag = Flags.custom<'ngt' | 'legacy'>({
+  options: ['ngt', 'legacy'],
+  summary: messages.getMessage('flags.test-runner-type.summary'),
+  description: messages.getMessage('flags.test-runner-type.description'),
+})();
+
 function validateInput(input: string, validate: (input: string) => boolean | string): never | string {
   const result = validate(input);
   if (typeof result === 'string') throw new Error(result);
diff --git a/src/handleTestResults.ts b/src/handleTestResults.ts
index e859037c..f1a94601 100644
--- a/src/handleTestResults.ts
+++ b/src/handleTestResults.ts
@@ -16,11 +16,23 @@
 import { join } from 'node:path';
 import { stripVTControlCharacters } from 'node:util';
 import { writeFile, mkdir } from 'node:fs/promises';
-import { AgentTestResultsResponse, convertTestResultsToFormat, humanFriendlyName, metric } from '@salesforce/agents';
+import {
+  AgentTestResultsResponse,
+  AgentTestNGTResultsResponse,
+  convertTestResultsToFormat,
+  humanFriendlyName,
+  metric,
+} from '@salesforce/agents';
 import { Ux } from '@salesforce/sf-plugins-core/Ux';
 import { ux as ocux } from '@oclif/core';
 import ansis from 'ansis';
 
+type TestResultsResponse = AgentTestResultsResponse | AgentTestNGTResultsResponse;
+
+function isLegacyResponse(response: TestResultsResponse): response is AgentTestResultsResponse {
+  return 'subjectName' in response; // NOTE(review): assumes NGT responses never carry subjectName — confirm
+}
+
 async function writeFileToDir(outputDir: string, fileName: string, content: string): Promise<void> {
   // if directory doesn't exist, create it
   await mkdir(outputDir, { recursive: true });
@@ -227,7 +239,7 @@ export async function handleTestResults({
 }: {
   id: string;
   format: 'human' | 'json' | 'junit' | 'tap';
-  results: AgentTestResultsResponse | undefined;
+  results: TestResultsResponse | undefined;
   jsonEnabled: boolean;
   outputDir?: string;
   verbose?: boolean;
@@ -239,6 +251,25 @@ export async function handleTestResults({
 
   const ux = new Ux({ jsonEnabled });
 
+  // For NGT responses, we only support JSON format for now
+  if (!isLegacyResponse(results)) {
+    if (format !== 'json') {
+      ux.log(
+        ansis.yellow('Warning: NGT test results only support JSON format. Use --result-format json or omit the flag.')
+      );
+    }
+    const formatted = JSON.stringify(results, null, 2);
+    if (outputDir) {
+      const file = `test-result-${id}.json`;
+      await writeFileToDir(outputDir, file, formatted);
+      ux.log(`Created JSON file at ${join(outputDir, file)}`);
+    } else {
+      ux.log(formatted);
+    }
+    return;
+  }
+
+  // Legacy response formatting
   if (format === 'human') {
     const formatted = humanFormat(results, verbose);
     if (outputDir) {
diff --git a/src/testRunnerFactory.ts b/src/testRunnerFactory.ts
new file mode 100644
index 00000000..bc2ba0b7
--- /dev/null
+++ b/src/testRunnerFactory.ts
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2026, Salesforce, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { Connection } from '@salesforce/core';
+import { AgentTester, AgentTesterNGT, determineTestRunner, TestRunnerType } from '@salesforce/agents';
+
+export type TestRunnerInstance = AgentTester | AgentTesterNGT;
+
+/**
+ * Creates the appropriate test runner (NGT or legacy) based on detection or explicit type.
+ *
+ * @param connection - Salesforce connection
+ * @param explicitType - Optional explicit runner type to use (bypasses detection)
+ * @param testDefinitionName - Optional test name for conflict detection
+ * @returns Object containing the runner instance and its type
+ */
+export async function createTestRunner(
+  connection: Connection,
+  explicitType?: TestRunnerType,
+  testDefinitionName?: string
+): Promise<{ runner: TestRunnerInstance; type: TestRunnerType }> {
+  // Use explicit type if provided, otherwise detect
+  const runnerType = explicitType ?? (await determineTestRunner(connection, testDefinitionName));
+
+  const runner = runnerType === 'ngt' ? new AgentTesterNGT(connection) : new AgentTester(connection);
+
+  return { runner, type: runnerType };
+}
diff --git a/src/testStages.ts b/src/testStages.ts
index 5eff2def..81d985d4 100644
--- a/src/testStages.ts
+++ b/src/testStages.ts
@@ -16,10 +16,11 @@
 
 import { colorize } from '@oclif/core/ux';
 import { MultiStageOutput } from '@oclif/multi-stage-output';
-import { AgentTestResultsResponse, AgentTester } from '@salesforce/agents';
+import { AgentTestResultsResponse, AgentTestNGTResultsResponse } from '@salesforce/agents';
 import { Lifecycle } from '@salesforce/core';
 import { Duration } from '@salesforce/kit';
 import { Ux } from '@salesforce/sf-plugins-core';
+import type { TestRunnerInstance } from './testRunnerFactory.js';
 
 type Data = {
   id: string;
@@ -86,10 +87,10 @@ export class TestStages {
   }
 
   public async poll(
-    agentTester: AgentTester,
+    agentTester: TestRunnerInstance,
     id: string,
     wait: Duration
-  ): Promise<{ completed: boolean; response?: AgentTestResultsResponse }> {
+  ): Promise<{ completed: boolean; response?: AgentTestResultsResponse | AgentTestNGTResultsResponse }> {
     this.mso.skipTo('Polling for Test Results');
     const lifecycle = Lifecycle.getInstance();
     lifecycle.on(
diff --git a/yarn.lock b/yarn.lock
index a01fe617..daacbd86 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -1593,10 +1593,8 @@
   resolved "https://registry.yarnpkg.com/@rtsao/scc/-/scc-1.1.0.tgz#927dd2fae9bc3361403ac2c7a00c32ddce9ad7e8"
   integrity sha512-zt6OdqaDoOnJ1ZYsCYGt9YmWzDXl4vQdKTyJev62gFhRGKdx7mcT54V9KIjg+d2wi9EXsPvAPKe7i7WjfVWB8g==
 
-"@salesforce/agents@^1.1.2":
-  version "1.1.2"
-  resolved "https://registry.yarnpkg.com/@salesforce/agents/-/agents-1.1.2.tgz#aa2b93e0ba71eefcde05541d9add8da3975577ec"
-  integrity sha512-p7isCk2WoV0t1skRoTjYeead+GOoF2I7VPo+K6YYt+h6S+v8vJTdBc8NNhnmUJcz386FIK5jc0g7bSz8lCQ0tQ==
+"@salesforce/agents@file:../agents":
+  version "1.1.3"
   dependencies:
     "@salesforce/core" "^8.28.3"
     "@salesforce/kit" "^3.2.6"

From 63ae0acd1f20f631c2d6bd22714f76756bf7fbd6 Mon Sep 17 00:00:00 2001
From: Willie Ruemmele <willieruemmele@gmail.com>
Date: Mon, 27 Apr 2026 13:59:14 -0600
Subject: [PATCH 02/11] chore: use ID prefix detection in test runner factory
 to avoid unnecessary org queries

---
 src/commands/agent/test/results.ts |  7 ++++++-
 src/commands/agent/test/resume.ts  |  5 +++--
 src/testRunnerFactory.ts           | 33 +++++++++++++++++++++++-------
 3 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/src/commands/agent/test/results.ts b/src/commands/agent/test/results.ts
index 521d920e..5a76df1f 100644
--- a/src/commands/agent/test/results.ts
+++ b/src/commands/agent/test/results.ts
@@ -60,7 +60,12 @@ export default class AgentTestResults extends SfCommand<AgentTestResultsResult>
     const { flags } = await this.parse(AgentTestResults);
 
     const connection = flags['target-org'].getConnection(flags['api-version']);
-    const { runner: agentTester } = await createTestRunner(connection, flags['test-runner-type']);
+    const { runner: agentTester } = await createTestRunner(
+      connection,
+      flags['test-runner-type'],
+      undefined,
+      flags['job-id']
+    );
 
     let response;
     try {
diff --git a/src/commands/agent/test/resume.ts b/src/commands/agent/test/resume.ts
index 7c2da6b0..ecaa7131 100644
--- a/src/commands/agent/test/resume.ts
+++ b/src/commands/agent/test/resume.ts
@@ -115,12 +115,13 @@ export default class AgentTestResume extends SfCommand<AgentTestRunResult> {
     });
     this.mso.start({ id: runId });
 
-    // Use cached runner type if available, otherwise use explicit flag or detect
+    // Use explicit flag > cached runner type > ID prefix detection > org metadata query
     const connection = flags['target-org'].getConnection(flags['api-version']);
     const { runner: agentTester } = await createTestRunner(
       connection,
       flags['test-runner-type'] ?? cachedRunnerType,
-      name
+      name,
+      runId
     );
 
     let completed;
diff --git a/src/testRunnerFactory.ts b/src/testRunnerFactory.ts
index bc2ba0b7..3028f167 100644
--- a/src/testRunnerFactory.ts
+++ b/src/testRunnerFactory.ts
@@ -15,27 +15,46 @@
  */
 
 import { Connection } from '@salesforce/core';
-import { AgentTester, AgentTesterNGT, determineTestRunner, TestRunnerType } from '@salesforce/agents';
+import {
+  AgentTester,
+  AgentTesterNGT,
+  detectTestRunnerFromId,
+  determineTestRunner,
+  TestRunnerType,
+} from '@salesforce/agents';
 
 export type TestRunnerInstance = AgentTester | AgentTesterNGT;
 
 /**
  * Creates the appropriate test runner (NGT or legacy) based on detection or explicit type.
  *
+ * Detection priority:
+ * 1. `explicitType` — user-supplied `--test-runner-type` flag, always wins
+ * 2. `runId` prefix — instant detection from the Salesforce ID prefix (`3A2` = NGT, `4KB` = legacy), no network call
+ * 3. `testDefinitionName` — org metadata query via `determineTestRunner` (network call, used as last resort)
+ *
  * @param connection - Salesforce connection
- * @param explicitType - Optional explicit runner type to use (bypasses detection)
- * @param testDefinitionName - Optional test name for conflict detection
+ * @param explicitType - Optional explicit runner type (bypasses all detection)
+ * @param testDefinitionName - Optional test name for org metadata detection
+ * @param runId - Optional existing run ID; prefix is used for instant detection
  * @returns Object containing the runner instance and its type
  */
 export async function createTestRunner(
   connection: Connection,
   explicitType?: TestRunnerType,
-  testDefinitionName?: string
+  testDefinitionName?: string,
+  runId?: string
 ): Promise<{ runner: TestRunnerInstance; type: TestRunnerType }> {
-  // Use explicit type if provided, otherwise detect
-  const runnerType = explicitType ?? (await determineTestRunner(connection, testDefinitionName));
+  let runnerType: TestRunnerType;
 
-  const runner = runnerType === 'ngt' ? new AgentTesterNGT(connection) : new AgentTester(connection);
+  if (explicitType) {
+    runnerType = explicitType;
+  } else if (runId && detectTestRunnerFromId(runId)) {
+    runnerType = detectTestRunnerFromId(runId)!;
+  } else {
+    runnerType = await determineTestRunner(connection, testDefinitionName);
+  }
 
+  const runner = runnerType === 'ngt' ? new AgentTesterNGT(connection) : new AgentTester(connection);
   return { runner, type: runnerType };
 }

From e87ab06fc9711ca8a2d50b02c20ecc15dd9fd32e Mon Sep 17 00:00:00 2001
From: Willie Ruemmele <willieruemmele@gmail.com>
Date: Tue, 28 Apr 2026 10:31:20 -0600
Subject: [PATCH 03/11] chore: handle test result formats

---
 src/handleTestResults.ts | 219 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 207 insertions(+), 12 deletions(-)

diff --git a/src/handleTestResults.ts b/src/handleTestResults.ts
index f1a94601..2fb9ead1 100644
--- a/src/handleTestResults.ts
+++ b/src/handleTestResults.ts
@@ -23,6 +23,7 @@ import {
   humanFriendlyName,
   metric,
 } from '@salesforce/agents';
+import { XMLBuilder } from 'fast-xml-parser';
 import { Ux } from '@salesforce/sf-plugins-core/Ux';
 import { ux as ocux } from '@oclif/core';
 import ansis from 'ansis';
@@ -90,6 +91,167 @@ export function readableTime(time: number, decimalPlaces = 2): string {
   return `${hours}h ${minutes}m`;
 }
 
+type ParsedScorerResponse = {
+  status?: string;
+  score?: number;
+  reasoning?: string;
+  actualValue?: string;
+  expectedValue?: string;
+};
+
+function humanFormatNGT(results: AgentTestNGTResultsResponse): string {
+  const ux = new Ux();
+  const tables: string[] = [];
+
+  for (const testCase of results.testCases) {
+    let userInput = '';
+    try {
+      const parsed = JSON.parse(testCase.subjectResponse) as { userInput?: string };
+      userInput = parsed.userInput ?? '';
+    } catch {
+      // best effort: if subjectResponse cannot be read as JSON, userInput stays an empty string
+    }
+
+    const scorerRows = testCase.testScorerResults.map((scorer) => {
+      let parsed: ParsedScorerResponse = {};
+      try {
+        parsed = JSON.parse(scorer.scorerResponse) as ParsedScorerResponse;
+      } catch {
+        // unparseable scorerResponse: parsed stays {}, so the row renders as Fail with blank details
+      }
+      return {
+        scorer: scorer.scorerName,
+        result: parsed.status === 'PASS' ? ansis.green('Pass') : ansis.red('Fail'),
+        expected: parsed.expectedValue ?? '',
+        actual: parsed.actualValue ?? '',
+        reasoning: parsed.reasoning ?? '',
+      };
+    });
+
+    tables.push(
+      ux.makeTable({
+        title: `${ansis.bold(`Test Case #${testCase.testNumber}`)}\n${ansis.dim('User Input')}: ${userInput}`,
+        overflow: 'wrap',
+        columns: [
+          { key: 'scorer', name: 'Scorer' },
+          { key: 'result', name: 'Result' },
+          { key: 'expected', name: 'Expected', width: '25%' },
+          { key: 'actual', name: 'Actual', width: '25%' },
+          { key: 'reasoning', name: 'Reasoning', width: '35%' },
+        ],
+        data: scorerRows,
+        width: '100%',
+      })
+    );
+    tables.push('\n'); // blank line between consecutive test case tables
+  }
+
+  const totalCases = results.testCases.length;
+  const passCases = results.testCases.filter((tc) =>
+    tc.testScorerResults.every((s) => {
+      try {
+        return (JSON.parse(s.scorerResponse) as ParsedScorerResponse).status === 'PASS';
+      } catch {
+        return false; // unparseable scorer output means the test case is not counted as passing
+      }
+    })
+  ).length;
+
+  const summary = makeSimpleTable(
+    {
+      Status: results.status,
+      'Total Test Cases': String(totalCases),
+      'Passing Test Cases': String(passCases),
+      'Failing Test Cases': String(totalCases - passCases),
+    },
+    ansis.bold.blue('Test Results')
+  );
+
+  return tables.join('') + `\n${summary}\n`;
+}
+
+function junitFormatNGT(results: AgentTestNGTResultsResponse): string {
+  const builder = new XMLBuilder({ format: true, attributeNamePrefix: '$', ignoreAttributes: false });
+  const testCount = results.testCases.length;
+  const failureCount = results.testCases.filter((tc) =>
+    tc.testScorerResults.some((s) => {
+      try {
+        return (JSON.parse(s.scorerResponse) as ParsedScorerResponse).status !== 'PASS';
+      } catch {
+        return true; // unparseable scorer output counts as a failure
+      }
+    })
+  ).length;
+
+  const suites = builder.build({
+    testsuites: {
+      $name: 'AgentTestNGT',
+      $tests: testCount,
+      $failures: failureCount,
+      property: [{ $name: 'status', $value: results.status }],
+      testsuite: results.testCases.map((tc) => ({
+        $name: tc.testNumber,
+        $assertions: tc.testScorerResults.length,
+        failure: tc.testScorerResults
+          .map((s) => {
+            let parsed: ParsedScorerResponse = {};
+            try {
+              parsed = JSON.parse(s.scorerResponse) as ParsedScorerResponse;
+            } catch {
+              // unparseable scorerResponse: parsed stays {}, so a failure with 'Unknown error' is emitted
+            }
+            if (parsed.status !== 'PASS') {
+              return { $message: parsed.reasoning ?? 'Unknown error', $name: s.scorerName };
+            }
+          })
+          .filter(Boolean), // removes the undefined entries that passing scorers map to
+      })),
+    },
+  });
+
+  return `<?xml version="1.0" encoding="UTF-8"?>\n${suites}`.trim();
+}
+
+function tapFormatNGT(results: AgentTestNGTResultsResponse): string {
+  const lines: string[] = [];
+  let expectationCount = 0;
+
+  for (const tc of results.testCases) {
+    for (const scorer of tc.testScorerResults) {
+      let parsed: ParsedScorerResponse = {};
+      try {
+        parsed = JSON.parse(scorer.scorerResponse) as ParsedScorerResponse;
+      } catch {
+        // unparseable scorerResponse: parsed stays {}, so this scorer is reported as "not ok"
+      }
+      const pass = parsed.status === 'PASS';
+      expectationCount++; // test points are numbered sequentially across all test cases
+      lines.push(`${pass ? 'ok' : 'not ok'} ${expectationCount} ${tc.testNumber}.${scorer.scorerName}`);
+      if (!pass) {
+        lines.push('  ---'); // opens the indented diagnostic block for a failing scorer
+        lines.push(`  message: ${parsed.reasoning ?? 'Unknown error'}`);
+        lines.push(`  scorer: ${scorer.scorerName}`);
+        lines.push(`  actual: ${parsed.actualValue ?? ''}`);
+        lines.push(`  expected: ${parsed.expectedValue ?? ''}`);
+        lines.push('  ...'); // closes the diagnostic block
+      }
+    }
+  }
+
+  return `Tap Version 14\n1..${expectationCount}\n${lines.join('\n')}`;
+}
+
+function convertNGTTestResultsToFormat(results: AgentTestNGTResultsResponse, format: 'json' | 'junit' | 'tap'): string {
+  switch (format) { // 'human' is absent from the format union, so it is not handled here
+    case 'json':
+      return JSON.stringify(results, null, 2);
+    case 'junit':
+      return junitFormatNGT(results);
+    case 'tap':
+      return tapFormatNGT(results);
+  }
+}
+
 export function humanFormat(results: AgentTestResultsResponse, verbose = false): string {
   const ux = new Ux();
 
@@ -251,20 +413,53 @@ export async function handleTestResults({
 
   const ux = new Ux({ jsonEnabled });
 
-  // For NGT responses, we only support JSON format for now
   if (!isLegacyResponse(results)) {
-    if (format !== 'json') {
-      ux.log(
-        ansis.yellow('Warning: NGT test results only support JSON format. Use --result-format json or omit the flag.')
-      );
+    if (format === 'human') {
+      const formatted = humanFormatNGT(results);
+      if (outputDir) {
+        const file = `test-result-${id}.txt`;
+        await writeFileToDir(outputDir, file, stripVTControlCharacters(formatted));
+        ux.log(`Created human-readable file at ${join(outputDir, file)}`);
+      } else {
+        ux.log(formatted);
+      }
+      return;
     }
-    const formatted = JSON.stringify(results, null, 2);
-    if (outputDir) {
-      const file = `test-result-${id}.json`;
-      await writeFileToDir(outputDir, file, formatted);
-      ux.log(`Created JSON file at ${join(outputDir, file)}`);
-    } else {
-      ux.log(formatted);
+
+    if (format === 'json') {
+      const formatted = convertNGTTestResultsToFormat(results, 'json');
+      if (outputDir) {
+        const file = `test-result-${id}.json`;
+        await writeFileToDir(outputDir, file, formatted);
+        ux.log(`Created JSON file at ${join(outputDir, file)}`);
+      } else {
+        ux.log(formatted);
+      }
+      return;
+    }
+
+    if (format === 'junit') {
+      const formatted = convertNGTTestResultsToFormat(results, 'junit');
+      if (outputDir) {
+        const file = `test-result-${id}.xml`;
+        await writeFileToDir(outputDir, file, formatted);
+        ux.log(`Created JUnit file at ${join(outputDir, file)}`);
+      } else {
+        ux.log(formatted);
+      }
+      return;
+    }
+
+    if (format === 'tap') {
+      const formatted = convertNGTTestResultsToFormat(results, 'tap');
+      if (outputDir) {
+        const file = `test-result-${id}.txt`;
+        await writeFileToDir(outputDir, file, formatted);
+        ux.log(`Created TAP file at ${join(outputDir, file)}`);
+      } else {
+        ux.log(formatted);
+      }
+      return;
     }
     return;
   }

From 0e7bf24482ef7dcc402cafc84edd743406ac1667 Mon Sep 17 00:00:00 2001
From: Willie Ruemmele <willieruemmele@gmail.com>
Date: Tue, 28 Apr 2026 11:37:07 -0600
Subject: [PATCH 04/11] refactor: rename NGT/TestingCenter names

---
 messages/shared.md                 |   4 +-
 src/agentTestCache.ts              |   2 +-
 src/commands/agent/test/results.ts |   4 +-
 src/commands/agent/test/resume.ts  |   4 +-
 src/commands/agent/test/run.ts     |   4 +-
 src/flags.ts                       |   4 +-
 src/testRunnerFactory.ts           |   6 +-
 test/common.test.ts                | 141 +++++++++++++++++++++++++
 test/testRunnerFactory.test.ts     | 164 +++++++++++++++++++++++++++++
 9 files changed, 319 insertions(+), 14 deletions(-)
 create mode 100644 test/common.test.ts
 create mode 100644 test/testRunnerFactory.test.ts

diff --git a/messages/shared.md b/messages/shared.md
index 0ee0904b..4c28cf79 100644
--- a/messages/shared.md
+++ b/messages/shared.md
@@ -22,11 +22,11 @@ The generated data is in JSON format and includes the Apex classes or Flows that
 
 # flags.test-runner-type.summary
 
-Explicitly specify which test runner to use (NGT or legacy).
+Explicitly specify which test runner to use (agentforce-studio or testing-center).
 
 # flags.test-runner-type.description
 
-By default, the command automatically detects which test runner to use based on the test definition metadata type in your org. Use this flag to explicitly specify the runner type. 'ngt' uses AiTestSuiteDefinition metadata and the /einstein/ai-testing/runs endpoints. 'legacy' uses AiEvaluationDefinition metadata and the /einstein/ai-evaluations/runs endpoints.
+By default, the command automatically detects which test runner to use based on the test definition metadata type in your org. Use this flag to explicitly specify the runner type. 'agentforce-studio' uses AiTestSuiteDefinition metadata. 'testing-center' uses AiEvaluationDefinition metadata.
 
 # error.invalidAgentType
 
diff --git a/src/agentTestCache.ts b/src/agentTestCache.ts
index 64104799..de7b86a0 100644
--- a/src/agentTestCache.ts
+++ b/src/agentTestCache.ts
@@ -18,7 +18,7 @@ import { Global, SfError, TTLConfig } from '@salesforce/core';
 import { Duration } from '@salesforce/kit';
 
 type ResultFormat = 'json' | 'human' | 'junit' | 'tap';
-type TestRunnerType = 'ngt' | 'legacy';
+type TestRunnerType = 'agentforce-studio' | 'testing-center';
 
 type CacheContents = {
   runId: string;
diff --git a/src/commands/agent/test/results.ts b/src/commands/agent/test/results.ts
index 5a76df1f..451c475d 100644
--- a/src/commands/agent/test/results.ts
+++ b/src/commands/agent/test/results.ts
@@ -52,7 +52,7 @@ export default class AgentTestResults extends SfCommand<AgentTestResultsResult>
     }),
     'result-format': resultFormatFlag(),
     'output-dir': testOutputDirFlag(),
-    'test-runner-type': testRunnerTypeFlag,
+    'test-runner': testRunnerTypeFlag,
     verbose: verboseFlag,
   };
 
@@ -62,7 +62,7 @@ export default class AgentTestResults extends SfCommand<AgentTestResultsResult>
     const connection = flags['target-org'].getConnection(flags['api-version']);
     const { runner: agentTester } = await createTestRunner(
       connection,
-      flags['test-runner-type'],
+      flags['test-runner'],
       undefined,
       flags['job-id']
     );
diff --git a/src/commands/agent/test/resume.ts b/src/commands/agent/test/resume.ts
index ecaa7131..8e2ec3cd 100644
--- a/src/commands/agent/test/resume.ts
+++ b/src/commands/agent/test/resume.ts
@@ -71,7 +71,7 @@ export default class AgentTestResume extends SfCommand<AgentTestRunResult> {
     }),
     'result-format': resultFormatFlag(),
     'output-dir': testOutputDirFlag(),
-    'test-runner-type': testRunnerTypeFlag,
+    'test-runner': testRunnerTypeFlag,
     verbose: verboseFlag,
   };
 
@@ -119,7 +119,7 @@ export default class AgentTestResume extends SfCommand<AgentTestRunResult> {
     const connection = flags['target-org'].getConnection(flags['api-version']);
     const { runner: agentTester } = await createTestRunner(
       connection,
-      flags['test-runner-type'] ?? cachedRunnerType,
+      flags['test-runner'] ?? cachedRunnerType,
       name,
       runId
     );
diff --git a/src/commands/agent/test/run.ts b/src/commands/agent/test/run.ts
index 51497743..1c95c626 100644
--- a/src/commands/agent/test/run.ts
+++ b/src/commands/agent/test/run.ts
@@ -90,7 +90,7 @@ export default class AgentTestRun extends SfCommand<AgentTestRunResult> {
     }),
     'result-format': resultFormatFlag(),
     'output-dir': testOutputDirFlag(),
-    'test-runner-type': testRunnerTypeFlag,
+    'test-runner': testRunnerTypeFlag,
     verbose: verboseFlag,
   };
 
@@ -111,7 +111,7 @@ export default class AgentTestRun extends SfCommand<AgentTestRunResult> {
     this.mso.start();
 
     // Determine which test runner to use (NGT or legacy)
-    const result = await createTestRunner(connection, flags['test-runner-type'], apiName);
+    const result = await createTestRunner(connection, flags['test-runner'], apiName);
     const agentTester = result.runner;
     const runnerType = result.type;
 
diff --git a/src/flags.ts b/src/flags.ts
index 6deb37d6..3709a511 100644
--- a/src/flags.ts
+++ b/src/flags.ts
@@ -68,8 +68,8 @@ export const verboseFlag = Flags.boolean({
   description: messages.getMessage('flags.verbose.description'),
 });
 
-export const testRunnerTypeFlag = Flags.custom<'ngt' | 'legacy'>({
-  options: ['ngt', 'legacy'],
+export const testRunnerTypeFlag = Flags.custom<'agentforce-studio' | 'testing-center'>({
+  options: ['agentforce-studio', 'testing-center'],
   summary: messages.getMessage('flags.test-runner-type.summary'),
   description: messages.getMessage('flags.test-runner-type.description'),
 })();
diff --git a/src/testRunnerFactory.ts b/src/testRunnerFactory.ts
index 3028f167..4edd797a 100644
--- a/src/testRunnerFactory.ts
+++ b/src/testRunnerFactory.ts
@@ -26,11 +26,11 @@ import {
 export type TestRunnerInstance = AgentTester | AgentTesterNGT;
 
 /**
- * Creates the appropriate test runner (NGT or legacy) based on detection or explicit type.
+ * Creates the appropriate test runner (agentforce-studio or testing-center) based on detection or explicit type.
  *
  * Detection priority:
  * 1. `explicitType` — user-supplied `--test-runner-type` flag, always wins
- * 2. `runId` prefix — instant detection from the Salesforce ID prefix (`3A2` = NGT, `4KB` = legacy), no network call
+ * 2. `runId` prefix — instant detection from the Salesforce ID prefix (`3A2` = agentforce-studio, `4KB` = testing-center), no network call
  * 3. `testDefinitionName` — org metadata query via `determineTestRunner` (network call, used as last resort)
  *
  * @param connection - Salesforce connection
@@ -55,6 +55,6 @@ export async function createTestRunner(
     runnerType = await determineTestRunner(connection, testDefinitionName);
   }
 
-  const runner = runnerType === 'ngt' ? new AgentTesterNGT(connection) : new AgentTester(connection);
+  const runner = runnerType === 'agentforce-studio' ? new AgentTesterNGT(connection) : new AgentTester(connection);
   return { runner, type: runnerType };
 }
diff --git a/test/common.test.ts b/test/common.test.ts
new file mode 100644
index 00000000..25ec52bb
--- /dev/null
+++ b/test/common.test.ts
@@ -0,0 +1,141 @@
+/*
+ * Copyright 2026, Salesforce, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { expect } from 'chai';
+import { SfError } from '@salesforce/core';
+import type { CompilationError } from '@salesforce/agents';
+import { throwAgentCompilationError, COMPILATION_API_EXIT_CODES } from '../src/common.js';
+
+describe('common', () => {
+  describe('COMPILATION_API_EXIT_CODES', () => {
+    it('should re-export COMPILATION_API_EXIT_CODES from @salesforce/agents', () => {
+      expect(COMPILATION_API_EXIT_CODES).to.be.an('object');
+    });
+  });
+
+  describe('throwAgentCompilationError', () => {
+    it('should throw SfError with unknown error message when given empty array', () => {
+      try {
+        throwAgentCompilationError([]);
+        expect.fail('Expected error to be thrown');
+      } catch (e) {
+        expect(e).to.be.instanceOf(SfError);
+        expect((e as SfError).name).to.equal('CompileAgentScriptError');
+        expect((e as SfError).message).to.equal('Unknown compilation error occurred');
+        expect((e as SfError).exitCode).to.equal(1);
+      }
+    });
+
+    it('should throw SfError with formatted error message for single error', () => {
+      const errors: CompilationError[] = [
+        {
+          errorType: 'SyntaxError',
+          description: 'Unexpected token',
+          lineStart: 5,
+          colStart: 10,
+          lineEnd: 5,
+          colEnd: 15,
+        },
+      ];
+
+      try {
+        throwAgentCompilationError(errors);
+        expect.fail('Expected error to be thrown');
+      } catch (e) {
+        expect(e).to.be.instanceOf(SfError);
+        expect((e as SfError).message).to.equal('SyntaxError: Unexpected token [Ln 5, Col 10]');
+        expect((e as SfError).exitCode).to.equal(1);
+      }
+    });
+
+    it('should join multiple errors with EOL separator', () => {
+      const errors: CompilationError[] = [
+        {
+          errorType: 'SyntaxError',
+          description: 'Unexpected token',
+          lineStart: 5,
+          colStart: 10,
+          lineEnd: 5,
+          colEnd: 15,
+        },
+        {
+          errorType: 'TypeError',
+          description: 'Cannot read property',
+          lineStart: 12,
+          colStart: 3,
+          lineEnd: 12,
+          colEnd: 20,
+        },
+      ];
+
+      try {
+        throwAgentCompilationError(errors);
+        expect.fail('Expected error to be thrown');
+      } catch (e) {
+        expect(e).to.be.instanceOf(SfError);
+        const msg = (e as SfError).message;
+        expect(msg).to.include('SyntaxError: Unexpected token [Ln 5, Col 10]');
+        expect(msg).to.include('TypeError: Cannot read property [Ln 12, Col 3]');
+      }
+    });
+
+    it('should always set exitCode to 1', () => {
+      const errors: CompilationError[] = [
+        { errorType: 'AnyError', description: 'Something failed', lineStart: 1, colStart: 1, lineEnd: 1, colEnd: 5 },
+      ];
+
+      try {
+        throwAgentCompilationError(errors);
+        expect.fail('Expected error to be thrown');
+      } catch (e) {
+        expect((e as SfError).exitCode).to.equal(1);
+      }
+    });
+
+    it('should always set error name to CompileAgentScriptError', () => {
+      try {
+        throwAgentCompilationError([]);
+        expect.fail('Expected error to be thrown');
+      } catch (e) {
+        expect((e as SfError).name).to.equal('CompileAgentScriptError');
+      }
+    });
+
+    it('should include errors array in data for non-empty input', () => {
+      const errors: CompilationError[] = [
+        { errorType: 'SyntaxError', description: 'Bad token', lineStart: 1, colStart: 1, lineEnd: 1, colEnd: 5 },
+      ];
+
+      try {
+        throwAgentCompilationError(errors);
+        expect.fail('Expected error to be thrown');
+      } catch (e) {
+        const sfErr = e as SfError;
+        expect(sfErr.data).to.deep.equal({ errors });
+      }
+    });
+
+    it('should include empty array in data for empty input', () => {
+      try {
+        throwAgentCompilationError([]);
+        expect.fail('Expected error to be thrown');
+      } catch (e) {
+        const sfErr = e as SfError;
+        expect(sfErr.data).to.deep.equal([]);
+      }
+    });
+  });
+});
diff --git a/test/testRunnerFactory.test.ts b/test/testRunnerFactory.test.ts
new file mode 100644
index 00000000..92eb43a2
--- /dev/null
+++ b/test/testRunnerFactory.test.ts
@@ -0,0 +1,164 @@
+/*
+ * Copyright 2026, Salesforce, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { expect } from 'chai';
+import sinon from 'sinon';
+import esmock from 'esmock';
+import type { Connection } from '@salesforce/core';
+import type { TestRunnerType } from '@salesforce/agents';
+import type { createTestRunner as CreateTestRunnerFn } from '../src/testRunnerFactory.js';
+
+type MockConnection = Pick<Connection, 'instanceUrl'>;
+
+const makeMockConnection = (): MockConnection => ({ instanceUrl: 'https://test.salesforce.com' });
+
+describe('testRunnerFactory', () => {
+  let detectTestRunnerFromIdStub: sinon.SinonStub;
+  let determineTestRunnerStub: sinon.SinonStub;
+  let AgentTesterStub: sinon.SinonStub;
+  let AgentTesterNGTStub: sinon.SinonStub;
+  let createTestRunner: typeof CreateTestRunnerFn;
+
+  beforeEach(async () => {
+    detectTestRunnerFromIdStub = sinon.stub();
+    determineTestRunnerStub = sinon.stub();
+    AgentTesterStub = sinon.stub().returns({ type: 'testing-center' });
+    AgentTesterNGTStub = sinon.stub().returns({ type: 'agentforce-studio' });
+
+    // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-unsafe-member-access
+    const { createTestRunner: fn } = await esmock('../src/testRunnerFactory.js', {
+      '@salesforce/agents': {
+        AgentTester: AgentTesterStub,
+        AgentTesterNGT: AgentTesterNGTStub,
+        detectTestRunnerFromId: detectTestRunnerFromIdStub,
+        determineTestRunner: determineTestRunnerStub,
+      },
+    });
+
+    // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
+    createTestRunner = fn as typeof CreateTestRunnerFn;
+  });
+
+  afterEach(() => {
+    sinon.restore();
+  });
+
+  describe('explicit type', () => {
+    it('should use ngt runner when explicitType is "ngt"', async () => {
+      const connection = makeMockConnection() as Connection;
+      const result = await createTestRunner(connection, 'agentforce-studio' as TestRunnerType);
+
+      expect(result.type).to.equal('agentforce-studio');
+      expect(AgentTesterNGTStub.calledOnce).to.be.true;
+      expect(AgentTesterStub.called).to.be.false;
+      expect(detectTestRunnerFromIdStub.called).to.be.false;
+      expect(determineTestRunnerStub.called).to.be.false;
+    });
+
+    it('should use legacy runner when explicitType is "legacy"', async () => {
+      const connection = makeMockConnection() as Connection;
+      const result = await createTestRunner(connection, 'testing-center' as TestRunnerType);
+
+      expect(result.type).to.equal('testing-center');
+      expect(AgentTesterStub.calledOnce).to.be.true;
+      expect(AgentTesterNGTStub.called).to.be.false;
+      expect(detectTestRunnerFromIdStub.called).to.be.false;
+      expect(determineTestRunnerStub.called).to.be.false;
+    });
+  });
+
+  describe('runId-based detection', () => {
+    it('should use ngt runner when runId detects ngt type', async () => {
+      detectTestRunnerFromIdStub.returns('agentforce-studio');
+      const connection = makeMockConnection() as Connection;
+      const result = await createTestRunner(connection, undefined, undefined, '3A2xxxxxxxxxxxx');
+
+      expect(result.type).to.equal('agentforce-studio');
+      expect(AgentTesterNGTStub.calledOnce).to.be.true;
+      expect(determineTestRunnerStub.called).to.be.false;
+    });
+
+    it('should use legacy runner when runId detects legacy type', async () => {
+      detectTestRunnerFromIdStub.returns('testing-center');
+      const connection = makeMockConnection() as Connection;
+      const result = await createTestRunner(connection, undefined, undefined, '4KBxxxxxxxxxxxx');
+
+      expect(result.type).to.equal('testing-center');
+      expect(AgentTesterStub.calledOnce).to.be.true;
+      expect(determineTestRunnerStub.called).to.be.false;
+    });
+
+    it('should fall through to determineTestRunner when runId detection returns null', async () => {
+      detectTestRunnerFromIdStub.returns(null);
+      determineTestRunnerStub.resolves('agentforce-studio');
+      const connection = makeMockConnection() as Connection;
+
+      await createTestRunner(connection, undefined, 'myTestDef', 'unknownId');
+
+      expect(determineTestRunnerStub.calledOnce).to.be.true;
+    });
+  });
+
+  describe('org metadata detection fallback', () => {
+    it('should call determineTestRunner when no explicitType or runId', async () => {
+      determineTestRunnerStub.resolves('agentforce-studio');
+      const connection = makeMockConnection() as Connection;
+      const result = await createTestRunner(connection, undefined, 'myTestDefinition');
+
+      expect(determineTestRunnerStub.calledOnceWith(connection, 'myTestDefinition')).to.be.true;
+      expect(result.type).to.equal('agentforce-studio');
+      expect(AgentTesterNGTStub.calledOnce).to.be.true;
+    });
+
+    it('should call determineTestRunner with undefined testDefinitionName when not provided', async () => {
+      determineTestRunnerStub.resolves('testing-center');
+      const connection = makeMockConnection() as Connection;
+      const result = await createTestRunner(connection);
+
+      expect(determineTestRunnerStub.calledOnceWith(connection, undefined)).to.be.true;
+      expect(result.type).to.equal('testing-center');
+    });
+  });
+
+  describe('runner instantiation', () => {
+    it('should pass connection to AgentTesterNGT', async () => {
+      const connection = makeMockConnection() as Connection;
+      await createTestRunner(connection, 'agentforce-studio' as TestRunnerType);
+
+      expect(AgentTesterNGTStub.calledWithNew()).to.be.true;
+      expect(AgentTesterNGTStub.firstCall.args[0]).to.equal(connection);
+    });
+
+    it('should pass connection to AgentTester', async () => {
+      const connection = makeMockConnection() as Connection;
+      await createTestRunner(connection, 'testing-center' as TestRunnerType);
+
+      expect(AgentTesterStub.calledWithNew()).to.be.true;
+      expect(AgentTesterStub.firstCall.args[0]).to.equal(connection);
+    });
+
+    it('should return the runner instance alongside the type', async () => {
+      const mockRunnerInstance = { poll: sinon.stub() };
+      AgentTesterNGTStub.returns(mockRunnerInstance);
+      const connection = makeMockConnection() as Connection;
+
+      const result = await createTestRunner(connection, 'agentforce-studio' as TestRunnerType);
+
+      expect(result.runner).to.equal(mockRunnerInstance);
+      expect(result.type).to.equal('agentforce-studio');
+    });
+  });
+});

From 8d2e8863f0733b48b5762a4c2284e68185e2b403 Mon Sep 17 00:00:00 2001
From: Willie Ruemmele <willieruemmele@gmail.com>
Date: Tue, 28 Apr 2026 11:40:29 -0600
Subject: [PATCH 05/11] refactor: update method name for ATD

---
 src/commands/agent/test/run.ts | 4 ++--
 src/flags.ts                   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/commands/agent/test/run.ts b/src/commands/agent/test/run.ts
index 1c95c626..05cd5f55 100644
--- a/src/commands/agent/test/run.ts
+++ b/src/commands/agent/test/run.ts
@@ -23,7 +23,7 @@ import {
   AgentTestRunResult,
   FlaggablePrompt,
   makeFlags,
-  promptForAiEvaluationDefinitionApiName,
+  promptForTestDefinitionApiName,
   resultFormatFlag,
   testOutputDirFlag,
   testRunnerTypeFlag,
@@ -105,7 +105,7 @@ export default class AgentTestRun extends SfCommand<AgentTestRunResult> {
     }
 
     const apiName =
-      flags['api-name'] ?? (await promptForAiEvaluationDefinitionApiName(FLAGGABLE_PROMPTS['api-name'], connection));
+      flags['api-name'] ?? (await promptForTestDefinitionApiName(FLAGGABLE_PROMPTS['api-name'], connection));
 
     this.mso = new TestStages({ title: `Agent Test Run: ${apiName}`, jsonEnabled: this.jsonEnabled() });
     this.mso.start();
diff --git a/src/flags.ts b/src/flags.ts
index 3709a511..06d234b0 100644
--- a/src/flags.ts
+++ b/src/flags.ts
@@ -134,7 +134,7 @@ export function traverseForFiles(dirOrDirs: string | string[], suffixes: string[
   return results;
 }
 
-export const promptForAiEvaluationDefinitionApiName = async (
+export const promptForTestDefinitionApiName = async (
   flagDef: FlaggablePrompt,
   connection: Connection
 ): Promise<string> => {

From 4f43e2084fd5016aa9fae1147c0c72d72e84bd6e Mon Sep 17 00:00:00 2001
From: Willie Ruemmele <willieruemmele@gmail.com>
Date: Tue, 28 Apr 2026 12:01:30 -0600
Subject: [PATCH 06/11] chore: review

---
 messages/shared.md             |   6 +-
 src/agentTestCache.ts          |   2 +-
 src/commands/agent/test/run.ts |   5 +-
 src/flags.ts                   |   4 +-
 src/handleTestResults.ts       | 112 +++++++++------------------------
 src/testRunnerFactory.ts       |  12 +---
 test/testRunnerFactory.test.ts |   8 +--
 7 files changed, 42 insertions(+), 107 deletions(-)

diff --git a/messages/shared.md b/messages/shared.md
index 4c28cf79..173e30cf 100644
--- a/messages/shared.md
+++ b/messages/shared.md
@@ -20,13 +20,13 @@ When enabled, includes detailed generated data (such as invoked actions) in the
 
 The generated data is in JSON format and includes the Apex classes or Flows that were invoked, the Salesforce objects that were touched, and so on. Use the JSON structure of this information to build the test case JSONPath expression when using custom evaluations.
 
-# flags.test-runner-type.summary
+# flags.test-runner.summary
 
 Explicitly specify which test runner to use (agentforce-studio or testing-center).
 
-# flags.test-runner-type.description
+# flags.test-runner.description
 
-By default, the command automatically detects which test runner to use based on the test definition metadata type in your org. Use this flag to explicitly specify the runner type. 'agentforce-studio' uses AiTestSuiteDefinition metadata. 'testing-center' uses AiEvaluationDefinition metadata.
+By default, the command automatically detects which test runner to use based on the test definition metadata type in your org. Use this flag to explicitly specify the runner type. 'agentforce-studio' uses AiTestingDefinition metadata. 'testing-center' uses AiEvaluationDefinition metadata.
 
 # error.invalidAgentType
 
diff --git a/src/agentTestCache.ts b/src/agentTestCache.ts
index de7b86a0..62d7ca28 100644
--- a/src/agentTestCache.ts
+++ b/src/agentTestCache.ts
@@ -16,9 +16,9 @@
 
 import { Global, SfError, TTLConfig } from '@salesforce/core';
 import { Duration } from '@salesforce/kit';
+import type { TestRunnerType } from '@salesforce/agents';
 
 type ResultFormat = 'json' | 'human' | 'junit' | 'tap';
-type TestRunnerType = 'agentforce-studio' | 'testing-center';
 
 type CacheContents = {
   runId: string;
diff --git a/src/commands/agent/test/run.ts b/src/commands/agent/test/run.ts
index 05cd5f55..b529f97d 100644
--- a/src/commands/agent/test/run.ts
+++ b/src/commands/agent/test/run.ts
@@ -110,10 +110,7 @@ export default class AgentTestRun extends SfCommand<AgentTestRunResult> {
     this.mso = new TestStages({ title: `Agent Test Run: ${apiName}`, jsonEnabled: this.jsonEnabled() });
     this.mso.start();
 
-    // Determine which test runner to use (NGT or legacy)
-    const result = await createTestRunner(connection, flags['test-runner'], apiName);
-    const agentTester = result.runner;
-    const runnerType = result.type;
+    const { runner: agentTester, type: runnerType } = await createTestRunner(connection, flags['test-runner'], apiName);
 
     let response: AgentTestStartResponse | AgentTestNGTStartResponse;
     try {
diff --git a/src/flags.ts b/src/flags.ts
index 06d234b0..4888d701 100644
--- a/src/flags.ts
+++ b/src/flags.ts
@@ -70,8 +70,8 @@ export const verboseFlag = Flags.boolean({
 
 export const testRunnerTypeFlag = Flags.custom<'agentforce-studio' | 'testing-center'>({
   options: ['agentforce-studio', 'testing-center'],
-  summary: messages.getMessage('flags.test-runner-type.summary'),
-  description: messages.getMessage('flags.test-runner-type.description'),
+  summary: messages.getMessage('flags.test-runner.summary'),
+  description: messages.getMessage('flags.test-runner.description'),
 })();
 
 function validateInput(input: string, validate: (input: string) => boolean | string): never | string {
diff --git a/src/handleTestResults.ts b/src/handleTestResults.ts
index 2fb9ead1..bb9e66fb 100644
--- a/src/handleTestResults.ts
+++ b/src/handleTestResults.ts
@@ -68,11 +68,6 @@ export function readableTime(time: number, decimalPlaces = 2): string {
     return '< 1s';
   }
 
-  // if time < 1000ms, return time in ms
-  if (time < 1000) {
-    return `${time}ms`;
-  }
-
   // if time < 60s, return time in seconds
   if (time < 60_000) {
     return `${truncate(time / 1000, decimalPlaces)}s`;
@@ -99,6 +94,14 @@ type ParsedScorerResponse = {
   expectedValue?: string;
 };
 
+function parseScorerResponse(raw: string): ParsedScorerResponse {
+  try {
+    return JSON.parse(raw) as ParsedScorerResponse;
+  } catch {
+    return {};
+  }
+}
+
 function humanFormatNGT(results: AgentTestNGTResultsResponse): string {
   const ux = new Ux();
   const tables: string[] = [];
@@ -113,12 +116,7 @@ function humanFormatNGT(results: AgentTestNGTResultsResponse): string {
     }
 
     const scorerRows = testCase.testScorerResults.map((scorer) => {
-      let parsed: ParsedScorerResponse = {};
-      try {
-        parsed = JSON.parse(scorer.scorerResponse) as ParsedScorerResponse;
-      } catch {
-        // ignore
-      }
+      const parsed = parseScorerResponse(scorer.scorerResponse);
       return {
         scorer: scorer.scorerName,
         result: parsed.status === 'PASS' ? ansis.green('Pass') : ansis.red('Fail'),
@@ -148,13 +146,7 @@ function humanFormatNGT(results: AgentTestNGTResultsResponse): string {
 
   const totalCases = results.testCases.length;
   const passCases = results.testCases.filter((tc) =>
-    tc.testScorerResults.every((s) => {
-      try {
-        return (JSON.parse(s.scorerResponse) as ParsedScorerResponse).status === 'PASS';
-      } catch {
-        return false;
-      }
-    })
+    tc.testScorerResults.every((s) => parseScorerResponse(s.scorerResponse).status === 'PASS')
   ).length;
 
   const summary = makeSimpleTable(
@@ -174,13 +166,7 @@ function junitFormatNGT(results: AgentTestNGTResultsResponse): string {
   const builder = new XMLBuilder({ format: true, attributeNamePrefix: '$', ignoreAttributes: false });
   const testCount = results.testCases.length;
   const failureCount = results.testCases.filter((tc) =>
-    tc.testScorerResults.some((s) => {
-      try {
-        return (JSON.parse(s.scorerResponse) as ParsedScorerResponse).status !== 'PASS';
-      } catch {
-        return true;
-      }
-    })
+    tc.testScorerResults.some((s) => parseScorerResponse(s.scorerResponse).status !== 'PASS')
   ).length;
 
   const suites = builder.build({
@@ -194,12 +180,7 @@ function junitFormatNGT(results: AgentTestNGTResultsResponse): string {
         $assertions: tc.testScorerResults.length,
         failure: tc.testScorerResults
           .map((s) => {
-            let parsed: ParsedScorerResponse = {};
-            try {
-              parsed = JSON.parse(s.scorerResponse) as ParsedScorerResponse;
-            } catch {
-              // ignore
-            }
+            const parsed = parseScorerResponse(s.scorerResponse);
             if (parsed.status !== 'PASS') {
               return { $message: parsed.reasoning ?? 'Unknown error', $name: s.scorerName };
             }
@@ -218,12 +199,7 @@ function tapFormatNGT(results: AgentTestNGTResultsResponse): string {
 
   for (const tc of results.testCases) {
     for (const scorer of tc.testScorerResults) {
-      let parsed: ParsedScorerResponse = {};
-      try {
-        parsed = JSON.parse(scorer.scorerResponse) as ParsedScorerResponse;
-      } catch {
-        // ignore
-      }
+      const parsed = parseScorerResponse(scorer.scorerResponse);
       const pass = parsed.status === 'PASS';
       expectationCount++;
       lines.push(`${pass ? 'ok' : 'not ok'} ${expectationCount} ${tc.testNumber}.${scorer.scorerName}`);
@@ -238,7 +214,7 @@ function tapFormatNGT(results: AgentTestNGTResultsResponse): string {
     }
   }
 
-  return `Tap Version 14\n1..${expectationCount}\n${lines.join('\n')}`;
+  return `TAP version 13\n1..${expectationCount}\n${lines.join('\n')}`;
 }
 
 function convertNGTTestResultsToFormat(results: AgentTestNGTResultsResponse, format: 'json' | 'junit' | 'tap'): string {
@@ -414,52 +390,20 @@ export async function handleTestResults({
   const ux = new Ux({ jsonEnabled });
 
   if (!isLegacyResponse(results)) {
-    if (format === 'human') {
-      const formatted = humanFormatNGT(results);
-      if (outputDir) {
-        const file = `test-result-${id}.txt`;
-        await writeFileToDir(outputDir, file, stripVTControlCharacters(formatted));
-        ux.log(`Created human-readable file at ${join(outputDir, file)}`);
-      } else {
-        ux.log(formatted);
-      }
-      return;
-    }
-
-    if (format === 'json') {
-      const formatted = convertNGTTestResultsToFormat(results, 'json');
-      if (outputDir) {
-        const file = `test-result-${id}.json`;
-        await writeFileToDir(outputDir, file, formatted);
-        ux.log(`Created JSON file at ${join(outputDir, file)}`);
-      } else {
-        ux.log(formatted);
-      }
-      return;
-    }
-
-    if (format === 'junit') {
-      const formatted = convertNGTTestResultsToFormat(results, 'junit');
-      if (outputDir) {
-        const file = `test-result-${id}.xml`;
-        await writeFileToDir(outputDir, file, formatted);
-        ux.log(`Created JUnit file at ${join(outputDir, file)}`);
-      } else {
-        ux.log(formatted);
-      }
-      return;
-    }
-
-    if (format === 'tap') {
-      const formatted = convertNGTTestResultsToFormat(results, 'tap');
-      if (outputDir) {
-        const file = `test-result-${id}.txt`;
-        await writeFileToDir(outputDir, file, formatted);
-        ux.log(`Created TAP file at ${join(outputDir, file)}`);
-      } else {
-        ux.log(formatted);
-      }
-      return;
+    const ngtFormatConfig = {
+      human: { ext: 'txt', label: 'human-readable', get: () => humanFormatNGT(results), strip: true },
+      json: { ext: 'json', label: 'JSON', get: () => convertNGTTestResultsToFormat(results, 'json'), strip: false },
+      junit: { ext: 'xml', label: 'JUnit', get: () => convertNGTTestResultsToFormat(results, 'junit'), strip: false },
+      tap: { ext: 'txt', label: 'TAP', get: () => convertNGTTestResultsToFormat(results, 'tap'), strip: false },
+    } as const;
+    const cfg = ngtFormatConfig[format];
+    const formatted = cfg.get();
+    if (outputDir) {
+      const file = `test-result-${id}.${cfg.ext}`;
+      await writeFileToDir(outputDir, file, cfg.strip ? stripVTControlCharacters(formatted) : formatted);
+      ux.log(`Created ${cfg.label} file at ${join(outputDir, file)}`);
+    } else {
+      ux.log(formatted);
     }
     return;
   }
diff --git a/src/testRunnerFactory.ts b/src/testRunnerFactory.ts
index 4edd797a..43218324 100644
--- a/src/testRunnerFactory.ts
+++ b/src/testRunnerFactory.ts
@@ -45,15 +45,9 @@ export async function createTestRunner(
   testDefinitionName?: string,
   runId?: string
 ): Promise<{ runner: TestRunnerInstance; type: TestRunnerType }> {
-  let runnerType: TestRunnerType;
-
-  if (explicitType) {
-    runnerType = explicitType;
-  } else if (runId && detectTestRunnerFromId(runId)) {
-    runnerType = detectTestRunnerFromId(runId)!;
-  } else {
-    runnerType = await determineTestRunner(connection, testDefinitionName);
-  }
+  const detected = runId ? detectTestRunnerFromId(runId) : undefined;
+  const runnerType: TestRunnerType =
+    explicitType ?? detected ?? (await determineTestRunner(connection, testDefinitionName));
 
   const runner = runnerType === 'agentforce-studio' ? new AgentTesterNGT(connection) : new AgentTester(connection);
   return { runner, type: runnerType };
diff --git a/test/testRunnerFactory.test.ts b/test/testRunnerFactory.test.ts
index 92eb43a2..ef0596e4 100644
--- a/test/testRunnerFactory.test.ts
+++ b/test/testRunnerFactory.test.ts
@@ -57,7 +57,7 @@ describe('testRunnerFactory', () => {
   });
 
   describe('explicit type', () => {
-    it('should use ngt runner when explicitType is "ngt"', async () => {
+    it('should use agentforce-studio runner when explicitType is "agentforce-studio"', async () => {
       const connection = makeMockConnection() as Connection;
       const result = await createTestRunner(connection, 'agentforce-studio' as TestRunnerType);
 
@@ -68,7 +68,7 @@ describe('testRunnerFactory', () => {
       expect(determineTestRunnerStub.called).to.be.false;
     });
 
-    it('should use legacy runner when explicitType is "legacy"', async () => {
+    it('should use testing-center runner when explicitType is "testing-center"', async () => {
       const connection = makeMockConnection() as Connection;
       const result = await createTestRunner(connection, 'testing-center' as TestRunnerType);
 
@@ -81,7 +81,7 @@ describe('testRunnerFactory', () => {
   });
 
   describe('runId-based detection', () => {
-    it('should use ngt runner when runId detects ngt type', async () => {
+    it('should use agentforce-studio runner when runId detects agentforce-studio type', async () => {
       detectTestRunnerFromIdStub.returns('agentforce-studio');
       const connection = makeMockConnection() as Connection;
       const result = await createTestRunner(connection, undefined, undefined, '3A2xxxxxxxxxxxx');
@@ -91,7 +91,7 @@ describe('testRunnerFactory', () => {
       expect(determineTestRunnerStub.called).to.be.false;
     });
 
-    it('should use legacy runner when runId detects legacy type', async () => {
+    it('should use testing-center runner when runId detects testing-center type', async () => {
       detectTestRunnerFromIdStub.returns('testing-center');
       const connection = makeMockConnection() as Connection;
       const result = await createTestRunner(connection, undefined, undefined, '4KBxxxxxxxxxxxx');

From a9c47056a4902409d7691a9d0a07fb6132230db9 Mon Sep 17 00:00:00 2001
From: Willie Ruemmele <willieruemmele@gmail.com>
Date: Tue, 28 Apr 2026 13:13:39 -0600
Subject: [PATCH 07/11] chore: review II - rename testRunnerTypeFlag to testRunnerFlag

---
 src/commands/agent/test/results.ts | 4 ++--
 src/commands/agent/test/resume.ts  | 4 ++--
 src/commands/agent/test/run.ts     | 7 +++----
 src/flags.ts                       | 2 +-
 src/testRunnerFactory.ts           | 2 +-
 5 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/src/commands/agent/test/results.ts b/src/commands/agent/test/results.ts
index 451c475d..a3392ea5 100644
--- a/src/commands/agent/test/results.ts
+++ b/src/commands/agent/test/results.ts
@@ -17,7 +17,7 @@
 import { SfCommand, Flags, toHelpSection } from '@salesforce/sf-plugins-core';
 import { EnvironmentVariable, Messages, SfError } from '@salesforce/core';
 import { AgentTestResultsResponse, AgentTestNGTResultsResponse } from '@salesforce/agents';
-import { resultFormatFlag, testOutputDirFlag, testRunnerTypeFlag, verboseFlag } from '../../../flags.js';
+import { resultFormatFlag, testOutputDirFlag, testRunnerFlag, verboseFlag } from '../../../flags.js';
 import { handleTestResults } from '../../../handleTestResults.js';
 import { createTestRunner } from '../../../testRunnerFactory.js';
 
@@ -52,7 +52,7 @@ export default class AgentTestResults extends SfCommand<AgentTestResultsResult>
     }),
     'result-format': resultFormatFlag(),
     'output-dir': testOutputDirFlag(),
-    'test-runner': testRunnerTypeFlag,
+    'test-runner': testRunnerFlag,
     verbose: verboseFlag,
   };
 
diff --git a/src/commands/agent/test/resume.ts b/src/commands/agent/test/resume.ts
index 8e2ec3cd..256241b1 100644
--- a/src/commands/agent/test/resume.ts
+++ b/src/commands/agent/test/resume.ts
@@ -23,7 +23,7 @@ import {
   AgentTestRunResult,
   resultFormatFlag,
   testOutputDirFlag,
-  testRunnerTypeFlag,
+  testRunnerFlag,
   verboseFlag,
 } from '../../../flags.js';
 import { handleTestResults } from '../../../handleTestResults.js';
@@ -71,7 +71,7 @@ export default class AgentTestResume extends SfCommand<AgentTestRunResult> {
     }),
     'result-format': resultFormatFlag(),
     'output-dir': testOutputDirFlag(),
-    'test-runner': testRunnerTypeFlag,
+    'test-runner': testRunnerFlag,
     verbose: verboseFlag,
   };
 
diff --git a/src/commands/agent/test/run.ts b/src/commands/agent/test/run.ts
index b529f97d..4dbbb968 100644
--- a/src/commands/agent/test/run.ts
+++ b/src/commands/agent/test/run.ts
@@ -26,7 +26,7 @@ import {
   promptForTestDefinitionApiName,
   resultFormatFlag,
   testOutputDirFlag,
-  testRunnerTypeFlag,
+  testRunnerFlag,
   verboseFlag,
 } from '../../../flags.js';
 import { AgentTestCache } from '../../../agentTestCache.js';
@@ -90,7 +90,7 @@ export default class AgentTestRun extends SfCommand<AgentTestRunResult> {
     }),
     'result-format': resultFormatFlag(),
     'output-dir': testOutputDirFlag(),
-    'test-runner': testRunnerTypeFlag,
+    'test-runner': testRunnerFlag,
     verbose: verboseFlag,
   };
 
@@ -189,8 +189,7 @@ export default class AgentTestRun extends SfCommand<AgentTestRunResult> {
         process.exitCode = 1;
       }
 
-      // eslint-disable-next-line @typescript-eslint/no-unnecessary-type-assertion
-      return { ...detailsResponse!, status: 'COMPLETED', runId: response.runId } as AgentTestRunResult;
+      return { ...detailsResponse, status: 'COMPLETED', runId: response.runId } as AgentTestRunResult;
     } else {
       this.mso.stop();
       this.log(
diff --git a/src/flags.ts b/src/flags.ts
index 4888d701..b6902276 100644
--- a/src/flags.ts
+++ b/src/flags.ts
@@ -68,7 +68,7 @@ export const verboseFlag = Flags.boolean({
   description: messages.getMessage('flags.verbose.description'),
 });
 
-export const testRunnerTypeFlag = Flags.custom<'agentforce-studio' | 'testing-center'>({
+export const testRunnerFlag = Flags.custom<'agentforce-studio' | 'testing-center'>({
   options: ['agentforce-studio', 'testing-center'],
   summary: messages.getMessage('flags.test-runner.summary'),
   description: messages.getMessage('flags.test-runner.description'),
diff --git a/src/testRunnerFactory.ts b/src/testRunnerFactory.ts
index 43218324..43ee41f9 100644
--- a/src/testRunnerFactory.ts
+++ b/src/testRunnerFactory.ts
@@ -29,7 +29,7 @@ export type TestRunnerInstance = AgentTester | AgentTesterNGT;
  * Creates the appropriate test runner (agentforce-studio or testing-center) based on detection or explicit type.
  *
  * Detection priority:
- * 1. `explicitType` — user-supplied `--test-runner-type` flag, always wins
+ * 1. `explicitType` — user-supplied `--test-runner` flag, always wins
  * 2. `runId` prefix — instant detection from the Salesforce ID prefix (`3A2` = agentforce-studio, `4KB` = testing-center), no network call
  * 3. `testDefinitionName` — org metadata query via `determineTestRunner` (network call, used as last resort)
  *

From 9b479d5bd411d4bb9f0d126ccc8450ff57238281 Mon Sep 17 00:00:00 2001
From: Willie Ruemmele <willieruemmele@gmail.com>
Date: Tue, 28 Apr 2026 14:28:49 -0600
Subject: [PATCH 08/11] chore: catch AmbiguousTestDefinition error and suggest --test-runner flag

---
 src/testRunnerFactory.ts | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/src/testRunnerFactory.ts b/src/testRunnerFactory.ts
index 43ee41f9..8664f2b8 100644
--- a/src/testRunnerFactory.ts
+++ b/src/testRunnerFactory.ts
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-import { Connection } from '@salesforce/core';
+import { Connection, SfError } from '@salesforce/core';
 import {
   AgentTester,
   AgentTesterNGT,
@@ -46,8 +46,22 @@ export async function createTestRunner(
   runId?: string
 ): Promise<{ runner: TestRunnerInstance; type: TestRunnerType }> {
   const detected = runId ? detectTestRunnerFromId(runId) : undefined;
-  const runnerType: TestRunnerType =
-    explicitType ?? detected ?? (await determineTestRunner(connection, testDefinitionName));
+  let runnerType: TestRunnerType;
+  try {
+    runnerType = explicitType ?? detected ?? (await determineTestRunner(connection, testDefinitionName));
+  } catch (e) {
+    const wrapped = SfError.wrap(e);
+    if (wrapped.name === 'AmbiguousTestDefinition') {
+      throw new SfError(
+        wrapped.message,
+        wrapped.name,
+        ['Use --test-runner to explicitly specify the runner type (agentforce-studio or testing-center)'],
+        undefined,
+        wrapped
+      );
+    }
+    throw wrapped;
+  }
 
   const runner = runnerType === 'agentforce-studio' ? new AgentTesterNGT(connection) : new AgentTester(connection);
   return { runner, type: runnerType };

From 7419fafb228624e12b56f8dc15cb47e258eddb84 Mon Sep 17 00:00:00 2001
From: Willie Ruemmele <willieruemmele@gmail.com>
Date: Tue, 28 Apr 2026 14:35:04 -0600
Subject: [PATCH 09/11] chore: update command snapshot and schemas for --test-runner flag and NGT results

---
 command-snapshot.json           | 14 +++++++-
 schemas/agent-test-results.json | 57 ++++++++++++++++++++++++++++++++-
 schemas/agent-test-resume.json  | 52 ++++++++++++++++++++++++++++++
 schemas/agent-test-run.json     | 52 ++++++++++++++++++++++++++++++
 4 files changed, 173 insertions(+), 2 deletions(-)

diff --git a/command-snapshot.json b/command-snapshot.json
index 050e1343..b815bd34 100644
--- a/command-snapshot.json
+++ b/command-snapshot.json
@@ -182,7 +182,17 @@
     "command": "agent:test:results",
     "flagAliases": [],
     "flagChars": ["d", "i", "o"],
-    "flags": ["api-version", "flags-dir", "job-id", "json", "output-dir", "result-format", "target-org", "verbose"],
+    "flags": [
+      "api-version",
+      "flags-dir",
+      "job-id",
+      "json",
+      "output-dir",
+      "result-format",
+      "target-org",
+      "test-runner",
+      "verbose"
+    ],
     "plugin": "@salesforce/plugin-agent"
   },
   {
@@ -198,6 +208,7 @@
       "output-dir",
       "result-format",
       "target-org",
+      "test-runner",
       "use-most-recent",
       "verbose",
       "wait"
@@ -217,6 +228,7 @@
       "output-dir",
       "result-format",
       "target-org",
+      "test-runner",
       "verbose",
       "wait"
     ],
diff --git a/schemas/agent-test-results.json b/schemas/agent-test-results.json
index 50332090..a176db01 100644
--- a/schemas/agent-test-results.json
+++ b/schemas/agent-test-results.json
@@ -3,7 +3,14 @@
   "$ref": "#/definitions/AgentTestResultsResult",
   "definitions": {
     "AgentTestResultsResult": {
-      "$ref": "#/definitions/AgentTestResultsResponse"
+      "anyOf": [
+        {
+          "$ref": "#/definitions/AgentTestResultsResponse"
+        },
+        {
+          "$ref": "#/definitions/AgentTestNGTResultsResponse"
+        }
+      ]
     },
     "AgentTestResultsResponse": {
       "type": "object",
@@ -148,6 +155,54 @@
       },
       "required": ["status", "startTime", "inputs", "generatedData", "testResults", "testNumber"],
       "additionalProperties": false
+    },
+    "AgentTestNGTResultsResponse": {
+      "type": "object",
+      "properties": {
+        "status": {
+          "type": "string"
+        },
+        "testCases": {
+          "type": "array",
+          "items": {
+            "$ref": "#/definitions/NGTTestCaseResult"
+          }
+        }
+      },
+      "required": ["status", "testCases"],
+      "additionalProperties": false
+    },
+    "NGTTestCaseResult": {
+      "type": "object",
+      "properties": {
+        "subjectResponse": {
+          "type": "string"
+        },
+        "testNumber": {
+          "type": "number"
+        },
+        "testScorerResults": {
+          "type": "array",
+          "items": {
+            "$ref": "#/definitions/TestScorerResult"
+          }
+        }
+      },
+      "required": ["subjectResponse", "testNumber", "testScorerResults"],
+      "additionalProperties": false
+    },
+    "TestScorerResult": {
+      "type": "object",
+      "properties": {
+        "scorerName": {
+          "type": "string"
+        },
+        "scorerResponse": {
+          "type": "string"
+        }
+      },
+      "required": ["scorerName", "scorerResponse"],
+      "additionalProperties": false
     }
   }
 }
diff --git a/schemas/agent-test-resume.json b/schemas/agent-test-resume.json
index 3b73ce7d..5f195569 100644
--- a/schemas/agent-test-resume.json
+++ b/schemas/agent-test-resume.json
@@ -65,6 +65,26 @@
             }
           },
           "required": ["runId", "startTime", "status", "subjectName", "testCases"]
+        },
+        {
+          "type": "object",
+          "additionalProperties": false,
+          "properties": {
+            "status": {
+              "type": "string",
+              "const": "COMPLETED"
+            },
+            "runId": {
+              "type": "string"
+            },
+            "testCases": {
+              "type": "array",
+              "items": {
+                "$ref": "#/definitions/NGTTestCaseResult"
+              }
+            }
+          },
+          "required": ["runId", "status", "testCases"]
         }
       ]
     },
@@ -183,6 +203,38 @@
       },
       "required": ["status", "startTime", "inputs", "generatedData", "testResults", "testNumber"],
       "additionalProperties": false
+    },
+    "NGTTestCaseResult": {
+      "type": "object",
+      "properties": {
+        "subjectResponse": {
+          "type": "string"
+        },
+        "testNumber": {
+          "type": "number"
+        },
+        "testScorerResults": {
+          "type": "array",
+          "items": {
+            "$ref": "#/definitions/TestScorerResult"
+          }
+        }
+      },
+      "required": ["subjectResponse", "testNumber", "testScorerResults"],
+      "additionalProperties": false
+    },
+    "TestScorerResult": {
+      "type": "object",
+      "properties": {
+        "scorerName": {
+          "type": "string"
+        },
+        "scorerResponse": {
+          "type": "string"
+        }
+      },
+      "required": ["scorerName", "scorerResponse"],
+      "additionalProperties": false
     }
   }
 }
diff --git a/schemas/agent-test-run.json b/schemas/agent-test-run.json
index 3b73ce7d..5f195569 100644
--- a/schemas/agent-test-run.json
+++ b/schemas/agent-test-run.json
@@ -65,6 +65,26 @@
             }
           },
           "required": ["runId", "startTime", "status", "subjectName", "testCases"]
+        },
+        {
+          "type": "object",
+          "additionalProperties": false,
+          "properties": {
+            "status": {
+              "type": "string",
+              "const": "COMPLETED"
+            },
+            "runId": {
+              "type": "string"
+            },
+            "testCases": {
+              "type": "array",
+              "items": {
+                "$ref": "#/definitions/NGTTestCaseResult"
+              }
+            }
+          },
+          "required": ["runId", "status", "testCases"]
         }
       ]
     },
@@ -183,6 +203,38 @@
       },
       "required": ["status", "startTime", "inputs", "generatedData", "testResults", "testNumber"],
       "additionalProperties": false
+    },
+    "NGTTestCaseResult": {
+      "type": "object",
+      "properties": {
+        "subjectResponse": {
+          "type": "string"
+        },
+        "testNumber": {
+          "type": "number"
+        },
+        "testScorerResults": {
+          "type": "array",
+          "items": {
+            "$ref": "#/definitions/TestScorerResult"
+          }
+        }
+      },
+      "required": ["subjectResponse", "testNumber", "testScorerResults"],
+      "additionalProperties": false
+    },
+    "TestScorerResult": {
+      "type": "object",
+      "properties": {
+        "scorerName": {
+          "type": "string"
+        },
+        "scorerResponse": {
+          "type": "string"
+        }
+      },
+      "required": ["scorerName", "scorerResponse"],
+      "additionalProperties": false
     }
   }
 }

From 1bef698290fb276a5eee3b8b7a40f82e9c76aa3d Mon Sep 17 00:00:00 2001
From: Willie Ruemmele <willieruemmele@gmail.com>
Date: Tue, 28 Apr 2026 14:42:28 -0600
Subject: [PATCH 10/11] chore: DRY up testRunnerFactory by delegating to createAgentTester

---
 src/testRunnerFactory.ts       |  29 +------
 test/testRunnerFactory.test.ts | 150 +++++++++++++--------------------
 2 files changed, 59 insertions(+), 120 deletions(-)

diff --git a/src/testRunnerFactory.ts b/src/testRunnerFactory.ts
index 8664f2b8..6e799ccc 100644
--- a/src/testRunnerFactory.ts
+++ b/src/testRunnerFactory.ts
@@ -15,40 +15,18 @@
  */
 
 import { Connection, SfError } from '@salesforce/core';
-import {
-  AgentTester,
-  AgentTesterNGT,
-  detectTestRunnerFromId,
-  determineTestRunner,
-  TestRunnerType,
-} from '@salesforce/agents';
+import { createAgentTester, AgentTester, AgentTesterNGT, type TestRunnerType } from '@salesforce/agents';
 
 export type TestRunnerInstance = AgentTester | AgentTesterNGT;
 
-/**
- * Creates the appropriate test runner (agentforce-studio or testing-center) based on detection or explicit type.
- *
- * Detection priority:
- * 1. `explicitType` — user-supplied `--test-runner` flag, always wins
- * 2. `runId` prefix — instant detection from the Salesforce ID prefix (`3A2` = agentforce-studio, `4KB` = testing-center), no network call
- * 3. `testDefinitionName` — org metadata query via `determineTestRunner` (network call, used as last resort)
- *
- * @param connection - Salesforce connection
- * @param explicitType - Optional explicit runner type (bypasses all detection)
- * @param testDefinitionName - Optional test name for org metadata detection
- * @param runId - Optional existing run ID; prefix is used for instant detection
- * @returns Object containing the runner instance and its type
- */
 export async function createTestRunner(
   connection: Connection,
   explicitType?: TestRunnerType,
   testDefinitionName?: string,
   runId?: string
 ): Promise<{ runner: TestRunnerInstance; type: TestRunnerType }> {
-  const detected = runId ? detectTestRunnerFromId(runId) : undefined;
-  let runnerType: TestRunnerType;
   try {
-    runnerType = explicitType ?? detected ?? (await determineTestRunner(connection, testDefinitionName));
+    return await createAgentTester(connection, { explicitType, runId, testDefinitionName });
   } catch (e) {
     const wrapped = SfError.wrap(e);
     if (wrapped.name === 'AmbiguousTestDefinition') {
@@ -62,7 +40,4 @@ export async function createTestRunner(
     }
     throw wrapped;
   }
-
-  const runner = runnerType === 'agentforce-studio' ? new AgentTesterNGT(connection) : new AgentTester(connection);
-  return { runner, type: runnerType };
 }
diff --git a/test/testRunnerFactory.test.ts b/test/testRunnerFactory.test.ts
index ef0596e4..1c1ebb96 100644
--- a/test/testRunnerFactory.test.ts
+++ b/test/testRunnerFactory.test.ts
@@ -17,34 +17,25 @@
 import { expect } from 'chai';
 import sinon from 'sinon';
 import esmock from 'esmock';
+import { SfError } from '@salesforce/core';
 import type { Connection } from '@salesforce/core';
 import type { TestRunnerType } from '@salesforce/agents';
 import type { createTestRunner as CreateTestRunnerFn } from '../src/testRunnerFactory.js';
 
 type MockConnection = Pick<Connection, 'instanceUrl'>;
-
 const makeMockConnection = (): MockConnection => ({ instanceUrl: 'https://test.salesforce.com' });
 
 describe('testRunnerFactory', () => {
-  let detectTestRunnerFromIdStub: sinon.SinonStub;
-  let determineTestRunnerStub: sinon.SinonStub;
-  let AgentTesterStub: sinon.SinonStub;
-  let AgentTesterNGTStub: sinon.SinonStub;
+  let createAgentTesterStub: sinon.SinonStub;
   let createTestRunner: typeof CreateTestRunnerFn;
 
   beforeEach(async () => {
-    detectTestRunnerFromIdStub = sinon.stub();
-    determineTestRunnerStub = sinon.stub();
-    AgentTesterStub = sinon.stub().returns({ type: 'testing-center' });
-    AgentTesterNGTStub = sinon.stub().returns({ type: 'agentforce-studio' });
+    createAgentTesterStub = sinon.stub();
 
     // eslint-disable-next-line @typescript-eslint/no-unsafe-assignment, @typescript-eslint/no-unsafe-member-access
     const { createTestRunner: fn } = await esmock('../src/testRunnerFactory.js', {
       '@salesforce/agents': {
-        AgentTester: AgentTesterStub,
-        AgentTesterNGT: AgentTesterNGTStub,
-        detectTestRunnerFromId: detectTestRunnerFromIdStub,
-        determineTestRunner: determineTestRunnerStub,
+        createAgentTester: createAgentTesterStub,
       },
     });
 
@@ -56,109 +47,82 @@ describe('testRunnerFactory', () => {
     sinon.restore();
   });
 
-  describe('explicit type', () => {
-    it('should use agentforce-studio runner when explicitType is "agentforce-studio"', async () => {
+  describe('argument passthrough', () => {
+    it('passes explicitType, runId, and testDefinitionName to createAgentTester', async () => {
+      const mockResult = { runner: {}, type: 'agentforce-studio' as TestRunnerType };
+      createAgentTesterStub.resolves(mockResult);
       const connection = makeMockConnection() as Connection;
-      const result = await createTestRunner(connection, 'agentforce-studio' as TestRunnerType);
 
-      expect(result.type).to.equal('agentforce-studio');
-      expect(AgentTesterNGTStub.calledOnce).to.be.true;
-      expect(AgentTesterStub.called).to.be.false;
-      expect(detectTestRunnerFromIdStub.called).to.be.false;
-      expect(determineTestRunnerStub.called).to.be.false;
-    });
+      await createTestRunner(connection, 'agentforce-studio', 'myTest', '3A2xxx');
 
-    it('should use testing-center runner when explicitType is "testing-center"', async () => {
-      const connection = makeMockConnection() as Connection;
-      const result = await createTestRunner(connection, 'testing-center' as TestRunnerType);
-
-      expect(result.type).to.equal('testing-center');
-      expect(AgentTesterStub.calledOnce).to.be.true;
-      expect(AgentTesterNGTStub.called).to.be.false;
-      expect(detectTestRunnerFromIdStub.called).to.be.false;
-      expect(determineTestRunnerStub.called).to.be.false;
+      expect(
+        createAgentTesterStub.calledOnceWith(connection, {
+          explicitType: 'agentforce-studio',
+          runId: '3A2xxx',
+          testDefinitionName: 'myTest',
+        })
+      ).to.be.true;
     });
-  });
 
-  describe('runId-based detection', () => {
-    it('should use agentforce-studio runner when runId detects agentforce-studio type', async () => {
-      detectTestRunnerFromIdStub.returns('agentforce-studio');
+    it('passes undefined fields when not provided', async () => {
+      createAgentTesterStub.resolves({ runner: {}, type: 'testing-center' as TestRunnerType });
       const connection = makeMockConnection() as Connection;
-      const result = await createTestRunner(connection, undefined, undefined, '3A2xxxxxxxxxxxx');
 
-      expect(result.type).to.equal('agentforce-studio');
-      expect(AgentTesterNGTStub.calledOnce).to.be.true;
-      expect(determineTestRunnerStub.called).to.be.false;
-    });
-
-    it('should use testing-center runner when runId detects testing-center type', async () => {
-      detectTestRunnerFromIdStub.returns('testing-center');
-      const connection = makeMockConnection() as Connection;
-      const result = await createTestRunner(connection, undefined, undefined, '4KBxxxxxxxxxxxx');
+      await createTestRunner(connection);
 
-      expect(result.type).to.equal('testing-center');
-      expect(AgentTesterStub.calledOnce).to.be.true;
-      expect(determineTestRunnerStub.called).to.be.false;
+      expect(
+        createAgentTesterStub.calledOnceWith(connection, {
+          explicitType: undefined,
+          runId: undefined,
+          testDefinitionName: undefined,
+        })
+      ).to.be.true;
     });
 
-    it('should fall through to determineTestRunner when runId detection returns null', async () => {
-      detectTestRunnerFromIdStub.returns(null);
-      determineTestRunnerStub.resolves('agentforce-studio');
+    it('returns the result from createAgentTester', async () => {
+      const mockRunner = { poll: sinon.stub() };
+      const mockResult = { runner: mockRunner, type: 'agentforce-studio' as TestRunnerType };
+      createAgentTesterStub.resolves(mockResult);
       const connection = makeMockConnection() as Connection;
 
-      await createTestRunner(connection, undefined, 'myTestDef', 'unknownId');
-
-      expect(determineTestRunnerStub.calledOnce).to.be.true;
-    });
-  });
+      const result = await createTestRunner(connection, 'agentforce-studio');
 
-  describe('org metadata detection fallback', () => {
-    it('should call determineTestRunner when no explicitType or runId', async () => {
-      determineTestRunnerStub.resolves('agentforce-studio');
-      const connection = makeMockConnection() as Connection;
-      const result = await createTestRunner(connection, undefined, 'myTestDefinition');
-
-      expect(determineTestRunnerStub.calledOnceWith(connection, 'myTestDefinition')).to.be.true;
+      expect(result.runner).to.equal(mockRunner);
       expect(result.type).to.equal('agentforce-studio');
-      expect(AgentTesterNGTStub.calledOnce).to.be.true;
-    });
-
-    it('should call determineTestRunner with undefined testDefinitionName when not provided', async () => {
-      determineTestRunnerStub.resolves('testing-center');
-      const connection = makeMockConnection() as Connection;
-      const result = await createTestRunner(connection);
-
-      expect(determineTestRunnerStub.calledOnceWith(connection, undefined)).to.be.true;
-      expect(result.type).to.equal('testing-center');
     });
   });
 
-  describe('runner instantiation', () => {
-    it('should pass connection to AgentTesterNGT', async () => {
+  describe('AmbiguousTestDefinition error handling', () => {
+    it('re-throws with --test-runner action hint', async () => {
+      const original = new SfError('MySuite exists in both metadata types', 'AmbiguousTestDefinition');
+      createAgentTesterStub.rejects(original);
       const connection = makeMockConnection() as Connection;
-      await createTestRunner(connection, 'agentforce-studio' as TestRunnerType);
 
-      expect(AgentTesterNGTStub.calledWithNew()).to.be.true;
-      expect(AgentTesterNGTStub.firstCall.args[0]).to.equal(connection);
+      try {
+        await createTestRunner(connection, undefined, 'MySuite');
+        expect.fail('Expected error was not thrown');
+      } catch (err) {
+        expect(err).to.be.instanceOf(SfError);
+        const sfErr = err as SfError;
+        expect(sfErr.name).to.equal('AmbiguousTestDefinition');
+        expect(sfErr.actions).to.include(
+          'Use --test-runner to explicitly specify the runner type (agentforce-studio or testing-center)'
+        );
+        expect(sfErr.cause).to.equal(original);
+      }
     });
 
-    it('should pass connection to AgentTester', async () => {
+    it('passes through non-AmbiguousTestDefinition errors unchanged', async () => {
+      const original = new SfError('Network error', 'NetworkError');
+      createAgentTesterStub.rejects(original);
       const connection = makeMockConnection() as Connection;
-      await createTestRunner(connection, 'testing-center' as TestRunnerType);
 
-      expect(AgentTesterStub.calledWithNew()).to.be.true;
-      expect(AgentTesterStub.firstCall.args[0]).to.equal(connection);
-    });
-
-    it('should return the runner instance alongside the type', async () => {
-      const mockRunnerInstance = { poll: sinon.stub() };
-      AgentTesterNGTStub.returns(mockRunnerInstance);
-      const connection = makeMockConnection() as Connection;
-
-      const result = await createTestRunner(connection, 'agentforce-studio' as TestRunnerType);
-
-      expect(result.runner).to.equal(mockRunnerInstance);
-      expect(result.type).to.equal('agentforce-studio');
+      try {
+        await createTestRunner(connection, undefined, 'MySuite');
+        expect.fail('Expected error was not thrown');
+      } catch (err) {
+        expect(err).to.equal(original);
+      }
     });
   });
 });

From 253ae02547f761b2036ea0145e72633cc6092343 Mon Sep 17 00:00:00 2001
From: Willie Ruemmele <willieruemmele@gmail.com>
Date: Tue, 28 Apr 2026 15:14:49 -0600
Subject: [PATCH 11/11] test: add NUT for agentforce-studio (NGT) agent test commands

---
 test/nuts/z4.agent.test.ngt.nut.ts | 245 +++++++++++++++++++++++++++++
 1 file changed, 245 insertions(+)
 create mode 100644 test/nuts/z4.agent.test.ngt.nut.ts

diff --git a/test/nuts/z4.agent.test.ngt.nut.ts b/test/nuts/z4.agent.test.ngt.nut.ts
new file mode 100644
index 00000000..161927c7
--- /dev/null
+++ b/test/nuts/z4.agent.test.ngt.nut.ts
@@ -0,0 +1,245 @@
+/*
+ * Copyright 2026, Salesforce, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { writeFileSync, mkdirSync } from 'node:fs';
+import { join } from 'node:path';
+import { expect } from 'chai';
+import { execCmd, Duration, TestSession } from '@salesforce/cli-plugins-testkit';
+import { ComponentSetBuilder } from '@salesforce/source-deploy-retrieve';
+import { Agent } from '@salesforce/agents';
+import { Org } from '@salesforce/core';
+import { AgentTestCache } from '../../src/agentTestCache.js';
+import type { AgentTestListResult } from '../../src/commands/agent/test/list.js';
+import type { AgentTestResultsResult } from '../../src/commands/agent/test/results.js';
+import type { AgentTestRunResult } from '../../src/flags.js';
+import { getTestSession, getUsername } from './shared-setup.js';
+
+/* eslint-disable no-console */
+
+// Agentforce Studio (AiTestingDefinition) NUTs.
+// Depends on z2 having published a Test_Agent_* agent. The before() hook discovers that
+// agent, writes an AiTestingDefinition with the correct subjectName, and deploys it.
+describe('agent test (agentforce-studio / NGT)', function () {
+  this.timeout(30 * 60 * 1000);
+
+  let session: TestSession;
+  let ngtTestName: string;
+
+  // Suite setup: finds the agent published by z2, then writes and deploys an AiTestingDefinition that targets it.
+  before(async function () {
+    this.timeout(30 * 60 * 1000);
+    session = await getTestSession();
+    const org = await Org.create({ aliasOrUsername: getUsername() });
+    const connection = org.getConnection();
+
+    // Find the agent published in z2
+    const publishedAgent = (await Agent.listRemote(connection)).find((a) => a.DeveloperName?.startsWith('Test_Agent_'));
+    if (!publishedAgent?.DeveloperName) {
+      throw new Error('No published Test_Agent_* found — ensure z2.agent.publish.nut runs first');
+    }
+
+    const agentName = publishedAgent.DeveloperName;
+    ngtTestName = `${agentName}_NGT_Test`;
+    console.log(`Using agent '${agentName}', test definition '${ngtTestName}'`);
+
+    // Write AiTestingDefinition metadata file
+    const metaDir = join(session.project.dir, 'force-app', 'main', 'default', 'aiTestingDefinitions');
+    mkdirSync(metaDir, { recursive: true });
+
+    const metaXml = `<?xml version="1.0" encoding="UTF-8"?>
+<AiTestingDefinition xmlns="http://soap.sforce.com/2006/04/metadata">
+    <description>NGT NUT test for ${agentName}</description>
+    <name>${ngtTestName}</name>
+    <subjectName>${agentName}</subjectName>
+    <subjectType>AGENT</subjectType>
+    <subjectVersion>v1</subjectVersion>
+    <testCase>
+        <inputs>
+            <utterance>Hi, can you tell me what your return or refund policy is? Please include citations in the answer, and use this citations URL: https://help.example.com/citations.</utterance>
+        </inputs>
+        <number>1</number>
+        <scorer>
+            <expectedValue>GeneralFAQ</expectedValue>
+            <name>topic_sequence_match</name>
+        </scorer>
+        <scorer>
+            <expectedValue>[&apos;AnswerQuestionsWithKnowledge&apos;]</expectedValue>
+            <name>action_sequence_match</name>
+        </scorer>
+        <scorer>
+            <expectedValue>I can help with that. Here&apos;s what I found in our knowledge base about the return/refund policy.</expectedValue>
+            <name>bot_response_rating</name>
+        </scorer>
+        <scorer>
+            <name>conciseness</name>
+        </scorer>
+        <scorer>
+            <name>coherence</name>
+        </scorer>
+        <scorer>
+            <name>output_latency_milliseconds</name>
+        </scorer>
+        <scorer>
+            <name>completeness</name>
+        </scorer>
+    </testCase>
+    <testCase>
+        <inputs>
+            <utterance>Hey, I need help with something important—can you take care of it for me?</utterance>
+        </inputs>
+        <number>2</number>
+        <scorer>
+            <expectedValue>ambiguous_question</expectedValue>
+            <name>topic_sequence_match</name>
+        </scorer>
+        <scorer>
+            <expectedValue></expectedValue>
+            <name>action_sequence_match</name>
+        </scorer>
+        <scorer>
+            <name>conciseness</name>
+        </scorer>
+        <scorer>
+            <name>coherence</name>
+        </scorer>
+        <scorer>
+            <name>output_latency_milliseconds</name>
+        </scorer>
+        <scorer>
+            <name>completeness</name>
+        </scorer>
+    </testCase>
+</AiTestingDefinition>
+`;
+    writeFileSync(join(metaDir, `${ngtTestName}.aiTestingDefinition-meta.xml`), metaXml, 'utf8');
+    console.log(`Wrote AiTestingDefinition metadata to ${metaDir}`);
+    // Deploy the definition and fail fast if the org rejects it, rather than in a later test with less context.
+    const cs = await ComponentSetBuilder.build({ sourcepath: [metaDir] });
+    const deploy = await cs.deploy({ usernameOrConnection: getUsername() });
+    const deployResult = await deploy.pollStatus({ frequency: Duration.seconds(10), timeout: Duration.minutes(10) });
+    if (!deployResult.response.success) {
+      throw new Error(`Deploy of '${ngtTestName}' failed: ${deployResult.response.status}`);
+    }
+    console.log(`Deployed AiTestingDefinition '${ngtTestName}'`);
+  });
+
+  // Set by the run test, consumed by the results tests (Mocha runs describes sequentially)
+  let completedRunId: string;
+
+  describe('agent test list', () => {
+    // The definition deployed in before() must be listed under the AiTestingDefinition type.
+    it('should include the NGT test definition in list', async () => {
+      const listCommand = `agent test list --target-org ${getUsername()} --json`;
+      const definitions = execCmd<AgentTestListResult>(listCommand, { ensureExitCode: 0 }).jsonOutput?.result;
+      expect(definitions).to.be.ok;
+      const studioDefinitions = definitions?.filter((def) => def.type?.includes('AiTestingDefinition'));
+      expect(studioDefinitions?.length).to.be.greaterThanOrEqual(1);
+      expect(studioDefinitions?.some((def) => def.fullName === ngtTestName)).to.be.true;
+    });
+  });
+
+  describe('agent test run', () => {
+    // No --test-runner flag is passed here: the command is expected to pick agentforce-studio on its own.
+    it('should run with --wait, auto-detect agentforce-studio, and return NGT result shape', function () {
+      this.timeout(30 * 60 * 1000);
+      const runCommand = `agent test run --api-name ${ngtTestName} --target-org ${getUsername()} --wait 10 --json`;
+      const runOutput = execCmd<AgentTestRunResult>(runCommand, { ensureExitCode: 0 }).jsonOutput;
+
+      const runResult = runOutput?.result as AgentTestRunResult & { testCases?: unknown[] };
+      expect(runResult?.status).to.equal('COMPLETED');
+      expect(runResult?.runId.startsWith('3A2')).to.be.true;
+      expect(runResult?.testCases).to.be.an('array').with.length.greaterThan(0);
+      expect(runResult).to.not.have.property('subjectName');
+
+      // Hand the finished run over to the 'agent test results' tests.
+      completedRunId = runOutput!.result.runId;
+    });
+  });
+
+  describe('agent test results', () => {
+    // Every test here reads the run whose ID the 'agent test run' test stored in completedRunId.
+
+    // Runs `agent test results` for that run with the given --result-format and returns the command's stdout.
+    const stdoutFor = (format: string): string =>
+      execCmd(
+        `agent test results --job-id ${completedRunId} --result-format ${format} --target-org ${getUsername()}`,
+        { ensureExitCode: 0 }
+      ).shellOutput.stdout;
+
+    it('should fetch NGT results by job ID (json)', async () => {
+      const jsonCmd = `agent test results --job-id ${completedRunId} --target-org ${getUsername()} --json`;
+      const output = execCmd<AgentTestResultsResult>(jsonCmd, { ensureExitCode: 0 }).jsonOutput;
+
+      // Only the fields asserted below are typed; the cast itself does not validate the payload.
+      const result = output?.result as { status: string; testCases?: unknown[] };
+      expect(result?.status).to.be.a('string');
+      expect(result?.testCases).to.be.an('array').with.length.greaterThan(0);
+      expect(output?.result).to.not.have.property('subjectName');
+    });
+
+    it('should support human result format', () => {
+      // Only non-empty output is asserted for the human format.
+      const humanOutput = stdoutFor('human');
+      expect(humanOutput).to.be.a('string').with.length.greaterThan(0);
+    });
+
+    it('should support junit result format', () => {
+      const junitOutput = stdoutFor('junit');
+      expect(junitOutput).to.include('<?xml');
+      expect(junitOutput).to.include('testsuite');
+    });
+
+    it('should support tap result format', () => {
+      const tapOutput = stdoutFor('tap');
+      expect(tapOutput).to.include('TAP version 13');
+    });
+  });
+
+  describe('agent test resume', () => {
+    it('should start async then resume by job ID, removing the cache entry', async () => {
+      // The CLI runs in child processes, so the cache is only shared through its file: persist the cleared
+      // state here and re-create the instance before each assertion on the cache contents.
+      const staleCache = await AgentTestCache.create();
+      staleCache.clear();
+      await staleCache.write();
+
+      const runCmd = `agent test run --api-name ${ngtTestName} --target-org ${getUsername()} --json`;
+      const runResult = execCmd<AgentTestRunResult>(runCmd, { ensureExitCode: 0 }).jsonOutput;
+
+      expect(runResult?.result.runId.startsWith('3A2')).to.be.true;
+      expect(runResult?.result.status).to.equal('NEW');
+      expect((await AgentTestCache.create()).resolveFromCache().runnerType).to.equal('agentforce-studio');
+
+      const resumeCmd = `agent test resume --job-id ${runResult?.result.runId} --target-org ${getUsername()} --json`;
+      const output = execCmd<AgentTestRunResult>(resumeCmd, { ensureExitCode: 0 }).jsonOutput;
+      expect(output?.result.status).to.equal('COMPLETED');
+      expect(output?.result.runId.startsWith('3A2')).to.be.true;
+
+      // A completed resume must have removed the run from the on-disk cache.
+      const cacheAfterResume = await AgentTestCache.create();
+      expect(() => cacheAfterResume.resolveFromCache()).to.throw('Could not find a runId to resume');
+    });
+  });
+
+  describe('error handling', () => {
+    // The runner is named explicitly here, and no expect() is needed: the exit-code check is delegated to
+    // execCmd through ensureExitCode.
+    it('should return exit code 2 for a non-existent NGT test definition', () => {
+      const missingRun = `agent test run --api-name NonExistent_NGT_Test_XYZ --test-runner agentforce-studio`;
+      execCmd(`${missingRun} --target-org ${getUsername()} --json`, { ensureExitCode: 2 });
+    });
+  });
+});