From 75fb752a8276c7cd3c7ec3b1b6c1f5a04baf2fb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= Date: Thu, 4 Jun 2026 06:50:49 +0000 Subject: [PATCH 1/2] feat: add agent-mock package for deterministic E2E testing (#33) New package @united-workforce/agent-mock (uwf-mock CLI): - Reads pre-scripted outputs from a YAML mock data file (--mock-data) - Counts existing CAS chain steps to determine step index - Validates expected role matches actual moderator routing - Stores minimal detail node in CAS for valid step refs - Zero LLM, instant execution, 100% deterministic Usage in config.yaml: agents: mock: command: uwf-mock args: ["--mock-data", "./fixtures/scenario.yaml"] Refs #33 --- .../__tests__/fixtures/simple-scenario.yaml | 18 +++ .../agent-mock/__tests__/mock-agent.test.ts | 48 +++++++ packages/agent-mock/package.json | 47 +++++++ packages/agent-mock/src/cli.ts | 18 +++ packages/agent-mock/src/index.ts | 2 + packages/agent-mock/src/mock-agent.ts | 128 ++++++++++++++++++ packages/agent-mock/src/types.ts | 12 ++ packages/agent-mock/tsconfig.json | 9 ++ pnpm-lock.yaml | 22 +++ proman.yaml | 4 + tsconfig.json | 1 + 11 files changed, 309 insertions(+) create mode 100644 packages/agent-mock/__tests__/fixtures/simple-scenario.yaml create mode 100644 packages/agent-mock/__tests__/mock-agent.test.ts create mode 100644 packages/agent-mock/package.json create mode 100644 packages/agent-mock/src/cli.ts create mode 100644 packages/agent-mock/src/index.ts create mode 100644 packages/agent-mock/src/mock-agent.ts create mode 100644 packages/agent-mock/src/types.ts create mode 100644 packages/agent-mock/tsconfig.json diff --git a/packages/agent-mock/__tests__/fixtures/simple-scenario.yaml b/packages/agent-mock/__tests__/fixtures/simple-scenario.yaml new file mode 100644 index 0000000..58c452a --- /dev/null +++ b/packages/agent-mock/__tests__/fixtures/simple-scenario.yaml @@ -0,0 +1,18 @@ +steps: + - role: planner + output: | + --- + $status: ready + plan: test-plan-hash 
+ repoPath: /tmp/test-repo + --- + Plan: implement the feature. + + - role: developer + output: | + --- + $status: done + branch: fix/1-test + worktree: /tmp/worktree + --- + Implemented the feature. diff --git a/packages/agent-mock/__tests__/mock-agent.test.ts b/packages/agent-mock/__tests__/mock-agent.test.ts new file mode 100644 index 0000000..2cafb2e --- /dev/null +++ b/packages/agent-mock/__tests__/mock-agent.test.ts @@ -0,0 +1,48 @@ +import { readFile } from "node:fs/promises"; +import { join } from "node:path"; +import { describe, expect, test } from "vitest"; + +import { parseScenario, selectMockStep } from "../src/mock-agent.js"; + +const FIXTURE = join(__dirname, "fixtures", "simple-scenario.yaml"); + +describe("parseScenario", () => { + test("parses the 2-step fixture in order", async () => { + const scenario = parseScenario(await readFile(FIXTURE, "utf8")); + expect(scenario.steps).toHaveLength(2); + expect(scenario.steps[0].role).toBe("planner"); + expect(scenario.steps[1].role).toBe("developer"); + expect(scenario.steps[0].output).toContain("$status: ready"); + expect(scenario.steps[1].output).toContain("branch: fix/1-test"); + }); + + test("rejects documents without a steps array", () => { + expect(() => parseScenario("foo: bar")).toThrow(/steps/); + }); + + test("rejects steps missing role or output", () => { + expect(() => parseScenario("steps:\n - role: planner")).toThrow(/role.*output/); + }); +}); + +describe("selectMockStep", () => { + const scenario = { + steps: [ + { role: "planner", output: "plan-output" }, + { role: "developer", output: "dev-output" }, + ], + }; + + test("step index counts existing steps to pick the current step", () => { + expect(selectMockStep(scenario, 0, "planner").output).toBe("plan-output"); + expect(selectMockStep(scenario, 1, "developer").output).toBe("dev-output"); + }); + + test("throws when the moderator routes to an unexpected role", () => { + expect(() => selectMockStep(scenario, 0, 
"developer")).toThrow(/expected role "planner"/); + }); + + test("throws when the step index runs past the scripted steps", () => { + expect(() => selectMockStep(scenario, 2, "planner")).toThrow(/no step at index 2/); + }); +}); diff --git a/packages/agent-mock/package.json b/packages/agent-mock/package.json new file mode 100644 index 0000000..62fe149 --- /dev/null +++ b/packages/agent-mock/package.json @@ -0,0 +1,47 @@ +{ + "name": "@united-workforce/agent-mock", + "version": "0.5.0", + "files": [ + "src", + "dist", + "package.json" + ], + "type": "module", + "bin": { + "uwf-mock": "./src/cli.ts" + }, + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js" + } + }, + "scripts": { + "prepublishOnly": "echo 'Use pnpm run release from repo root' && exit 1", + "test": "vitest run __tests__/", + "test:ci": "vitest run __tests__/" + }, + "dependencies": { + "@ocas/core": "^0.3.0", + "@united-workforce/protocol": "workspace:^", + "@united-workforce/util": "workspace:^", + "@united-workforce/util-agent": "workspace:^", + "yaml": "^2.9.0" + }, + "devDependencies": { + "typescript": "^5.8.3" + }, + "publishConfig": { + "access": "public" + }, + "repository": { + "type": "git", + "url": "https://git.shazhou.work/shazhou/united-workforce.git", + "directory": "packages/agent-mock" + }, + "homepage": "https://git.shazhou.work/shazhou/united-workforce#readme", + "bugs": { + "url": "https://git.shazhou.work/shazhou/united-workforce/issues" + }, + "license": "MIT" +} diff --git a/packages/agent-mock/src/cli.ts b/packages/agent-mock/src/cli.ts new file mode 100644 index 0000000..2676ac6 --- /dev/null +++ b/packages/agent-mock/src/cli.ts @@ -0,0 +1,18 @@ +#!/usr/bin/env node + +import { createMockAgent } from "./mock-agent.js"; + +const USAGE = "usage: uwf-mock --mock-data --thread --role --prompt "; + +function getMockDataPath(argv: string[]): string { + const idx = argv.indexOf("--mock-data"); + if (idx === -1 || idx + 1 >= argv.length || argv[idx + 
1] === "") { + process.stderr.write(`--mock-data is required. ${USAGE}\n`); + process.exit(1); + } + return argv[idx + 1]; +} + +const mockDataPath = getMockDataPath(process.argv); +const main = createMockAgent(mockDataPath); +void main(); diff --git a/packages/agent-mock/src/index.ts b/packages/agent-mock/src/index.ts new file mode 100644 index 0000000..4d36d72 --- /dev/null +++ b/packages/agent-mock/src/index.ts @@ -0,0 +1,2 @@ +export { createMockAgent, parseScenario, selectMockStep } from "./mock-agent.js"; +export type { MockScenario, MockStep } from "./types.js"; diff --git a/packages/agent-mock/src/mock-agent.ts b/packages/agent-mock/src/mock-agent.ts new file mode 100644 index 0000000..1044232 --- /dev/null +++ b/packages/agent-mock/src/mock-agent.ts @@ -0,0 +1,128 @@ +import { readFile } from "node:fs/promises"; + +import { bootstrap, type JSONSchema, putSchema, type Store } from "@ocas/core"; +import { createLogger } from "@united-workforce/util"; +import { type AgentContext, type AgentRunResult, createAgent } from "@united-workforce/util-agent"; +import { parse } from "yaml"; + +import type { MockScenario, MockStep } from "./types.js"; + +const log = createLogger({ sink: { kind: "stderr" } }); + +const MOCK_DETAIL_SCHEMA: JSONSchema = { + title: "mock-detail", + type: "object", + required: ["sessionId", "role", "stepIndex"], + properties: { + sessionId: { type: "string" }, + role: { type: "string" }, + stepIndex: { type: "integer" }, + }, + additionalProperties: false, +}; + +function isRecord(value: unknown): value is Record<string, unknown> { + return typeof value === "object" && value !== null && !Array.isArray(value); +} + +/** Parse a YAML mock data document into a {@link MockScenario}. Pure — no I/O. 
*/ +export function parseScenario(text: string): MockScenario { + const raw = parse(text) as unknown; + if (!isRecord(raw) || !Array.isArray(raw.steps)) { + throw new Error("mock data must be a mapping with a 'steps' array"); + } + const steps: MockStep[] = raw.steps.map((entry, i) => { + if (!isRecord(entry) || typeof entry.role !== "string" || typeof entry.output !== "string") { + throw new Error(`mock step ${i} must have string 'role' and string 'output'`); + } + return { role: entry.role, output: entry.output }; + }); + return { steps }; +} + +async function loadScenario(path: string): Promise<MockScenario> { + const text = await readFile(path, "utf8"); + return parseScenario(text); +} + +/** + * Pick the scripted step for the given index and verify the moderator routed to + * the expected role. Throws on out-of-range index or role mismatch so routing + * bugs surface loudly during E2E runs. + */ +export function selectMockStep(scenario: MockScenario, stepIndex: number, role: string): MockStep { + const step = scenario.steps[stepIndex]; + if (step === undefined) { + throw new Error( + `mock scenario has no step at index ${stepIndex} (total ${scenario.steps.length}); ` + + `moderator routed to role "${role}"`, + ); + } + if (step.role !== role) { + throw new Error( + `mock step ${stepIndex} expected role "${step.role}" but moderator routed to "${role}"`, + ); + } + return step; +} + +/** Persist a minimal detail node so the step node has a valid CAS ref. */ +async function storeMockDetail( + store: Store, + sessionId: string, + role: string, + stepIndex: number, +): Promise<string> { + await bootstrap(store); + const schemaHash = await putSchema(store, MOCK_DETAIL_SCHEMA); + return store.cas.put(schemaHash, { sessionId, role, stepIndex }); +} + +/** + * Agent CLI factory: a deterministic, LLM-free agent that replays pre-scripted + * outputs from a YAML mock data file. The step index is derived by counting the + * existing steps in the thread's CAS chain (exposed via `ctx.steps`). 
+ */ +export function createMockAgent(mockDataPath: string): () => Promise<void> { + let lastResult: AgentRunResult | null = null; + + async function run(ctx: AgentContext): Promise<AgentRunResult> { + const scenario = await loadScenario(mockDataPath); + const stepIndex = ctx.steps.length; + log( + "MK7X2QPV", + `mock step ${stepIndex} for role "${ctx.role}" (${scenario.steps.length} scripted)`, + ); + + const step = selectMockStep(scenario, stepIndex, ctx.role); + const sessionId = `mock-${stepIndex}`; + const detailHash = await storeMockDetail(ctx.store, sessionId, ctx.role, stepIndex); + + const result: AgentRunResult = { + output: step.output, + detailHash, + sessionId, + assembledPrompt: "", + }; + lastResult = result; + return result; + } + + async function continueRun( + sessionId: string, + _message: string, + _store: Store, + ): Promise<AgentRunResult> { + if (lastResult === null) { + throw new Error("mock continue called before run"); + } + log("MK3N8RTW", `mock continue for session ${sessionId}, replaying scripted output`); + return lastResult; + } + + return createAgent({ + name: "mock", + run, + continue: continueRun, + }); +} diff --git a/packages/agent-mock/src/types.ts b/packages/agent-mock/src/types.ts new file mode 100644 index 0000000..63e83cf --- /dev/null +++ b/packages/agent-mock/src/types.ts @@ -0,0 +1,12 @@ +/** One pre-scripted step in a mock scenario. */ +export type MockStep = { + /** Role this step is expected to run as. Validated against the actual `--role` argument. */ + role: string; + /** Frontmatter markdown output the mock agent emits for this step. */ + output: string; +}; + +/** Deterministic, pre-scripted agent script loaded from a YAML mock data file. 
*/ +export type MockScenario = { + steps: MockStep[]; +}; diff --git a/packages/agent-mock/tsconfig.json b/packages/agent-mock/tsconfig.json new file mode 100644 index 0000000..7b7fa2a --- /dev/null +++ b/packages/agent-mock/tsconfig.json @@ -0,0 +1,9 @@ +{ + "extends": "../../tsconfig.json", + "compilerOptions": { + "rootDir": "src", + "outDir": "dist" + }, + "include": ["src"], + "references": [{ "path": "../util-agent" }, { "path": "../util" }, { "path": "../protocol" }] +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 0bbdfce..1aa12c6 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -93,6 +93,28 @@ importers: specifier: ^5.8.3 version: 5.9.3 + packages/agent-mock: + dependencies: + '@ocas/core': + specifier: ^0.3.0 + version: 0.3.0 + '@united-workforce/protocol': + specifier: workspace:^ + version: link:../protocol + '@united-workforce/util': + specifier: workspace:^ + version: link:../util + '@united-workforce/util-agent': + specifier: workspace:^ + version: link:../util-agent + yaml: + specifier: ^2.9.0 + version: 2.9.0 + devDependencies: + typescript: + specifier: ^5.8.3 + version: 5.9.3 + packages/cli: dependencies: '@ocas/core': diff --git a/proman.yaml b/proman.yaml index 551f753..a0cc8e5 100644 --- a/proman.yaml +++ b/proman.yaml @@ -23,6 +23,10 @@ packages: path: packages/agent-builtin type: cli + - name: "@united-workforce/agent-mock" + path: packages/agent-mock + type: cli + - name: "@united-workforce/cli" path: packages/cli type: cli diff --git a/tsconfig.json b/tsconfig.json index 1547e16..76e1129 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -23,6 +23,7 @@ { "path": "packages/util-agent" }, { "path": "packages/agent-hermes" }, { "path": "packages/agent-builtin" }, + { "path": "packages/agent-mock" }, { "path": "packages/agent-claude-code" }, { "path": "packages/cli" } ] -- 2.43.0 From 80e8efb05e9457585274d414e97b8f6184f6432c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= Date: Thu, 4 Jun 2026 07:44:48 +0000 Subject: 
[PATCH 2/2] test: E2E integration tests with uwf-mock agent (#33) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three scenarios testing the full CLI pipeline: 1. Linear workflow (planner → worker → $END): CAS chain integrity 2. Loop workflow (developer ↔ reviewer): moderator routing through cycles 3. Role mismatch detection: agent catches routing bugs Uses workflow add → thread start → thread exec with uwf-mock, verifying CAS state, thread lifecycle, and error handling. Updated assertions to use getThread().status === 'completed' (aligned with PR #45 unified thread storage). Refs #33 --- .../cli/src/__tests__/e2e-mock-agent.test.ts | 296 ++++++++++++++++++ .../__tests__/fixtures/e2e-linear.mock.yaml | 13 + .../fixtures/e2e-linear.workflow.yaml | 32 ++ .../src/__tests__/fixtures/e2e-loop.mock.yaml | 25 ++ .../__tests__/fixtures/e2e-loop.workflow.yaml | 36 +++ .../__tests__/fixtures/e2e-mismatch.mock.yaml | 16 + 6 files changed, 418 insertions(+) create mode 100644 packages/cli/src/__tests__/e2e-mock-agent.test.ts create mode 100644 packages/cli/src/__tests__/fixtures/e2e-linear.mock.yaml create mode 100644 packages/cli/src/__tests__/fixtures/e2e-linear.workflow.yaml create mode 100644 packages/cli/src/__tests__/fixtures/e2e-loop.mock.yaml create mode 100644 packages/cli/src/__tests__/fixtures/e2e-loop.workflow.yaml create mode 100644 packages/cli/src/__tests__/fixtures/e2e-mismatch.mock.yaml diff --git a/packages/cli/src/__tests__/e2e-mock-agent.test.ts b/packages/cli/src/__tests__/e2e-mock-agent.test.ts new file mode 100644 index 0000000..e71e488 --- /dev/null +++ b/packages/cli/src/__tests__/e2e-mock-agent.test.ts @@ -0,0 +1,296 @@ +import { execFileSync } from "node:child_process"; +import { existsSync } from "node:fs"; +import { mkdir, mkdtemp, readFile, rm, writeFile } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; 
+import { openStore } from "@ocas/fs"; +import type { CasRef, StartNodePayload, StepNodePayload } from "@united-workforce/protocol"; +import { afterEach, beforeAll, beforeEach, describe, expect, test } from "vitest"; +import { stringify } from "yaml"; +import { cmdThreadStart } from "../commands/thread.js"; +import { cmdWorkflowAdd } from "../commands/workflow.js"; +import { createUwfStore, getThread } from "../store.js"; + +// ── paths ────────────────────────────────────────────────────────────────── + +const TEST_DIR = dirname(fileURLToPath(import.meta.url)); +const FIXTURES_DIR = join(TEST_DIR, "fixtures"); +const CLI_PATH = join(TEST_DIR, "..", "..", "dist", "cli.js"); +const REPO_ROOT = join(TEST_DIR, "..", "..", "..", ".."); +const AGENT_MOCK_DIR = join(REPO_ROOT, "packages", "agent-mock"); +const AGENT_MOCK_CLI = join(AGENT_MOCK_DIR, "dist", "cli.js"); + +// ── shared fixture state ───────────────────────────────────────────────────── + +let tmpDir: string; +let uwfHome: string; +let casDir: string; +let savedEnv: { uwf: string | undefined; ocas: string | undefined }; + +/** + * The mock agent runs from its built `dist/cli.js`. When the test suite runs + * standalone (no prior `pnpm run build`), build it on demand so the E2E run is + * self-contained. + */ +beforeAll(() => { + if (existsSync(AGENT_MOCK_CLI)) { + return; + } + execFileSync( + process.execPath, + [ + join(REPO_ROOT, "node_modules", "typescript", "bin", "tsc"), + "--build", + "--force", + AGENT_MOCK_DIR, + ], + { cwd: REPO_ROOT, stdio: "ignore" }, + ); +}, 120000); + +beforeEach(async () => { + tmpDir = await mkdtemp(join(tmpdir(), "cli-e2e-mock-")); + uwfHome = join(tmpDir, "uwf"); + casDir = join(tmpDir, "ocas"); + await mkdir(uwfHome, { recursive: true }); + await mkdir(casDir, { recursive: true }); + // Programmatic CLI APIs (cmdWorkflowAdd, cmdThreadStart) read the global CAS + // directory from OCAS_HOME and the storage root from UWF_HOME. 
+ savedEnv = { uwf: process.env.UWF_HOME, ocas: process.env.OCAS_HOME }; + process.env.UWF_HOME = uwfHome; + process.env.OCAS_HOME = casDir; +}); + +afterEach(async () => { + process.env.UWF_HOME = savedEnv.uwf; + process.env.OCAS_HOME = savedEnv.ocas; + await rm(tmpDir, { recursive: true, force: true }); +}); + +// ── helpers ────────────────────────────────────────────────────────────────── + +/** + * Write a `config.yaml` into UWF_HOME that wires the default agent to the mock + * agent. The mock data path is baked into the agent args so the CLI's + * `thread exec` (without an `--agent` override) resolves it from config. + */ +async function writeMockConfig(mockDataFixture: string): Promise<void> { + const config = { + defaultAgent: "mock", + defaultModel: "test", + providers: {}, + models: {}, + agentOverrides: null, + agents: { + mock: { + command: process.execPath, + args: [AGENT_MOCK_CLI, "--mock-data", join(FIXTURES_DIR, mockDataFixture)], + }, + }, + }; + await writeFile(join(uwfHome, "config.yaml"), stringify(config)); +} + +/** + * `cmdWorkflowAdd` enforces filename↔name consistency, so copy the fixture into + * UWF_HOME under `<name>.yaml` before registering it. 
+ */ +async function addWorkflow(workflowFixture: string, workflowName: string): Promise<string> { + const text = await readFile(join(FIXTURES_DIR, workflowFixture), "utf8"); + const filePath = join(uwfHome, `${workflowName}.yaml`); + await writeFile(filePath, text); + const result = await cmdWorkflowAdd(uwfHome, filePath); + return result.hash; +} + +type ExecResult = { stdout: string; stderr: string; exitCode: number }; + +function runExec(threadId: string): ExecResult { + try { + const stdout = execFileSync(process.execPath, [CLI_PATH, "thread", "exec", threadId], { + encoding: "utf8", + stdio: ["ignore", "pipe", "pipe"], + env: { ...process.env, UWF_HOME: uwfHome, OCAS_HOME: casDir }, + cwd: tmpDir, + timeout: 30000, + }); + return { stdout, stderr: "", exitCode: 0 }; + } catch (e: unknown) { + const err = e as NodeJS.ErrnoException & { + stdout?: string; + stderr?: string; + status?: number; + }; + return { stdout: err.stdout ?? "", stderr: err.stderr ?? "", exitCode: err.status ?? 1 }; + } +} + +type StepOutputJson = { + thread: string; + head: string; + status: string; + currentRole: string | null; + done: boolean; +}; + +function execStep(threadId: string): StepOutputJson { + const { stdout, stderr, exitCode } = runExec(threadId); + if (exitCode !== 0) { + throw new Error(`thread exec failed (code ${exitCode})\nstdout: ${stdout}\nstderr: ${stderr}`); + } + return JSON.parse(stdout.trim()) as StepOutputJson; +} + +function getStepNode(store: Awaited<ReturnType<typeof openStore>>, hash: string): StepNodePayload { + const node = store.cas.get(hash as CasRef); + expect(node).not.toBeNull(); + return node!.payload as StepNodePayload; +} + +function getStatus(store: Awaited<ReturnType<typeof openStore>>, outputRef: CasRef): unknown { + const node = store.cas.get(outputRef); + expect(node).not.toBeNull(); + return (node!.payload as Record<string, unknown>).$status; +} + +// ── scenarios ───────────────────────────────────────────────────────────────── + +describe("E2E mock-agent: full uwf pipeline", () => { + test("1. 
linear workflow runs planner then worker and reaches $END", async () => { + await writeMockConfig("e2e-linear.mock.yaml"); + const workflowHash = await addWorkflow("e2e-linear.workflow.yaml", "test-linear"); + + const start = await cmdThreadStart(uwfHome, workflowHash, "Build the thing", uwfHome, tmpDir); + const threadId = start.thread; + + // Capture the start node hash (thread head before any step). + const startHash = getThread((await createUwfStore(uwfHome)).varStore, threadId)?.head; + expect(startHash).toBeDefined(); + + // Step 1 → planner. + const step1 = execStep(threadId); + expect(step1.thread).toBe(threadId); + expect(step1.done).toBe(false); + expect(step1.status).toBe("idle"); + expect(step1.currentRole).toBe("worker"); + + // Step 2 → worker → $END (thread archived to history). + const step2 = execStep(threadId); + expect(step2.done).toBe(true); + expect(step2.status).toBe("completed"); + expect(step2.currentRole).toBeNull(); + + // Verify CAS chain integrity: start → step1 → step2. + const store = await openStore(casDir); + const s1 = getStepNode(store, step1.head); + const s2 = getStepNode(store, step2.head); + + expect(s1.role).toBe("planner"); + expect(s1.prev).toBeNull(); + expect(s1.start).toBe(startHash); + + expect(s2.role).toBe("worker"); + expect(s2.prev).toBe(step1.head); + expect(s2.start).toBe(s1.start); + + // Output frontmatter statuses persisted correctly. + expect(getStatus(store, s1.output)).toBe("ready"); + expect(getStatus(store, s2.output)).toBe("done"); + + // The start node points at the registered workflow. + const startNode = store.cas.get(startHash as CasRef); + expect((startNode!.payload as StartNodePayload).workflow).toBe(workflowHash); + + // Thread is completed: status changed to "completed", head updated. 
+ const uwf = await createUwfStore(uwfHome); + const finalEntry = getThread(uwf.varStore, threadId); + expect(finalEntry).not.toBeNull(); + expect(finalEntry!.status).toBe("completed"); + expect(finalEntry!.head).toBe(step2.head); + }); + + test("2. branching workflow loops developer→reviewer→developer→reviewer→$END", async () => { + await writeMockConfig("e2e-loop.mock.yaml"); + const workflowHash = await addWorkflow("e2e-loop.workflow.yaml", "test-loop"); + + const start = await cmdThreadStart(uwfHome, workflowHash, "Implement feature", uwfHome, tmpDir); + const threadId = start.thread; + + // 4 steps: developer, reviewer (rejected → loop), developer, reviewer (approved → $END). + const s1 = execStep(threadId); + expect(s1.status).toBe("idle"); + expect(s1.currentRole).toBe("reviewer"); + + const s2 = execStep(threadId); + expect(s2.status).toBe("idle"); + // reviewer rejected → loops back to developer. + expect(s2.currentRole).toBe("developer"); + + const s3 = execStep(threadId); + expect(s3.status).toBe("idle"); + expect(s3.currentRole).toBe("reviewer"); + + const s4 = execStep(threadId); + expect(s4.done).toBe(true); + expect(s4.status).toBe("completed"); + + // Verify the chain order and roles. + const store = await openStore(casDir); + const n1 = getStepNode(store, s1.head); + const n2 = getStepNode(store, s2.head); + const n3 = getStepNode(store, s3.head); + const n4 = getStepNode(store, s4.head); + + expect([n1.role, n2.role, n3.role, n4.role]).toEqual([ + "developer", + "reviewer", + "developer", + "reviewer", + ]); + expect(n1.prev).toBeNull(); + expect(n2.prev).toBe(s1.head); + expect(n3.prev).toBe(s2.head); + expect(n4.prev).toBe(s3.head); + + // All steps share the same start node. + expect(new Set([n1.start, n2.start, n3.start, n4.start]).size).toBe(1); + + // Statuses drove the loop routing. 
+ expect(getStatus(store, n1.output)).toBe("review_needed"); + expect(getStatus(store, n2.output)).toBe("rejected"); + expect(getStatus(store, n3.output)).toBe("review_needed"); + expect(getStatus(store, n4.output)).toBe("approved"); + + const uwf = await createUwfStore(uwfHome); + const finalEntry = getThread(uwf.varStore, threadId); + expect(finalEntry).not.toBeNull(); + expect(finalEntry!.status).toBe("completed"); + }); + + test("3. role mismatch in mock data makes the agent exit with an error", async () => { + // Reuses the linear workflow but with a mock whose step[1].role is wrong. + await writeMockConfig("e2e-mismatch.mock.yaml"); + const workflowHash = await addWorkflow("e2e-linear.workflow.yaml", "test-linear"); + + const start = await cmdThreadStart(uwfHome, workflowHash, "Build the thing", uwfHome, tmpDir); + const threadId = start.thread; + + // Step 1 (planner) matches and succeeds. + const step1 = execStep(threadId); + expect(step1.status).toBe("idle"); + expect(step1.currentRole).toBe("worker"); + + // Step 2: moderator routes to "worker" but mock step[1].role is "planner". + const result = runExec(threadId); + expect(result.exitCode).not.toBe(0); + expect(`${result.stdout}\n${result.stderr}`).toMatch(/expected role "planner"/); + + // The thread remains active (no step node was written for the failed step). + const uwf = await createUwfStore(uwfHome); + const entry = getThread(uwf.varStore, threadId); + expect(entry).not.toBeNull(); + expect(entry!.status).not.toBe("completed"); + expect(entry!.head).toBe(step1.head); + }); +}); diff --git a/packages/cli/src/__tests__/fixtures/e2e-linear.mock.yaml b/packages/cli/src/__tests__/fixtures/e2e-linear.mock.yaml new file mode 100644 index 0000000..66a5135 --- /dev/null +++ b/packages/cli/src/__tests__/fixtures/e2e-linear.mock.yaml @@ -0,0 +1,13 @@ +steps: + - role: planner + output: | + --- + $status: ready + --- + Planning complete. 
+ - role: worker + output: | + --- + $status: done + --- + Work complete. diff --git a/packages/cli/src/__tests__/fixtures/e2e-linear.workflow.yaml b/packages/cli/src/__tests__/fixtures/e2e-linear.workflow.yaml new file mode 100644 index 0000000..9a4a638 --- /dev/null +++ b/packages/cli/src/__tests__/fixtures/e2e-linear.workflow.yaml @@ -0,0 +1,32 @@ +name: test-linear +description: Simple 2-step linear test (planner -> worker -> $END) +roles: + planner: + description: Plans work + goal: Plan the task + capabilities: [] + procedure: Plan it + output: Output a plan and set $status to ready + frontmatter: + oneOf: + - properties: + $status: { const: ready } + required: [$status] + worker: + description: Does work + goal: Do the work + capabilities: [] + procedure: Do it + output: Output the result and set $status to done + frontmatter: + oneOf: + - properties: + $status: { const: done } + required: [$status] +graph: + $START: + _: { role: planner, prompt: 'Plan the task' } + planner: + ready: { role: worker, prompt: 'Do the work' } + worker: + done: { role: '$END', prompt: 'Done' } diff --git a/packages/cli/src/__tests__/fixtures/e2e-loop.mock.yaml b/packages/cli/src/__tests__/fixtures/e2e-loop.mock.yaml new file mode 100644 index 0000000..e2fa37d --- /dev/null +++ b/packages/cli/src/__tests__/fixtures/e2e-loop.mock.yaml @@ -0,0 +1,25 @@ +steps: + - role: developer + output: | + --- + $status: review_needed + --- + First implementation. + - role: reviewer + output: | + --- + $status: rejected + --- + Needs changes, sending back. + - role: developer + output: | + --- + $status: review_needed + --- + Second implementation addressing feedback. + - role: reviewer + output: | + --- + $status: approved + --- + Looks good, approved. 
diff --git a/packages/cli/src/__tests__/fixtures/e2e-loop.workflow.yaml b/packages/cli/src/__tests__/fixtures/e2e-loop.workflow.yaml new file mode 100644 index 0000000..604452a --- /dev/null +++ b/packages/cli/src/__tests__/fixtures/e2e-loop.workflow.yaml @@ -0,0 +1,36 @@ +name: test-loop +description: Branching test where the reviewer can reject and loop back to the developer +roles: + developer: + description: Implements changes + goal: Implement the change + capabilities: [] + procedure: Write code + output: Summarize the change and set $status to review_needed + frontmatter: + oneOf: + - properties: + $status: { const: review_needed } + required: [$status] + reviewer: + description: Reviews changes + goal: Review the change + capabilities: [] + procedure: Review code + output: Approve or reject; set $status to approved or rejected + frontmatter: + oneOf: + - properties: + $status: { const: rejected } + required: [$status] + - properties: + $status: { const: approved } + required: [$status] +graph: + $START: + _: { role: developer, prompt: 'Implement the change' } + developer: + review_needed: { role: reviewer, prompt: 'Review the change' } + reviewer: + rejected: { role: developer, prompt: 'Fix the issues and resubmit' } + approved: { role: '$END', prompt: 'Approved, done' } diff --git a/packages/cli/src/__tests__/fixtures/e2e-mismatch.mock.yaml b/packages/cli/src/__tests__/fixtures/e2e-mismatch.mock.yaml new file mode 100644 index 0000000..d397d90 --- /dev/null +++ b/packages/cli/src/__tests__/fixtures/e2e-mismatch.mock.yaml @@ -0,0 +1,16 @@ +# Reuses the test-linear workflow. The moderator routes step 0 -> planner and +# step 1 -> worker, but step[1].role below is "planner", so the mock agent must +# detect the role mismatch on the second step and exit with an error. +steps: + - role: planner + output: | + --- + $status: ready + --- + Planning complete. 
+ - role: planner + output: | + --- + $status: done + --- + This step claims to be planner, but the moderator routes to worker. -- 2.43.0