From 75fb752a8276c7cd3c7ec3b1b6c1f5a04baf2fb8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= <xiaoju@shazhou.work>
Date: Thu, 4 Jun 2026 06:50:49 +0000
Subject: [PATCH 1/2] feat: add agent-mock package for deterministic E2E
 testing (#33)

New package @united-workforce/agent-mock (uwf-mock CLI):
- Reads pre-scripted outputs from a YAML mock data file (--mock-data)
- Counts existing CAS chain steps to determine step index
- Validates expected role matches actual moderator routing
- Stores minimal detail node in CAS for valid step refs
- Zero LLM, instant execution, 100% deterministic

Usage in config.yaml:
  agents:
    mock:
      command: uwf-mock
      args: ["--mock-data", "./fixtures/scenario.yaml"]

Refs #33
---
 .../__tests__/fixtures/simple-scenario.yaml   |  18 +++
 .../agent-mock/__tests__/mock-agent.test.ts   |  48 +++++++
 packages/agent-mock/package.json              |  47 +++++++
 packages/agent-mock/src/cli.ts                |  18 +++
 packages/agent-mock/src/index.ts              |   2 +
 packages/agent-mock/src/mock-agent.ts         | 128 ++++++++++++++++++
 packages/agent-mock/src/types.ts              |  12 ++
 packages/agent-mock/tsconfig.json             |   9 ++
 pnpm-lock.yaml                                |  22 +++
 proman.yaml                                   |   4 +
 tsconfig.json                                 |   1 +
 11 files changed, 309 insertions(+)
 create mode 100644 packages/agent-mock/__tests__/fixtures/simple-scenario.yaml
 create mode 100644 packages/agent-mock/__tests__/mock-agent.test.ts
 create mode 100644 packages/agent-mock/package.json
 create mode 100644 packages/agent-mock/src/cli.ts
 create mode 100644 packages/agent-mock/src/index.ts
 create mode 100644 packages/agent-mock/src/mock-agent.ts
 create mode 100644 packages/agent-mock/src/types.ts
 create mode 100644 packages/agent-mock/tsconfig.json

diff --git a/packages/agent-mock/__tests__/fixtures/simple-scenario.yaml b/packages/agent-mock/__tests__/fixtures/simple-scenario.yaml
new file mode 100644
index 0000000..58c452a
--- /dev/null
+++ b/packages/agent-mock/__tests__/fixtures/simple-scenario.yaml
@@ -0,0 +1,18 @@
+steps:
+  - role: planner
+    output: |
+      ---
+      $status: ready
+      plan: test-plan-hash
+      repoPath: /tmp/test-repo
+      ---
+      Plan: implement the feature.
+
+  - role: developer
+    output: |
+      ---
+      $status: done
+      branch: fix/1-test
+      worktree: /tmp/worktree
+      ---
+      Implemented the feature.
diff --git a/packages/agent-mock/__tests__/mock-agent.test.ts b/packages/agent-mock/__tests__/mock-agent.test.ts
new file mode 100644
index 0000000..2cafb2e
--- /dev/null
+++ b/packages/agent-mock/__tests__/mock-agent.test.ts
@@ -0,0 +1,48 @@
+import { readFile } from "node:fs/promises";
+import { join } from "node:path";
+import { describe, expect, test } from "vitest";
+
+import { parseScenario, selectMockStep } from "../src/mock-agent.js";
+
+const FIXTURE = join(__dirname, "fixtures", "simple-scenario.yaml");
+
+describe("parseScenario", () => {
+  test("parses the 2-step fixture in order", async () => {
+    const scenario = parseScenario(await readFile(FIXTURE, "utf8"));
+    expect(scenario.steps).toHaveLength(2);
+    expect(scenario.steps[0].role).toBe("planner");
+    expect(scenario.steps[1].role).toBe("developer");
+    expect(scenario.steps[0].output).toContain("$status: ready");
+    expect(scenario.steps[1].output).toContain("branch: fix/1-test");
+  });
+
+  test("rejects documents without a steps array", () => {
+    expect(() => parseScenario("foo: bar")).toThrow(/steps/);
+  });
+
+  test("rejects steps missing role or output", () => {
+    expect(() => parseScenario("steps:\n  - role: planner")).toThrow(/role.*output/);
+  });
+});
+
+describe("selectMockStep", () => {
+  const scenario = {
+    steps: [
+      { role: "planner", output: "plan-output" },
+      { role: "developer", output: "dev-output" },
+    ],
+  };
+
+  test("step index counts existing steps to pick the current step", () => {
+    expect(selectMockStep(scenario, 0, "planner").output).toBe("plan-output");
+    expect(selectMockStep(scenario, 1, "developer").output).toBe("dev-output");
+  });
+
+  test("throws when the moderator routes to an unexpected role", () => {
+    expect(() => selectMockStep(scenario, 0, "developer")).toThrow(/expected role "planner"/);
+  });
+
+  test("throws when the step index runs past the scripted steps", () => {
+    expect(() => selectMockStep(scenario, 2, "planner")).toThrow(/no step at index 2/);
+  });
+});
diff --git a/packages/agent-mock/package.json b/packages/agent-mock/package.json
new file mode 100644
index 0000000..62fe149
--- /dev/null
+++ b/packages/agent-mock/package.json
@@ -0,0 +1,47 @@
+{
+  "name": "@united-workforce/agent-mock",
+  "version": "0.5.0",
+  "files": [
+    "src",
+    "dist",
+    "package.json"
+  ],
+  "type": "module",
+  "bin": {
+    "uwf-mock": "./src/cli.ts"
+  },
+  "exports": {
+    ".": {
+      "types": "./dist/index.d.ts",
+      "import": "./dist/index.js"
+    }
+  },
+  "scripts": {
+    "prepublishOnly": "echo 'Use pnpm run release from repo root' && exit 1",
+    "test": "vitest run __tests__/",
+    "test:ci": "vitest run __tests__/"
+  },
+  "dependencies": {
+    "@ocas/core": "^0.3.0",
+    "@united-workforce/protocol": "workspace:^",
+    "@united-workforce/util": "workspace:^",
+    "@united-workforce/util-agent": "workspace:^",
+    "yaml": "^2.9.0"
+  },
+  "devDependencies": {
+    "typescript": "^5.8.3"
+  },
+  "publishConfig": {
+    "access": "public"
+  },
+  "repository": {
+    "type": "git",
+    "url": "https://git.shazhou.work/shazhou/united-workforce.git",
+    "directory": "packages/agent-mock"
+  },
+  "homepage": "https://git.shazhou.work/shazhou/united-workforce#readme",
+  "bugs": {
+    "url": "https://git.shazhou.work/shazhou/united-workforce/issues"
+  },
+  "license": "MIT"
+}
diff --git a/packages/agent-mock/src/cli.ts b/packages/agent-mock/src/cli.ts
new file mode 100644
index 0000000..2676ac6
--- /dev/null
+++ b/packages/agent-mock/src/cli.ts
@@ -0,0 +1,18 @@
+#!/usr/bin/env node
+
+import { createMockAgent } from "./mock-agent.js";
+
+const USAGE = "usage: uwf-mock --mock-data <path> --thread <id> --role <role> --prompt <text>";
+
+/** Return the value after `--mock-data`; print usage to stderr and exit 1 if missing or empty. */
+function getMockDataPath(argv: string[]): string {
+  const flagAt = argv.indexOf("--mock-data");
+  const value = flagAt === -1 ? "" : (argv[flagAt + 1] ?? "");
+  if (value !== "") return value;
+  process.stderr.write(`--mock-data is required. ${USAGE}\n`);
+  process.exit(1);
+}
+
+// Feed the scripted data path into the agent factory, then invoke the returned entry point.
+const runMockAgent = createMockAgent(getMockDataPath(process.argv));
+void runMockAgent();
diff --git a/packages/agent-mock/src/index.ts b/packages/agent-mock/src/index.ts
new file mode 100644
index 0000000..4d36d72
--- /dev/null
+++ b/packages/agent-mock/src/index.ts
@@ -0,0 +1,2 @@
+export { createMockAgent, parseScenario, selectMockStep } from "./mock-agent.js";
+export type { MockScenario, MockStep } from "./types.js";
diff --git a/packages/agent-mock/src/mock-agent.ts b/packages/agent-mock/src/mock-agent.ts
new file mode 100644
index 0000000..1044232
--- /dev/null
+++ b/packages/agent-mock/src/mock-agent.ts
@@ -0,0 +1,128 @@
+import { readFile } from "node:fs/promises";
+
+import { bootstrap, type JSONSchema, putSchema, type Store } from "@ocas/core";
+import { createLogger } from "@united-workforce/util";
+import { type AgentContext, type AgentRunResult, createAgent } from "@united-workforce/util-agent";
+import { parse } from "yaml";
+
+import type { MockScenario, MockStep } from "./types.js";
+
+const log = createLogger({ sink: { kind: "stderr" } });
+
+const MOCK_DETAIL_SCHEMA: JSONSchema = {
+  title: "mock-detail",
+  type: "object",
+  required: ["sessionId", "role", "stepIndex"],
+  properties: {
+    sessionId: { type: "string" },
+    role: { type: "string" },
+    stepIndex: { type: "integer" },
+  },
+  additionalProperties: false,
+};
+
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return !(value === null || typeof value !== "object" || Array.isArray(value));
+}
+
+/** Parse and validate a YAML mock data document as a {@link MockScenario}. Pure — no I/O. */
+export function parseScenario(text: string): MockScenario {
+  const doc: unknown = parse(text);
+  if (!isRecord(doc) || !Array.isArray(doc.steps)) {
+    throw new Error("mock data must be a mapping with a 'steps' array");
+  }
+  const toStep = (entry: unknown, i: number): MockStep => {
+    if (isRecord(entry) && typeof entry.role === "string" && typeof entry.output === "string") {
+      return { role: entry.role, output: entry.output };
+    }
+    throw new Error(`mock step ${i} must have string 'role' and string 'output'`);
+  };
+  return { steps: doc.steps.map(toStep) };
+}
+
+async function loadScenario(path: string): Promise<MockScenario> {
+  // Read the mock data file as UTF-8 and hand the text to parseScenario for validation.
+  return parseScenario(await readFile(path, "utf8"));
+}
+
+/**
+ * Return the scripted step at `stepIndex` after checking it was written for
+ * `role`. Throws when the index is past the script or the roles differ, so a
+ * moderator routing bug fails the E2E run loudly.
+ */
+export function selectMockStep(scenario: MockScenario, stepIndex: number, role: string): MockStep {
+  const scripted = scenario.steps[stepIndex];
+  if (scripted === undefined) {
+    throw new Error(
+      `mock scenario has no step at index ${stepIndex} (total ${scenario.steps.length}); ` +
+        `moderator routed to role "${role}"`,
+    );
+  }
+  if (scripted.role === role) {
+    return scripted;
+  }
+  throw new Error(
+    `mock step ${stepIndex} expected role "${scripted.role}" but moderator routed to "${role}"`,
+  );
+}
+
+/** Store a minimal detail node in CAS so the step node can carry a valid detail ref. */
+async function storeMockDetail(
+  store: Store,
+  sessionId: string,
+  role: string,
+  stepIndex: number,
+): Promise<string> {
+  await bootstrap(store);
+  const detail = { sessionId, role, stepIndex };
+  return store.cas.put(await putSchema(store, MOCK_DETAIL_SCHEMA), detail);
+}
+
+/**
+ * Build the mock agent CLI entry point: a deterministic, LLM-free agent that
+ * replays pre-scripted outputs from the YAML file at `mockDataPath`. The step
+ * index is the number of steps already in the thread (`ctx.steps.length`).
+ */
+export function createMockAgent(mockDataPath: string): () => Promise<void> {
+  // Result of the most recent run() in this process; continue() replays it.
+  let lastResult: AgentRunResult | null = null;
+
+  const run = async (ctx: AgentContext): Promise<AgentRunResult> => {
+    const scenario = await loadScenario(mockDataPath);
+    const { role, store } = ctx;
+    const stepIndex = ctx.steps.length;
+    log(
+      "MK7X2QPV",
+      `mock step ${stepIndex} for role "${role}" (${scenario.steps.length} scripted)`,
+    );
+
+    // Throws on an out-of-range index or role mismatch, before anything is stored.
+    const { output } = selectMockStep(scenario, stepIndex, role);
+    const sessionId = `mock-${stepIndex}`;
+    const detailHash = await storeMockDetail(store, sessionId, role, stepIndex);
+
+    lastResult = {
+      output,
+      detailHash,
+      sessionId,
+      assembledPrompt: "",
+    };
+    return lastResult;
+  };
+
+  // NOTE(review): lastResult is in-memory only; if continue is invoked in a fresh
+  // process it is always null here — confirm against createAgent's contract.
+  const continueRun = async (
+    sessionId: string,
+    _message: string,
+    _store: Store,
+  ): Promise<AgentRunResult> => {
+    if (lastResult !== null) {
+      log("MK3N8RTW", `mock continue for session ${sessionId}, replaying scripted output`);
+      return lastResult;
+    }
+    throw new Error("mock continue called before run");
+  };
+
+  return createAgent({ name: "mock", run, continue: continueRun });
+}
diff --git a/packages/agent-mock/src/types.ts b/packages/agent-mock/src/types.ts
new file mode 100644
index 0000000..63e83cf
--- /dev/null
+++ b/packages/agent-mock/src/types.ts
@@ -0,0 +1,12 @@
+/** One pre-scripted step in a mock scenario. */
+export type MockStep = {
+  /** Role this step is expected to run as. Validated against the actual `--role` argument. */
+  role: string;
+  /** Frontmatter markdown output the mock agent emits for this step. */
+  output: string;
+};
+
+/** Deterministic, pre-scripted agent script loaded from a YAML mock data file. */
+export type MockScenario = {
+  steps: MockStep[];
+};
diff --git a/packages/agent-mock/tsconfig.json b/packages/agent-mock/tsconfig.json
new file mode 100644
index 0000000..7b7fa2a
--- /dev/null
+++ b/packages/agent-mock/tsconfig.json
@@ -0,0 +1,9 @@
+{
+  "extends": "../../tsconfig.json",
+  "compilerOptions": {
+    "rootDir": "src",
+    "outDir": "dist"
+  },
+  "include": ["src"],
+  "references": [{ "path": "../util-agent" }, { "path": "../util" }, { "path": "../protocol" }]
+}
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 0bbdfce..1aa12c6 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -93,6 +93,28 @@ importers:
         specifier: ^5.8.3
         version: 5.9.3
 
+  packages/agent-mock:
+    dependencies:
+      '@ocas/core':
+        specifier: ^0.3.0
+        version: 0.3.0
+      '@united-workforce/protocol':
+        specifier: workspace:^
+        version: link:../protocol
+      '@united-workforce/util':
+        specifier: workspace:^
+        version: link:../util
+      '@united-workforce/util-agent':
+        specifier: workspace:^
+        version: link:../util-agent
+      yaml:
+        specifier: ^2.9.0
+        version: 2.9.0
+    devDependencies:
+      typescript:
+        specifier: ^5.8.3
+        version: 5.9.3
+
   packages/cli:
     dependencies:
       '@ocas/core':
diff --git a/proman.yaml b/proman.yaml
index 551f753..a0cc8e5 100644
--- a/proman.yaml
+++ b/proman.yaml
@@ -23,6 +23,10 @@ packages:
     path: packages/agent-builtin
     type: cli
 
+  - name: "@united-workforce/agent-mock"
+    path: packages/agent-mock
+    type: cli
+
   - name: "@united-workforce/cli"
     path: packages/cli
     type: cli
diff --git a/tsconfig.json b/tsconfig.json
index 1547e16..76e1129 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -23,6 +23,7 @@
     { "path": "packages/util-agent" },
     { "path": "packages/agent-hermes" },
     { "path": "packages/agent-builtin" },
+    { "path": "packages/agent-mock" },
     { "path": "packages/agent-claude-code" },
     { "path": "packages/cli" }
   ]
-- 
2.43.0


From 80e8efb05e9457585274d414e97b8f6184f6432c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= <xiaoju@shazhou.work>
Date: Thu, 4 Jun 2026 07:44:48 +0000
Subject: [PATCH 2/2] test: E2E integration tests with uwf-mock agent (#33)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three scenarios testing the full CLI pipeline:
1. Linear workflow (planner → worker → $END): CAS chain integrity
2. Loop workflow (developer ↔ reviewer): moderator routing through cycles
3. Role mismatch detection: agent catches routing bugs

Uses workflow add → thread start → thread exec with uwf-mock,
verifying CAS state, thread lifecycle, and error handling.

Updated assertions to use getThread().status === 'completed'
(aligned with PR #45 unified thread storage).

Refs #33
---
 .../cli/src/__tests__/e2e-mock-agent.test.ts  | 296 ++++++++++++++++++
 .../__tests__/fixtures/e2e-linear.mock.yaml   |  13 +
 .../fixtures/e2e-linear.workflow.yaml         |  32 ++
 .../src/__tests__/fixtures/e2e-loop.mock.yaml |  25 ++
 .../__tests__/fixtures/e2e-loop.workflow.yaml |  36 +++
 .../__tests__/fixtures/e2e-mismatch.mock.yaml |  16 +
 6 files changed, 418 insertions(+)
 create mode 100644 packages/cli/src/__tests__/e2e-mock-agent.test.ts
 create mode 100644 packages/cli/src/__tests__/fixtures/e2e-linear.mock.yaml
 create mode 100644 packages/cli/src/__tests__/fixtures/e2e-linear.workflow.yaml
 create mode 100644 packages/cli/src/__tests__/fixtures/e2e-loop.mock.yaml
 create mode 100644 packages/cli/src/__tests__/fixtures/e2e-loop.workflow.yaml
 create mode 100644 packages/cli/src/__tests__/fixtures/e2e-mismatch.mock.yaml

diff --git a/packages/cli/src/__tests__/e2e-mock-agent.test.ts b/packages/cli/src/__tests__/e2e-mock-agent.test.ts
new file mode 100644
index 0000000..e71e488
--- /dev/null
+++ b/packages/cli/src/__tests__/e2e-mock-agent.test.ts
@@ -0,0 +1,296 @@
+import { execFileSync } from "node:child_process";
+import { existsSync } from "node:fs";
+import { mkdir, mkdtemp, readFile, rm, writeFile } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { dirname, join } from "node:path";
+import { fileURLToPath } from "node:url";
+import { openStore } from "@ocas/fs";
+import type { CasRef, StartNodePayload, StepNodePayload } from "@united-workforce/protocol";
+import { afterEach, beforeAll, beforeEach, describe, expect, test } from "vitest";
+import { stringify } from "yaml";
+import { cmdThreadStart } from "../commands/thread.js";
+import { cmdWorkflowAdd } from "../commands/workflow.js";
+import { createUwfStore, getThread } from "../store.js";
+
+// ── paths ──────────────────────────────────────────────────────────────────
+
+const TEST_DIR = dirname(fileURLToPath(import.meta.url));
+const FIXTURES_DIR = join(TEST_DIR, "fixtures");
+const CLI_PATH = join(TEST_DIR, "..", "..", "dist", "cli.js");
+const REPO_ROOT = join(TEST_DIR, "..", "..", "..", "..");
+const AGENT_MOCK_DIR = join(REPO_ROOT, "packages", "agent-mock");
+const AGENT_MOCK_CLI = join(AGENT_MOCK_DIR, "dist", "cli.js");
+
+// ── shared fixture state ─────────────────────────────────────────────────────
+
+let tmpDir: string;
+let uwfHome: string;
+let casDir: string;
+let savedEnv: { uwf: string | undefined; ocas: string | undefined };
+
+/**
+ * The mock agent runs from its built `dist/cli.js`. When the test suite runs
+ * standalone (no prior `pnpm run build`), build it on demand so the E2E run is
+ * self-contained. Compiler output is inherited so a failed build is diagnosable.
+ */
+beforeAll(() => {
+  if (existsSync(AGENT_MOCK_CLI)) {
+    return;
+  }
+  execFileSync(
+    process.execPath,
+    [
+      join(REPO_ROOT, "node_modules", "typescript", "bin", "tsc"),
+      "--build",
+      "--force",
+      AGENT_MOCK_DIR,
+    ],
+    { cwd: REPO_ROOT, stdio: ["ignore", "inherit", "inherit"] },
+  );
+}, 120000);
+
+beforeEach(async () => {
+  tmpDir = await mkdtemp(join(tmpdir(), "cli-e2e-mock-"));
+  uwfHome = join(tmpDir, "uwf");
+  casDir = join(tmpDir, "ocas");
+  await Promise.all([uwfHome, casDir].map((dir) => mkdir(dir, { recursive: true })));
+  // cmdWorkflowAdd / cmdThreadStart read the CAS dir from OCAS_HOME and the root from UWF_HOME.
+  savedEnv = { uwf: process.env.UWF_HOME, ocas: process.env.OCAS_HOME };
+  Object.assign(process.env, { UWF_HOME: uwfHome, OCAS_HOME: casDir });
+});
+
+afterEach(async () => {
+  // Assigning undefined to process.env stores the string "undefined"; unset, then restore.
+  delete process.env.UWF_HOME;
+  delete process.env.OCAS_HOME;
+  if (savedEnv.uwf !== undefined) process.env.UWF_HOME = savedEnv.uwf;
+  if (savedEnv.ocas !== undefined) process.env.OCAS_HOME = savedEnv.ocas;
+  await rm(tmpDir, { recursive: true, force: true });
+});
+
+// ── helpers ──────────────────────────────────────────────────────────────────
+
+/**
+ * Point the default agent at the mock agent by writing `config.yaml` into
+ * UWF_HOME. The fixture path is embedded in the agent args, so `thread exec`
+ * (run without an `--agent` override) resolves it from config.
+ */
+async function writeMockConfig(mockDataFixture: string): Promise<void> {
+  const mockDataPath = join(FIXTURES_DIR, mockDataFixture);
+  const mockAgent = {
+    command: process.execPath,
+    args: [AGENT_MOCK_CLI, "--mock-data", mockDataPath],
+  };
+  const config = {
+    defaultAgent: "mock",
+    defaultModel: "test",
+    providers: {},
+    models: {},
+    agentOverrides: null,
+    agents: { mock: mockAgent },
+  };
+  await writeFile(join(uwfHome, "config.yaml"), stringify(config));
+}
+
+/**
+ * Register a fixture workflow and return its hash. `cmdWorkflowAdd` enforces filename↔name
+ * consistency, so the fixture is first copied into UWF_HOME as `<workflow-name>.yaml`.
+ */
+async function addWorkflow(workflowFixture: string, workflowName: string): Promise<CasRef> {
+  const target = join(uwfHome, `${workflowName}.yaml`);
+  const yamlText = await readFile(join(FIXTURES_DIR, workflowFixture), "utf8");
+  await writeFile(target, yamlText);
+  const { hash } = await cmdWorkflowAdd(uwfHome, target);
+  return hash;
+}
+
+type ExecResult = { stdout: string; stderr: string; exitCode: number };
+
+/** Run `thread exec <threadId>` via the built CLI in a child process and capture the result. */
+function runExec(threadId: string): ExecResult {
+  const argv = [CLI_PATH, "thread", "exec", threadId];
+  const env = { ...process.env, UWF_HOME: uwfHome, OCAS_HOME: casDir };
+  try {
+    const stdout = execFileSync(process.execPath, argv, {
+      encoding: "utf8",
+      stdio: ["ignore", "pipe", "pipe"],
+      env,
+      cwd: tmpDir,
+      timeout: 30000,
+    });
+    return { stdout, stderr: "", exitCode: 0 };
+  } catch (e: unknown) {
+    // execFileSync threw: read whatever output/status it attached and default the rest.
+    const err = e as { stdout?: string; stderr?: string; status?: number | null };
+    return { stdout: err.stdout ?? "", stderr: err.stderr ?? "", exitCode: err.status ?? 1 };
+  }
+}
+
+type StepOutputJson = {
+  thread: string;
+  head: string;
+  status: string;
+  currentRole: string | null;
+  done: boolean;
+};
+
+function execStep(threadId: string): StepOutputJson {
+  const { stdout, stderr, exitCode } = runExec(threadId);
+  if (exitCode === 0) {
+    return JSON.parse(stdout.trim()) as StepOutputJson;
+  }
+  throw new Error(`thread exec failed (code ${exitCode})\nstdout: ${stdout}\nstderr: ${stderr}`);
+}
+
+function getStepNode(store: Awaited<ReturnType<typeof openStore>>, hash: string): StepNodePayload {
+  const node = store.cas.get(hash as CasRef);
+  expect(node).not.toBeNull();
+  return node!.payload as StepNodePayload;
+}
+
+function getStatus(store: Awaited<ReturnType<typeof openStore>>, outputRef: CasRef): unknown {
+  const node = store.cas.get(outputRef);
+  expect(node).not.toBeNull();
+  return (node!.payload as Record<string, unknown>).$status;
+}
+
+// ── scenarios ─────────────────────────────────────────────────────────────────
+
+describe("E2E mock-agent: full uwf pipeline", () => {
+  test("1. linear workflow runs planner then worker and reaches $END", async () => {
+    await writeMockConfig("e2e-linear.mock.yaml");
+    const workflowHash = await addWorkflow("e2e-linear.workflow.yaml", "test-linear");
+
+    const start = await cmdThreadStart(uwfHome, workflowHash, "Build the thing", uwfHome, tmpDir);
+    const threadId = start.thread;
+
+    // Capture the start node hash (thread head before any step).
+    const startHash = getThread((await createUwfStore(uwfHome)).varStore, threadId)?.head;
+    expect(startHash).toBeDefined();
+
+    // Step 1 → planner.
+    const step1 = execStep(threadId);
+    expect(step1.thread).toBe(threadId);
+    expect(step1.done).toBe(false);
+    expect(step1.status).toBe("idle");
+    expect(step1.currentRole).toBe("worker");
+
+    // Step 2 → worker → $END (thread is marked completed).
+    const step2 = execStep(threadId);
+    expect(step2.done).toBe(true);
+    expect(step2.status).toBe("completed");
+    expect(step2.currentRole).toBeNull();
+
+    // Verify CAS chain integrity: start → step1 → step2.
+    const store = await openStore(casDir);
+    const s1 = getStepNode(store, step1.head);
+    const s2 = getStepNode(store, step2.head);
+
+    expect(s1.role).toBe("planner");
+    expect(s1.prev).toBeNull();
+    expect(s1.start).toBe(startHash);
+
+    expect(s2.role).toBe("worker");
+    expect(s2.prev).toBe(step1.head);
+    expect(s2.start).toBe(s1.start);
+
+    // Output frontmatter statuses persisted correctly.
+    expect(getStatus(store, s1.output)).toBe("ready");
+    expect(getStatus(store, s2.output)).toBe("done");
+
+    // The start node points at the registered workflow.
+    const startNode = store.cas.get(startHash as CasRef);
+    expect((startNode!.payload as StartNodePayload).workflow).toBe(workflowHash);
+
+    // Thread is completed: status changed to "completed", head updated.
+    const uwf = await createUwfStore(uwfHome);
+    const finalEntry = getThread(uwf.varStore, threadId);
+    expect(finalEntry).not.toBeNull();
+    expect(finalEntry!.status).toBe("completed");
+    expect(finalEntry!.head).toBe(step2.head);
+  });
+
+  test("2. branching workflow loops developer→reviewer→developer→reviewer→$END", async () => {
+    await writeMockConfig("e2e-loop.mock.yaml");
+    const workflowHash = await addWorkflow("e2e-loop.workflow.yaml", "test-loop");
+
+    const start = await cmdThreadStart(uwfHome, workflowHash, "Implement feature", uwfHome, tmpDir);
+    const threadId = start.thread;
+
+    // 4 steps: developer, reviewer (rejected → loop), developer, reviewer (approved → $END).
+    const s1 = execStep(threadId);
+    expect(s1.status).toBe("idle");
+    expect(s1.currentRole).toBe("reviewer");
+
+    const s2 = execStep(threadId);
+    expect(s2.status).toBe("idle");
+    // reviewer rejected → loops back to developer.
+    expect(s2.currentRole).toBe("developer");
+
+    const s3 = execStep(threadId);
+    expect(s3.status).toBe("idle");
+    expect(s3.currentRole).toBe("reviewer");
+
+    const s4 = execStep(threadId);
+    expect(s4.done).toBe(true);
+    expect(s4.status).toBe("completed");
+
+    // Verify the chain order and roles.
+    const store = await openStore(casDir);
+    const n1 = getStepNode(store, s1.head);
+    const n2 = getStepNode(store, s2.head);
+    const n3 = getStepNode(store, s3.head);
+    const n4 = getStepNode(store, s4.head);
+
+    expect([n1.role, n2.role, n3.role, n4.role]).toEqual([
+      "developer",
+      "reviewer",
+      "developer",
+      "reviewer",
+    ]);
+    expect(n1.prev).toBeNull();
+    expect(n2.prev).toBe(s1.head);
+    expect(n3.prev).toBe(s2.head);
+    expect(n4.prev).toBe(s3.head);
+
+    // All steps share the same start node.
+    expect(new Set([n1.start, n2.start, n3.start, n4.start]).size).toBe(1);
+
+    // Statuses drove the loop routing.
+    expect(getStatus(store, n1.output)).toBe("review_needed");
+    expect(getStatus(store, n2.output)).toBe("rejected");
+    expect(getStatus(store, n3.output)).toBe("review_needed");
+    expect(getStatus(store, n4.output)).toBe("approved");
+
+    const uwf = await createUwfStore(uwfHome);
+    const finalEntry = getThread(uwf.varStore, threadId);
+    expect(finalEntry).not.toBeNull();
+    expect(finalEntry!.status).toBe("completed");
+  });
+
+  test("3. role mismatch in mock data makes the agent exit with an error", async () => {
+    // Reuses the linear workflow but with a mock whose step[1].role is wrong.
+    await writeMockConfig("e2e-mismatch.mock.yaml");
+    const workflowHash = await addWorkflow("e2e-linear.workflow.yaml", "test-linear");
+
+    const start = await cmdThreadStart(uwfHome, workflowHash, "Build the thing", uwfHome, tmpDir);
+    const threadId = start.thread;
+
+    // Step 1 (planner) matches and succeeds.
+    const step1 = execStep(threadId);
+    expect(step1.status).toBe("idle");
+    expect(step1.currentRole).toBe("worker");
+
+    // Step 2: moderator routes to "worker" but mock step[1].role is "planner".
+    const result = runExec(threadId);
+    expect(result.exitCode).not.toBe(0);
+    expect(`${result.stdout}\n${result.stderr}`).toMatch(/expected role "planner"/);
+
+    // The thread remains active (no step node was written for the failed step).
+    const uwf = await createUwfStore(uwfHome);
+    const entry = getThread(uwf.varStore, threadId);
+    expect(entry).not.toBeNull();
+    expect(entry!.status).not.toBe("completed");
+    expect(entry!.head).toBe(step1.head);
+  });
+});
diff --git a/packages/cli/src/__tests__/fixtures/e2e-linear.mock.yaml b/packages/cli/src/__tests__/fixtures/e2e-linear.mock.yaml
new file mode 100644
index 0000000..66a5135
--- /dev/null
+++ b/packages/cli/src/__tests__/fixtures/e2e-linear.mock.yaml
@@ -0,0 +1,13 @@
+steps:
+  - role: planner
+    output: |
+      ---
+      $status: ready
+      ---
+      Planning complete.
+  - role: worker
+    output: |
+      ---
+      $status: done
+      ---
+      Work complete.
diff --git a/packages/cli/src/__tests__/fixtures/e2e-linear.workflow.yaml b/packages/cli/src/__tests__/fixtures/e2e-linear.workflow.yaml
new file mode 100644
index 0000000..9a4a638
--- /dev/null
+++ b/packages/cli/src/__tests__/fixtures/e2e-linear.workflow.yaml
@@ -0,0 +1,32 @@
+name: test-linear
+description: Simple 2-step linear test (planner -> worker -> $END)
+roles:
+  planner:
+    description: Plans work
+    goal: Plan the task
+    capabilities: []
+    procedure: Plan it
+    output: Output a plan and set $status to ready
+    frontmatter:
+      oneOf:
+        - properties:
+            $status: { const: ready }
+          required: [$status]
+  worker:
+    description: Does work
+    goal: Do the work
+    capabilities: []
+    procedure: Do it
+    output: Output the result and set $status to done
+    frontmatter:
+      oneOf:
+        - properties:
+            $status: { const: done }
+          required: [$status]
+graph:
+  $START:
+    _: { role: planner, prompt: 'Plan the task' }
+  planner:
+    ready: { role: worker, prompt: 'Do the work' }
+  worker:
+    done: { role: '$END', prompt: 'Done' }
diff --git a/packages/cli/src/__tests__/fixtures/e2e-loop.mock.yaml b/packages/cli/src/__tests__/fixtures/e2e-loop.mock.yaml
new file mode 100644
index 0000000..e2fa37d
--- /dev/null
+++ b/packages/cli/src/__tests__/fixtures/e2e-loop.mock.yaml
@@ -0,0 +1,25 @@
+steps:
+  - role: developer
+    output: |
+      ---
+      $status: review_needed
+      ---
+      First implementation.
+  - role: reviewer
+    output: |
+      ---
+      $status: rejected
+      ---
+      Needs changes, sending back.
+  - role: developer
+    output: |
+      ---
+      $status: review_needed
+      ---
+      Second implementation addressing feedback.
+  - role: reviewer
+    output: |
+      ---
+      $status: approved
+      ---
+      Looks good, approved.
diff --git a/packages/cli/src/__tests__/fixtures/e2e-loop.workflow.yaml b/packages/cli/src/__tests__/fixtures/e2e-loop.workflow.yaml
new file mode 100644
index 0000000..604452a
--- /dev/null
+++ b/packages/cli/src/__tests__/fixtures/e2e-loop.workflow.yaml
@@ -0,0 +1,36 @@
+name: test-loop
+description: Branching test where the reviewer can reject and loop back to the developer
+roles:
+  developer:
+    description: Implements changes
+    goal: Implement the change
+    capabilities: []
+    procedure: Write code
+    output: Summarize the change and set $status to review_needed
+    frontmatter:
+      oneOf:
+        - properties:
+            $status: { const: review_needed }
+          required: [$status]
+  reviewer:
+    description: Reviews changes
+    goal: Review the change
+    capabilities: []
+    procedure: Review code
+    output: Approve or reject; set $status to approved or rejected
+    frontmatter:
+      oneOf:
+        - properties:
+            $status: { const: rejected }
+          required: [$status]
+        - properties:
+            $status: { const: approved }
+          required: [$status]
+graph:
+  $START:
+    _: { role: developer, prompt: 'Implement the change' }
+  developer:
+    review_needed: { role: reviewer, prompt: 'Review the change' }
+  reviewer:
+    rejected: { role: developer, prompt: 'Fix the issues and resubmit' }
+    approved: { role: '$END', prompt: 'Approved, done' }
diff --git a/packages/cli/src/__tests__/fixtures/e2e-mismatch.mock.yaml b/packages/cli/src/__tests__/fixtures/e2e-mismatch.mock.yaml
new file mode 100644
index 0000000..d397d90
--- /dev/null
+++ b/packages/cli/src/__tests__/fixtures/e2e-mismatch.mock.yaml
@@ -0,0 +1,16 @@
+# Reuses the test-linear workflow. The moderator routes step 0 -> planner and
+# step 1 -> worker, but step[1].role below is "planner", so the mock agent must
+# detect the role mismatch on the second step and exit with an error.
+steps:
+  - role: planner
+    output: |
+      ---
+      $status: ready
+      ---
+      Planning complete.
+  - role: planner
+    output: |
+      ---
+      $status: done
+      ---
+      This step claims to be planner, but the moderator routes to worker.
-- 
2.43.0