test: E2E integration tests with uwf-mock agent (#33)

Three scenarios testing the full CLI pipeline:
1. Linear workflow (planner → worker → $END): CAS chain integrity
2. Loop workflow (developer ↔ reviewer): moderator routing through cycles
3. Role mismatch detection: agent catches routing bugs

Uses workflow add → thread start → thread exec with uwf-mock, verifying CAS state, thread lifecycle, and error handling.

Refs #33
feat: add agent-mock package for deterministic E2E testing (#33)
2026-06-04 07:44:48 +00:00 · 2026-06-04 06:50:49 +00:00
148 changed files with 1260 additions and 5043 deletions
@@ -0,0 +1,8 @@
+# Changesets
+
+Hello and welcome! This folder has been automatically generated by `@changesets/cli`, a build tool that works
+with multi-package repos, or single-package repos to help you version and publish your code. You can
+find the full documentation for it [in our repository](https://github.com/changesets/changesets).
+
+We have a quick list of common questions to get you started engaging with this project in
+[our documentation](https://github.com/changesets/changesets/blob/main/docs/common-questions.md).
@@ -0,0 +1,11 @@
+{
+  "$schema": "https://unpkg.com/@changesets/config@3.1.4/schema.json",
+  "changelog": "@changesets/cli/changelog",
+  "commit": false,
+  "fixed": [["@united-workforce/*"]],
+  "linked": [],
+  "access": "public",
+  "baseBranch": "main",
+  "updateInternalDependencies": "patch",
+  "ignore": ["@united-workforce/dashboard"]
+}
@@ -0,0 +1,30 @@
+{
+  "mode": "exit",
+  "tag": "alpha",
+  "initialVersions": {
+    "@uncaged/cli": "0.4.5",
+    "@uncaged/workflow-agent-cursor": "0.4.5",
+    "@uncaged/agent-hermes": "0.4.5",
+    "@uncaged/workflow-agent-llm": "0.4.5",
+    "@uncaged/workflow-agent-react": "0.4.5",
+    "@uncaged/workflow-cas": "0.4.5",
+    "@uncaged/dashboard": "0.1.0",
+    "@uncaged/workflow-execute": "0.4.5",
+    "@uncaged/workflow-gateway": "0.4.5",
+    "@uncaged/protocol": "0.4.5",
+    "@uncaged/workflow-reactor": "0.4.5",
+    "@uncaged/workflow-register": "0.4.5",
+    "@uncaged/workflow-runtime": "0.4.5",
+    "@uncaged/workflow-template-develop": "0.4.5",
+    "@uncaged/workflow-template-solve-issue": "0.4.5",
+    "@uncaged/util": "0.4.5",
+    "@uncaged/util-agent": "0.4.5"
+  },
+  "changesets": [
+    "env-api-unify",
+    "fix-internal-deps",
+    "fix-publish-src",
+    "fix-workspace-deps",
+    "rfc-252-agent-fn"
+  ]
+}
@@ -12,17 +12,15 @@ jobs:
    steps:
      - uses: actions/checkout@v4

-      - uses: actions/setup-node@v4
-        with:
-          node-version: 22
+      - uses: oven-sh/setup-bun@v2

-      - run: corepack enable && pnpm install
+      - run: bun install

      - name: Build
-        run: pnpm run build
+        run: bun run build

      - name: Lint
-        run: pnpm run check
+        run: bun run check

      - name: Test
-        run: pnpm run test:ci
+        run: bun run test:ci
@@ -1,226 +0,0 @@
-# Eval Framework Implementation Plan
-
-## Goal
-
-Build `uwf-eval` CLI + eval task infrastructure for evaluating uwf workflow quality with real agents.
-
-## Architecture
-
-```
-uwf-eval (runner)          task package (npm)          OCAS (storage)
-  │                          │                           │
-  ├─ unpack tarball ───────► fixture/ → tmp cwd          │
-  ├─ read task.yaml          │                           │
-  ├─ uwf thread start/exec  │                           │
-  ├─ run judges ───────────► dist/judges/*.js            │
-  ├─ collect scores          │                           │
-  └─ store results ─────────────────────────────────────► CAS nodes + variables
-```
-
-### Key Design Decisions
-
-- **uwf-eval is NOT part of uwf** — separate package, shells out to uwf CLI
-- **Task = npm package** — fixture + task.yaml + judge scripts, distributable as tarball
-- **Judge = Node script** — `node <entry> <cwd> <thread-id>`, outputs `{score, data}` JSON
-- **Every output is OCAS typed** — eval-run, judge results all have registered schemas
-- **Builtin judges** — frontmatter compliance, upstream consumption, hallucination, token stats
-- **Task-specific judges** — bundled in the task package, custom schema per judge
-
-## Deliverables
-
-### Phase 1: Foundation (`@united-workforce/eval`)
-
-New package in the uwf monorepo.
-
-```
-packages/eval/
-  src/
-    cli.ts                    # uwf-eval entry point
-    commands/
-      run.ts                  # uwf-eval run
-      report.ts               # uwf-eval report <hash>
-      diff.ts                 # uwf-eval diff <hash> <hash>
-      list.ts                 # uwf-eval list
-    runner/
-      prepare.ts              # unpack tarball/dir → tmp cwd
-      execute.ts              # shell out to uwf thread start/exec
-      collect.ts              # run judges, collect scores
-    judge/
-      types.ts                # JudgeInput, JudgeOutput types
-      builtin/
-        frontmatter.ts        # frontmatter compliance check
-        upstream.ts           # upstream info consumption (LLM-as-judge)
-        hallucination.ts      # hallucination detection (LLM-as-judge)
-        token-stats.ts        # token usage from $usage field (#68)
-    storage/
-      schemas.ts              # OCAS schema definitions
-      store.ts                # CAS read/write helpers
-      index.ts                # variable indexing (@uwf/eval/*)
-    task/
-      types.ts                # TaskManifest type (task.yaml)
-      loader.ts               # parse task.yaml, validate
-  package.json
-  tsconfig.json
-```
-
-#### OCAS Schemas to Register
-
-1. `@uwf/eval-run` — full eval execution record
-   ```
-   { task, config: {agent, model, engineVersion}, threadId,
-     judges: [{name, score, weight, dataHash}], overall, timestamp }
-   ```
-
-2. `@uwf/eval-judge-frontmatter` — frontmatter judge data
-   ```
-   { stepsTotal, stepsValid, invalidSteps: [{stepIndex, role, errors: string[]}] }
-   ```
-
-3. `@uwf/eval-judge-upstream` — upstream consumption judge data
-   ```
-   { perStep: [{role, consumed: string[], missed: string[], score}] }
-   ```
-
-4. `@uwf/eval-judge-hallucination` — hallucination judge data
-   ```
-   { perStep: [{role, hallucinations: string[], score}] }
-   ```
-
-5. `@uwf/eval-judge-token-stats` — token stats (not scored, informational)
-   ```
-   { totalInput, totalOutput, totalTurns, perStep: [{role, input, output, turns, duration}] }
-   ```
-
-#### CLI Design
-
-```bash
-# Run eval
-uwf-eval run <task-dir-or-tarball> [--agent hermes] [--model claude-sonnet-4] [--count 20]
-
-# View results
-uwf-eval report <run-hash>        # render via ocas render
-uwf-eval diff <hash1> <hash2>     # side-by-side comparison
-uwf-eval list                     # list past runs
-```
-
-### Phase 2: Task Package Scaffold
-
-Template for creating eval tasks. Also serves as the first real task.
-
-```
-eval-tasks/                        # shazhou/uwf-eval-tasks monorepo
-  packages/
-    _template/                     # copypaste template
-      package.json
-      task.yaml
-      fixture/
-      src/judges/
-      tsconfig.json
-    fix-off-by-one/                # first real task
-      package.json                 # @uwf-eval/fix-off-by-one
-      task.yaml
-      fixture/
-        src/calc.ts                # buggy calculator
-        src/calc.test.ts           # test that exposes the bug
-        package.json
-      src/judges/
-        test-pass.ts               # runs pnpm test, checks exit code
-        code-quality.ts            # LLM judge: minimal change, correct fix
-      schemas/
-        test-pass.json             # OCAS schema for test-pass data
-        code-quality.json          # OCAS schema for code-quality data
-      tsconfig.json
-  pnpm-workspace.yaml
-  tsconfig.json
-  biome.json
-```
-
-#### task.yaml Format
-
-```yaml
-name: fix-off-by-one
-description: Fix an off-by-one error in a calculator's add function
-workflow: solve-issue              # registered workflow name, or relative path to .yaml
-prompt: "Fix the bug: add(1,2) returns 4 instead of 3"
-limits:
-  maxSteps: 15
-  timeoutMinutes: 30
-judges:
-  - name: frontmatter-compliance
-    weight: 0.15
-    builtin: true
-  - name: upstream-consumption
-    weight: 0.15
-    builtin: true
-  - name: hallucination
-    weight: 0.1
-    builtin: true
-  - name: token-stats
-    weight: 0                      # informational, not scored
-    builtin: true
-  - name: test-pass
-    weight: 0.3
-    entry: dist/judges/test-pass.js
-    schema: schemas/test-pass.json
-  - name: code-quality
-    weight: 0.3
-    entry: dist/judges/code-quality.js
-    schema: schemas/code-quality.json
-```
-
-#### Judge Script Contract
-
-```typescript
-// Input: process.argv = [node, script, cwd, threadId]
-// Output: stdout JSON
-// Exit 0 = success, non-zero = judge error (not low score)
-
-import type { JudgeOutput } from "@united-workforce/eval";
-
-const result: JudgeOutput<TestPassData> = {
-  score: 1.0,      // 0.0 - 1.0
-  data: {           // typed per judge schema
-    command: "pnpm test",
-    exitCode: 0,
-    output: "3 tests passed"
-  }
-};
-
-console.log(JSON.stringify(result));
-```
-
-### Phase 3: Prerequisite — $usage in Adapter Protocol (#68)
-
-Blocked by #68. Token stats judge needs `$usage` in step nodes.
-
-Can proceed with Phase 1+2 without it — token-stats judge just returns zeros until adapters report usage.
-
-## Implementation Order
-
-1. **Phase 1a**: `@united-workforce/eval` package scaffold + CLI skeleton + OCAS schemas
-2. **Phase 1b**: `run` command — prepare, execute, collect flow
-3. **Phase 1c**: Builtin judges — frontmatter (deterministic), upstream + hallucination (LLM-as-judge)
-4. **Phase 2a**: Create `shazhou/uwf-eval-tasks` monorepo with proman
-5. **Phase 2b**: First task `fix-off-by-one` with fixture repo + 2 custom judges
-6. **Phase 2c**: End-to-end test: `uwf-eval run packages/fix-off-by-one --agent hermes`
-7. **Phase 1d**: `report`, `diff`, `list` commands (read from CAS, render via ocas render)
-
-## Dependencies
-
-- `@ocas/core` + `@ocas/fs` — CAS storage
-- `@united-workforce/protocol` — step node types
-- `commander` — CLI framework (consistent with uwf)
-- LLM API access — for LLM-as-judge (upstream, hallucination, task-specific quality judges)
-
-## Open Questions
-
-1. **LLM-as-judge provider config** — reuse uwf's `~/.uwf/config.yaml` provider settings? Or separate config?
-2. **Workflow file location** — task.yaml references a workflow. Should the workflow YAML be inside the tarball, or reference a registered workflow by name?
-3. **Non-coding tasks** — debate workflow has no fixture repo. task.yaml needs `fixture: null` or simply omit the `fixture/` dir. Runner creates empty cwd.
-4. **Parallel judge execution** — judges are independent, can run in parallel. Worth the complexity?
-
-## Risks
-
-- LLM-as-judge consistency — same input may get different scores. Mitigation: run judge multiple times, take average? Or accept variance.
-- Token cost of judges — each LLM judge call costs tokens. For a 10-step workflow with 2 LLM judges = 20 LLM calls just for judging. Acceptable?
-- Fixture repo drift — if the fixture evolves, old eval runs become non-comparable. Pin fixture version in task.yaml.
@@ -1,25 +0,0 @@
-# Changelog
-
-## 0.1.0 (2026-06-05)
-
-Initial release of `@united-workforce/*` — a stateless workflow engine for AI agent orchestration.
-
-### Packages
-
- **@united-workforce/protocol** — shared types (WorkflowPayload, StepNode, etc.)
- **@united-workforce/util** — Crockford Base32, ULID, structured logger, frontmatter parsing
- **@united-workforce/util-agent** — agent factory, context builder, extract pipeline
- **@united-workforce/cli** — `uwf` CLI (thread lifecycle, status-based moderator, workflow registry)
- **@united-workforce/eval** — `uwf-eval` CLI (prepare → execute → collect eval pipeline)
- **@united-workforce/agent-hermes** — `uwf-hermes` adapter (Hermes Agent)
- **@united-workforce/agent-claude-code** — `uwf-claude-code` adapter (Claude Code CLI)
- **@united-workforce/agent-builtin** — `uwf-builtin` adapter (built-in LLM agent)
- **@united-workforce/agent-mock** — `uwf-mock` adapter (deterministic test agent)
-
-### Highlights
-
- Status-based graph routing (no LLM moderator cost)
- CAS-backed immutable thread chains (`@ocas/core`)
- Real token usage tracking (Hermes + Claude Code)
- Eval framework with built-in judges (frontmatter, token-stats, test-pass)
- `$SUSPEND` / resume for human-in-the-loop workflows
@@ -222,42 +222,41 @@ Test files (`__tests__/**`) are exempt.

 | Tool | Purpose |
 |------|---------|
-| **pnpm** | Package manager |
+| **bun** | Package manager + runtime |
 | **TypeScript** | Type checking (strict mode) |
 | **Biome** | Lint + format (replaces ESLint + Prettier) |
-| **vitest** | Test runner (all packages) |
+| **vitest** | Test runner (`cli` uses vitest; other packages use `bun test`) |

 ### Development Workflow

 ```bash
 # ── Setup ──
-pnpm install                # install all workspace dependencies
+bun install                 # install all workspace dependencies

 # ── Daily development ──
-pnpm run build              # build all packages (dependency order)
-pnpm run check              # biome check + lint-log-tags
-pnpm run typecheck          # tsc --build
-pnpm run test               # run tests across all packages
+bun run build               # tsc --build (all packages, dependency order)
+bun run check               # tsc --build + biome check + lint-log-tags
+bun run format              # biome format --write
+bun test                    # run tests across all packages

 # ── Before committing ──
-pnpm run check              # must pass — lint + log tag validation
-pnpm run typecheck          # must pass — type checking
-pnpm run test               # must pass — all package tests
+bun run check               # must pass — typecheck + lint + log tag validation
+bun test                    # must pass — all package tests
 ```

 ### Publishing

-All public `@united-workforce/*` packages are published to **npmjs.org** with **independent versioning**.
+All public `@united-workforce/*` packages are published to **npmjs.org** with **fixed mode** (all packages share the same version number).

 ```bash
 # 1. Add a changeset describing the change
-npx changeset
+bun changeset

-# 2. Bump versions + generate CHANGELOGs
-proman bump
+# 2. Bump all package versions + generate CHANGELOGs
+bun version

-# 3. Build, test, and publish
-proman publish
+# 3. Build, test, and publish (runs scripts/publish-all.mjs)
+bun release

 # Or publish manually with a tag:
 node scripts/publish-all.mjs --tag alpha
@@ -266,7 +265,7 @@ node scripts/publish-all.mjs --dry-run    # preview without publishing

 - `workspace:^` dependencies resolve to `^x.y.z` on publish
 - Publish order defined in `scripts/publish-all.mjs` (dependency order)
-- Changesets config: `.changeset/config.json` (independent versioning, public access)
+- Changesets config: `.changeset/config.json` (fixed mode, public access)

 ### End-to-end: Author → Register → Run

@@ -470,7 +470,7 @@ Use the `ocas` CLI for direct CAS operations (`~/.ocas/` store, shared with `uwf

 | Tool | Purpose |
 |------|---------|
-| **pnpm** | Package manager |
+| **bun** | Package manager + runtime |
 | **TypeScript** | Type checking (strict mode) |
 | **Biome** | Lint + format |
 | **vitest** | Test runner |
@@ -17,7 +17,7 @@ The root README should have these sections in order:
 4. **Packages** — table with ALL packages from packages/ directory, columns: Package, Description, Type (cli/lib/agent/app)
 5. **Quick Start** — install, build, register workflow, start thread, run step
 6. **CLI Reference** — brief command list, detailed usage in cli README
-7. **Development** — pnpm install / build / check / test
+7. **Development** — bun install / build / check / test

 ## Per-Package README Structure

@@ -26,7 +26,7 @@ Each package README should have:
 1. **Title** — package name
 2. **One-line description** — matching package.json
 3. **Overview** — what it does, where it sits in the architecture, dependencies
-4. **Installation** — pnpm add (for libs) or "included as binary" (for cli/agents)
+4. **Installation** — bun add (for libs) or "included as binary" (for cli/agents)
 5. **API** (lib packages) — all exports from src/index.ts with type signatures, grouped by category, minimal usage examples
 6. **CLI Usage** (cli/agent packages) — command reference with examples
 7. **Internal Structure** — brief src/ file organization
@@ -56,7 +56,7 @@ For each package read:
 - All relative links work
 - Package names match package.json
 - No references to removed/renamed packages
-- pnpm run build still passes
+- bun run build still passes

 ## Guidelines

@@ -23,7 +23,7 @@ roles:
      type: object
      properties:
        $status:
-          enum: ["done"]
+          enum: ["_"]
        thesis:
          type: string
        keyPoints:
@@ -37,4 +37,4 @@ graph:
  $START:
    _: { role: "analyst", prompt: "Analyze the topic in the task and produce a structured summary with key points." }
  analyst:
-    done: { role: "$END", prompt: "Analysis complete. Finish the workflow." }
+    _: { role: "$END", prompt: "Analysis complete. Finish the workflow." }
@@ -1,30 +0,0 @@
-name: eval-simple
-description: "Single-role eval workflow: fixer takes prompt, fixes code, done."
-roles:
-  fixer:
-    description: "Fixes the code based on the prompt"
-    goal: |
-      You are a code fixer. Read the prompt, understand the bug, fix it, and verify by running the tests.
-    capabilities:
-      - code-editing
-      - test-running
-    procedure: |
-      1. Read the prompt to understand what needs to be fixed
-      2. Fix the bug in the source code
-      3. Run the tests mentioned in the prompt to verify
-      4. Output $status=done when tests pass
-    output: "Describe what you fixed and confirm tests pass. Set $status to done."
-    frontmatter:
-      type: object
-      properties:
-        $status:
-          type: string
-          enum: [done]
-        summary:
-          type: string
-      required: [$status, summary]
-graph:
-  $START:
-    _: { role: "fixer", prompt: "Fix the code issue described in the task prompt." }
-  fixer:
-    done: { role: "$END", prompt: "Fix complete." }
@@ -1,8 +1,8 @@
 import { mkdtemp, rm } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
-import { createMemoryStore } from "@ocas/core";
 import { afterEach, beforeEach, describe, expect, test } from "vitest";
+import { createMemoryStore } from "@ocas/core";
 import { storeBuiltinDetail } from "../src/detail.js";
 import { appendSessionTurn, initSessionDir } from "../src/session.js";
 import type { BuiltinTurnPayload } from "../src/types.js";
@@ -1,51 +1,51 @@
-import { mkdir, rm, writeFile } from "node:fs/promises";
-import { tmpdir } from "node:os";
-import { join } from "node:path";
-import { afterAll, beforeAll, describe, expect, it } from "vitest";
+import { describe, it, expect, beforeAll, afterAll } from "vitest";
 import { readFileTool } from "../src/tools/read-file.js";
+import { writeFile, mkdir, rm } from "node:fs/promises";
+import { join } from "node:path";
+import { tmpdir } from "node:os";

 const testDir = join(tmpdir(), `read-file-test-${Date.now()}`);
 const ctx = { cwd: testDir, storageRoot: testDir };

 beforeAll(async () => {
-  await mkdir(testDir, { recursive: true });
-  await writeFile(join(testDir, "hello.txt"), "hello world", "utf8");
+	await mkdir(testDir, { recursive: true });
+	await writeFile(join(testDir, "hello.txt"), "hello world", "utf8");
 });

 afterAll(async () => {
-  await rm(testDir, { recursive: true, force: true });
+	await rm(testDir, { recursive: true, force: true });
 });

 describe("readFileTool", () => {
-  it("reads a file successfully", async () => {
-    const result = await readFileTool.execute({ path: "hello.txt" }, ctx);
-    expect(result).toBe("hello world");
-  });
+	it("reads a file successfully", async () => {
+		const result = await readFileTool.execute({ path: "hello.txt" }, ctx);
+		expect(result).toBe("hello world");
+	});

-  it("returns error for non-existent file", async () => {
-    const result = await readFileTool.execute({ path: "nope.txt" }, ctx);
-    expect(result).toMatch(/^Error:/);
-  });
+	it("returns error for non-existent file", async () => {
+		const result = await readFileTool.execute({ path: "nope.txt" }, ctx);
+		expect(result).toMatch(/^Error:/);
+	});

-  it("returns error for directory", async () => {
-    const result = await readFileTool.execute({ path: "." }, ctx);
-    expect(result).toBe("Error: not a file");
-  });
+	it("returns error for directory", async () => {
+		const result = await readFileTool.execute({ path: "." }, ctx);
+		expect(result).toBe("Error: not a file");
+	});

-  it("returns error when path is not a string", async () => {
-    const result = await readFileTool.execute({ path: 123 }, ctx);
-    expect(result).toBe("Error: path must be a string");
-  });
+	it("returns error when path is not a string", async () => {
+		const result = await readFileTool.execute({ path: 123 }, ctx);
+		expect(result).toBe("Error: path must be a string");
+	});

-  it("returns error when args is null", async () => {
-    const result = await readFileTool.execute(null, ctx);
-    expect(result).toBe("Error: path must be a string");
-  });
+	it("returns error when args is null", async () => {
+		const result = await readFileTool.execute(null, ctx);
+		expect(result).toBe("Error: path must be a string");
+	});

-  it("returns error for file exceeding 512KB limit", async () => {
-    const bigFile = join(testDir, "big.txt");
-    await writeFile(bigFile, Buffer.alloc(512 * 1024 + 1, 65));
-    const result = await readFileTool.execute({ path: "big.txt" }, ctx);
-    expect(result).toMatch(/Error:.*limit/);
-  });
+	it("returns error for file exceeding 512KB limit", async () => {
+		const bigFile = join(testDir, "big.txt");
+		await writeFile(bigFile, Buffer.alloc(512 * 1024 + 1, 65));
+		const result = await readFileTool.execute({ path: "big.txt" }, ctx);
+		expect(result).toMatch(/Error:.*limit/);
+	});
 });
@@ -1,38 +1,38 @@
-import { tmpdir } from "node:os";
-import { describe, expect, it } from "vitest";
+import { describe, it, expect } from "vitest";
 import { runCommandTool } from "../src/tools/run-command.js";
+import { tmpdir } from "node:os";

 const ctx = { cwd: tmpdir(), storageRoot: tmpdir() };

 describe("runCommandTool", () => {
-  it("runs echo command and checks stdout", async () => {
-    const result = await runCommandTool.execute({ command: "echo hello" }, ctx);
-    expect(result).toContain("hello");
-    expect(result).toContain("stdout");
-  });
+	it("runs echo command and checks stdout", async () => {
+		const result = await runCommandTool.execute({ command: "echo hello" }, ctx);
+		expect(result).toContain("hello");
+		expect(result).toContain("stdout");
+	});

-  it("returns exit code", async () => {
-    const result = await runCommandTool.execute({ command: "exit 0" }, ctx);
-    expect(result).toContain("exit_code: 0");
-  });
+	it("returns exit code", async () => {
+		const result = await runCommandTool.execute({ command: "exit 0" }, ctx);
+		expect(result).toContain("exit_code: 0");
+	});

-  it("returns non-zero exit code", async () => {
-    const result = await runCommandTool.execute({ command: "exit 42" }, ctx);
-    expect(result).toContain("exit_code: 42");
-  });
+	it("returns non-zero exit code", async () => {
+		const result = await runCommandTool.execute({ command: "exit 42" }, ctx);
+		expect(result).toContain("exit_code: 42");
+	});

-  it("returns error when command is not a string", async () => {
-    const result = await runCommandTool.execute({ command: 123 }, ctx);
-    expect(result).toBe("Error: command must be a string");
-  });
+	it("returns error when command is not a string", async () => {
+		const result = await runCommandTool.execute({ command: 123 }, ctx);
+		expect(result).toBe("Error: command must be a string");
+	});

-  it("returns error when args is null", async () => {
-    const result = await runCommandTool.execute(null, ctx);
-    expect(result).toBe("Error: command must be a string");
-  });
+	it("returns error when args is null", async () => {
+		const result = await runCommandTool.execute(null, ctx);
+		expect(result).toBe("Error: command must be a string");
+	});

-  it("custom cwd works", async () => {
-    const result = await runCommandTool.execute({ command: "pwd", cwd: "/tmp" }, ctx);
-    expect(result).toContain("/tmp");
-  });
+	it("custom cwd works", async () => {
+		const result = await runCommandTool.execute({ command: "pwd", cwd: "/tmp" }, ctx);
+		expect(result).toContain("/tmp");
+	});
 });
@@ -3,13 +3,13 @@ import { mkdtemp, rm } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
 import { afterEach, beforeEach, describe, expect, test } from "vitest";
+import type { BuiltinTurnPayload } from "../src/types.js";
 import {
  appendSessionTurn,
  initSessionDir,
  readSessionTurns,
  removeSession,
 } from "../src/session.js";
-import type { BuiltinTurnPayload } from "../src/types.js";

 describe("session", () => {
  let storageRoot: string;
@@ -1,43 +1,43 @@
-import { readFile, rm } from "node:fs/promises";
-import { tmpdir } from "node:os";
-import { join } from "node:path";
-import { afterAll, describe, expect, it } from "vitest";
+import { describe, it, expect, afterAll } from "vitest";
 import { writeFileTool } from "../src/tools/write-file.js";
+import { readFile, rm } from "node:fs/promises";
+import { join } from "node:path";
+import { tmpdir } from "node:os";

 const testDir = join(tmpdir(), `write-file-test-${Date.now()}`);
 const ctx = { cwd: testDir, storageRoot: testDir };

 afterAll(async () => {
-  await rm(testDir, { recursive: true, force: true });
+	await rm(testDir, { recursive: true, force: true });
 });

 describe("writeFileTool", () => {
-  it("writes file successfully", async () => {
-    const result = await writeFileTool.execute({ path: "out.txt", content: "hi" }, ctx);
-    expect(result).toMatch(/Wrote 2 bytes/);
-    const content = await readFile(join(testDir, "out.txt"), "utf8");
-    expect(content).toBe("hi");
-  });
+	it("writes file successfully", async () => {
+		const result = await writeFileTool.execute({ path: "out.txt", content: "hi" }, ctx);
+		expect(result).toMatch(/Wrote 2 bytes/);
+		const content = await readFile(join(testDir, "out.txt"), "utf8");
+		expect(content).toBe("hi");
+	});

-  it("creates parent directories", async () => {
-    const result = await writeFileTool.execute({ path: "a/b/c.txt", content: "nested" }, ctx);
-    expect(result).toMatch(/Wrote/);
-    const content = await readFile(join(testDir, "a/b/c.txt"), "utf8");
-    expect(content).toBe("nested");
-  });
+	it("creates parent directories", async () => {
+		const result = await writeFileTool.execute({ path: "a/b/c.txt", content: "nested" }, ctx);
+		expect(result).toMatch(/Wrote/);
+		const content = await readFile(join(testDir, "a/b/c.txt"), "utf8");
+		expect(content).toBe("nested");
+	});

-  it("returns error when path is not a string", async () => {
-    const result = await writeFileTool.execute({ path: 123, content: "x" }, ctx);
-    expect(result).toBe("Error: path and content must be strings");
-  });
+	it("returns error when path is not a string", async () => {
+		const result = await writeFileTool.execute({ path: 123, content: "x" }, ctx);
+		expect(result).toBe("Error: path and content must be strings");
+	});

-  it("returns error when content is not a string", async () => {
-    const result = await writeFileTool.execute({ path: "x.txt", content: 42 }, ctx);
-    expect(result).toBe("Error: path and content must be strings");
-  });
+	it("returns error when content is not a string", async () => {
+		const result = await writeFileTool.execute({ path: "x.txt", content: 42 }, ctx);
+		expect(result).toBe("Error: path and content must be strings");
+	});

-  it("returns error when args is null", async () => {
-    const result = await writeFileTool.execute(null, ctx);
-    expect(result).toBe("Error: path and content must be strings");
-  });
+	it("returns error when args is null", async () => {
+		const result = await writeFileTool.execute(null, ctx);
+		expect(result).toBe("Error: path and content must be strings");
+	});
 });
@@ -1,6 +1,6 @@
 {
  "name": "@united-workforce/agent-builtin",
-  "version": "0.1.1",
+  "version": "0.5.0",
  "files": [
    "src",
    "dist",
@@ -8,7 +8,7 @@
  ],
  "type": "module",
  "bin": {
-    "uwf-builtin": "./dist/cli.js"
+    "uwf-builtin": "./src/cli.ts"
  },
  "exports": {
    ".": {
@@ -17,6 +17,7 @@
    }
  },
  "scripts": {
+    "prepublishOnly": "echo 'Use pnpm run release from repo root' && exit 1",
    "test": "vitest run __tests__/",
    "test:ci": "vitest run __tests__/"
  },
@@ -82,13 +82,7 @@ async function runBuiltinWithMessages(

  if (loopResult.turnCount === 0) {
    log("5RWTK9NB", "no turns produced, returning empty output");
-    return {
-      output: "",
-      detailHash: "",
-      sessionId: session.sessionId,
-      assembledPrompt: "",
-      usage: null,
-    };
+    return { output: "", detailHash: "", sessionId: session.sessionId, assembledPrompt: "" };
  }

  // Read jsonl → persist turns to CAS → store detail
@@ -105,7 +99,6 @@ async function runBuiltinWithMessages(
    detailHash,
    sessionId: session.sessionId,
    assembledPrompt: "",
-    usage: null,
  };
 }

@@ -1,12 +1,5 @@
 #!/usr/bin/env node

-// eslint-disable-next-line -- dynamic import for version
-const pkg = await import("../package.json", { with: { type: "json" } });
-if (process.argv.includes("--version") || process.argv.includes("-V")) {
-  process.stdout.write(`${pkg.default.version}\n`);
-  process.exit(0);
-}
-
 import { createBuiltinAgent } from "./agent.js";

 const main = createBuiltinAgent();
@@ -1,6 +1,6 @@
 {
  "name": "@united-workforce/agent-claude-code",
-  "version": "0.1.1",
+  "version": "0.1.0",
  "files": [
    "src",
    "dist",
@@ -8,7 +8,7 @@
  ],
  "type": "module",
  "bin": {
-    "uwf-claude-code": "./dist/cli.js"
+    "uwf-claude-code": "./src/cli.ts"
  },
  "exports": {
    ".": {
@@ -17,12 +17,12 @@
    }
  },
  "scripts": {
+    "prepublishOnly": "echo 'Use pnpm run release from repo root' && exit 1",
    "test": "vitest run __tests__/",
    "test:ci": "vitest run __tests__/"
  },
  "dependencies": {
    "@ocas/core": "^0.3.0",
-    "@united-workforce/protocol": "workspace:^",
    "@united-workforce/util": "workspace:^",
    "@united-workforce/util-agent": "workspace:^"
  },
@@ -1,6 +1,5 @@
 import { spawn } from "node:child_process";
 import type { Store } from "@ocas/core";
-import type { Usage } from "@united-workforce/protocol";
 import { createLogger } from "@united-workforce/util";
 import {
  type AgentContext,
@@ -146,14 +145,7 @@ async function processClaudeOutput(
      );
    }

-    const usage: Usage = {
-      turns: parsed.numTurns,
-      inputTokens: parsed.usage.inputTokens,
-      outputTokens: parsed.usage.outputTokens,
-      duration: Math.round(parsed.durationMs / 1000),
-    };
-
-    return { output, detailHash, sessionId, assembledPrompt, usage };
+    return { output, detailHash, sessionId, assembledPrompt };
  }

  // Truly unparseable output - provide enhanced error message
@@ -1,12 +1,5 @@
 #!/usr/bin/env node

-// eslint-disable-next-line -- dynamic import for version
-const pkg = await import("../package.json", { with: { type: "json" } });
-if (process.argv.includes("--version") || process.argv.includes("-V")) {
-  process.stdout.write(`${pkg.default.version}\n`);
-  process.exit(0);
-}
-
 import { createClaudeCodeAgent } from "./claude-code.js";

 const model = process.env.CLAUDE_MODEL ?? null;
@@ -2,5 +2,5 @@
  "extends": "../../tsconfig.json",
  "compilerOptions": { "rootDir": "src", "outDir": "dist" },
  "include": ["src"],
-  "references": [{ "path": "../protocol" }, { "path": "../util-agent" }]
+  "references": [{ "path": "../util-agent" }]
 }
@@ -1,18 +0,0 @@
-# @united-workforce/agent-hermes
-
-## 0.1.1
-
-### Patch Changes
-
-- 8085d1d: fix: read token usage from ACP PromptResponse instead of DB
-
-  Token counts (inputTokens, outputTokens) now come from the ACP
-  `PromptResponse.usage` field, which is populated synchronously from
-  `run_conversation()` return data — no WAL race condition.
-
-  Turns (assistant message count) still come from the DB via
-  `snapshotTurns()` before/after delta.
-
-  Previously both tokens and turns were read from the Hermes state DB
-  after the ACP prompt returned, but due to WAL write lag the DB often
-  had incomplete token data at read time (e.g. 235 vs actual 26,080).
@@ -0,0 +1,55 @@
+import { afterEach, beforeEach, describe, expect, it } from "vitest";
+import { HermesAcpClient } from "../../src/acp-client.js";
+
+const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
+
+/**
+ * Exercises HermesAcpClient end to end: connect, a single prompt, and two
+ * prompts on the same session. Every test gets its own client instance, which
+ * is closed again in `afterEach`.
+ */
+describe("HermesAcpClient", () => {
+  // Shared per-test options. Passed as the SECOND argument to `it`, because
+  // Vitest deprecates the trailing-object form `it(name, fn, { ... })`.
+  const slowTest = { timeout: 2 * 60 * 1000 };
+
+  let client: HermesAcpClient;
+
+  beforeEach(() => {
+    client = new HermesAcpClient();
+  });
+
+  afterEach(async () => {
+    await client.close();
+  });
+
+  it(
+    "connect() returns a UUID sessionId",
+    slowTest,
+    async () => {
+      const sessionId = await client.connect(process.cwd());
+      expect(typeof sessionId).toBe("string");
+      expect(sessionId).toMatch(UUID_RE);
+    },
+  );
+
+  it(
+    "prompt() returns a non-empty text response",
+    slowTest,
+    async () => {
+      await client.connect(process.cwd());
+      const result = await client.prompt("Reply with exactly the word: PONG");
+      // Only shape is asserted (non-empty text, UUID session id), not the wording.
+      expect(typeof result.text).toBe("string");
+      expect(result.text.length).toBeGreaterThan(0);
+      expect(typeof result.sessionId).toBe("string");
+      expect(result.sessionId).toMatch(UUID_RE);
+    },
+  );
+
+  it(
+    "prompt() can be called twice on the same session (resume)",
+    slowTest,
+    async () => {
+      await client.connect(process.cwd());
+
+      const first = await client.prompt("Say the word ALPHA and nothing else.");
+      expect(first.text.length).toBeGreaterThan(0);
+
+      const second = await client.prompt("Now say the word BETA and nothing else.");
+      expect(second.text.length).toBeGreaterThan(0);
+
+      // Both prompts must report the same session id.
+      expect(first.sessionId).toBe(second.sessionId);
+    },
+  );
+});
@@ -0,0 +1,56 @@
+import { afterEach, describe, expect, it } from "vitest";
+import { HermesAcpClient } from "../../src/acp-client.js";
+
+/**
+ * E2E test for cross-process session resume.
+ *
+ * Simulates the workflow re-entry scenario:
+ * 1. Client A: connect → prompt → close (developer first run)
+ * 2. Client B: resume(sessionId) → prompt (developer re-entry after reviewer reject)
+ *
+ * This is what happens when uwf thread step spawns uwf-hermes twice for the same role.
+ */
+describe("HermesAcpClient cross-process resume", () => {
+  // Every client created by a test is registered here so afterEach can close it.
+  const clients: HermesAcpClient[] = [];
+
+  afterEach(async () => {
+    for (const c of clients) {
+      await c.close();
+    }
+    clients.length = 0;
+  });
+
+  // TODO(#435): flaky — depends on live LLM; mock or move to integration suite
+  it.skip(
+    "resume() after close — second prompt returns non-empty text",
+    // Options go before the callback; Vitest deprecates the trailing-object form.
+    { timeout: 3 * 60 * 1000 },
+    async () => {
+      // --- Client A: first run ---
+      const clientA = new HermesAcpClient();
+      clients.push(clientA);
+
+      await clientA.connect(process.cwd());
+      const first = await clientA.prompt(
+        "Remember the secret code: WATERMELON. Reply with exactly: ACKNOWLEDGED",
+      );
+      expect(first.text.length).toBeGreaterThan(0);
+      const sessionId = first.sessionId;
+
+      // Close client A (simulates uwf-hermes process exit).
+      // NOTE(review): clientA stays in `clients`, so afterEach closes it a second
+      // time — assumes close() tolerates repeated calls; confirm.
+      await clientA.close();
+
+      // --- Client B: resume (simulates re-entry) ---
+      const clientB = new HermesAcpClient();
+      clients.push(clientB);
+
+      await clientB.resume(sessionId, process.cwd());
+      const second = await clientB.prompt(
+        "What was the secret code I told you earlier? Reply with just the code word.",
+      );
+
+      // The critical assertion: resumed session produces non-empty output
+      expect(second.text.length).toBeGreaterThan(0);
+      expect(second.sessionId).toBe(sessionId);
+    },
+  );
+});
@@ -140,9 +140,7 @@ function createTestDb(dbPath: string): TestDb {
  db.exec(`CREATE TABLE sessions (
    id TEXT PRIMARY KEY,
    model TEXT NOT NULL,
-    started_at INTEGER NOT NULL,
-    input_tokens INTEGER DEFAULT 0,
-    output_tokens INTEGER DEFAULT 0
+    started_at INTEGER NOT NULL
  )`);
  db.exec(`CREATE TABLE messages (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -1,122 +0,0 @@
-import { describe, expect, test } from "vitest";
-import type { AcpUsage } from "../src/acp-client.js";
-import { buildUsage, snapshotTurns } from "../src/hermes.js";
-import type { HermesSessionJson } from "../src/types.js";
-
-function makeSession(overrides: Partial<HermesSessionJson> = {}): HermesSessionJson {
-  return {
-    session_id: "test-session",
-    model: "test-model",
-    session_start: "2026-01-01T00:00:00Z",
-    messages: [],
-    inputTokens: 0,
-    outputTokens: 0,
-    ...overrides,
-  };
-}
-
-describe("snapshotTurns", () => {
-  test("returns zero for null session", () => {
-    const result = snapshotTurns(null);
-    expect(result).toEqual({ turns: 0 });
-  });
-
-  test("returns zero for empty session", () => {
-    const result = snapshotTurns(makeSession());
-    expect(result).toEqual({ turns: 0 });
-  });
-
-  test("counts assistant messages as turns", () => {
-    const result = snapshotTurns(
-      makeSession({
-        messages: [
-          { role: "user", content: "hello", reasoning: null, tool_calls: null },
-          { role: "assistant", content: "hi", reasoning: null, tool_calls: null },
-          { role: "user", content: "do X", reasoning: null, tool_calls: null },
-          { role: "tool", content: "result", reasoning: null, tool_calls: null },
-          { role: "assistant", content: "done", reasoning: null, tool_calls: null },
-        ],
-        inputTokens: 1000,
-        outputTokens: 500,
-      }),
-    );
-    expect(result).toEqual({ turns: 2 });
-  });
-
-  test("ignores non-assistant messages for turn count", () => {
-    const result = snapshotTurns(
-      makeSession({
-        messages: [
-          { role: "user", content: "hello", reasoning: null, tool_calls: null },
-          { role: "tool", content: "result", reasoning: null, tool_calls: null },
-        ],
-      }),
-    );
-    expect(result.turns).toBe(0);
-  });
-});
-
-describe("buildUsage", () => {
-  const acpUsage: AcpUsage = { inputTokens: 5000, outputTokens: 2000, totalTokens: 7000 };
-
-  test("first visit: tokens from ACP, turns from DB delta", () => {
-    const beforeTurns = { turns: 0 };
-    const afterTurns = { turns: 3 };
-    const result = buildUsage(acpUsage, beforeTurns, afterTurns, 12.5);
-    expect(result).toEqual({
-      turns: 3,
-      inputTokens: 5000,
-      outputTokens: 2000,
-      duration: 13,
-    });
-  });
-
-  test("re-entry: turn delta computed correctly, tokens from ACP", () => {
-    const beforeTurns = { turns: 2 };
-    const afterTurns = { turns: 4 };
-    const acpDelta: AcpUsage = { inputTokens: 8000, outputTokens: 3500, totalTokens: 11500 };
-    const result = buildUsage(acpDelta, beforeTurns, afterTurns, 7.3);
-    expect(result).toEqual({
-      turns: 2,
-      inputTokens: 8000,
-      outputTokens: 3500,
-      duration: 7,
-    });
-  });
-
-  test("floors negative turn deltas at 0, then defaults to 1", () => {
-    const beforeTurns = { turns: 5 };
-    const afterTurns = { turns: 3 };
-    const result = buildUsage(acpUsage, beforeTurns, afterTurns, 1.0);
-    // turns would be negative (-2), floored to 0, then || 1 gives 1
-    expect(result.turns).toBe(1);
-  });
-
-  test("zero turns delta defaults to 1 (at least one turn happened)", () => {
-    const beforeTurns = { turns: 3 };
-    const afterTurns = { turns: 3 };
-    const result = buildUsage(acpUsage, beforeTurns, afterTurns, 5.0);
-    // turns delta is 0, || 1 gives 1
-    expect(result.turns).toBe(1);
-  });
-
-  test("null ACP usage yields zero tokens", () => {
-    const beforeTurns = { turns: 0 };
-    const afterTurns = { turns: 2 };
-    const result = buildUsage(null, beforeTurns, afterTurns, 10.0);
-    expect(result).toEqual({
-      turns: 2,
-      inputTokens: 0,
-      outputTokens: 0,
-      duration: 10,
-    });
-  });
-
-  test("duration is rounded", () => {
-    const beforeTurns = { turns: 0 };
-    const afterTurns = { turns: 1 };
-    expect(buildUsage(acpUsage, beforeTurns, afterTurns, 3.7).duration).toBe(4);
-    expect(buildUsage(acpUsage, beforeTurns, afterTurns, 3.2).duration).toBe(3);
-    expect(buildUsage(acpUsage, beforeTurns, afterTurns, 0.0).duration).toBe(0);
-  });
-});
@@ -1,6 +1,6 @@
 {
  "name": "@united-workforce/agent-hermes",
-  "version": "0.1.2",
+  "version": "0.5.0",
  "files": [
    "src",
    "dist",
@@ -8,7 +8,7 @@
  ],
  "type": "module",
  "bin": {
-    "uwf-hermes": "./dist/cli.js"
+    "uwf-hermes": "./src/cli.ts"
  },
  "exports": {
    ".": {
@@ -17,6 +17,7 @@
    }
  },
  "scripts": {
+    "prepublishOnly": "echo 'Use pnpm run release from repo root' && exit 1",
    "test": "vitest run __tests__/",
    "test:ci": "vitest run __tests__/"
  },
@@ -1,16 +1,6 @@
 import type { ChildProcess } from "node:child_process";
 import { spawn } from "node:child_process";
-import { readFileSync } from "node:fs";
-import { dirname, join } from "node:path";
 import { createInterface } from "node:readline";
-import { fileURLToPath } from "node:url";
-
-const __dirname = dirname(fileURLToPath(import.meta.url));
-const OWN_VERSION = (
-  JSON.parse(readFileSync(join(__dirname, "..", "package.json"), "utf-8")) as {
-    version: string;
-  }
-).version;

 const HERMES_COMMAND = "hermes";
 const PROTOCOL_VERSION = 1;
@@ -27,17 +17,9 @@ type PendingRequest = {
  reject: (reason: Error) => void;
 };

-/** Token usage returned by ACP PromptResponse. */
-export type AcpUsage = {
-  inputTokens: number;
-  outputTokens: number;
-  totalTokens: number;
-};
-
 export type AcpPromptResult = {
  text: string;
  sessionId: string;
-  usage: AcpUsage | null;
 };

 export class HermesAcpClient {
@@ -90,11 +72,6 @@ export class HermesAcpClient {
    return sessionId;
  }

-  /** Return the current session ID, or null if not connected. */
-  getSessionId(): string | null {
-    return this.sessionId;
-  }
-
  /** Send prompt and collect final assistant text from ACP stream chunks. */
  async prompt(text: string): Promise<AcpPromptResult> {
    if (this.sessionId === null) {
@@ -114,25 +91,9 @@ export class HermesAcpClient {
      );
    }

-    // Extract token usage from ACP PromptResponse.result.usage (camelCase wire format)
-    const result = (response as { result?: Record<string, unknown> }).result;
-    const rawUsage = result?.usage as Record<string, unknown> | undefined;
-    const usage: AcpUsage | null =
-      rawUsage !== undefined &&
-      typeof rawUsage.inputTokens === "number" &&
-      typeof rawUsage.outputTokens === "number" &&
-      typeof rawUsage.totalTokens === "number"
-        ? {
-            inputTokens: rawUsage.inputTokens,
-            outputTokens: rawUsage.outputTokens,
-            totalTokens: rawUsage.totalTokens,
-          }
-        : null;
-
    return {
      text: this.messageChunks.join(""),
      sessionId: this.sessionId,
-      usage,
    };
  }

@@ -309,7 +270,7 @@ export class HermesAcpClient {
  private async initialize(): Promise<void> {
    const initResponse = await this.sendRequest("initialize", {
      protocolVersion: PROTOCOL_VERSION,
-      clientInfo: { name: "uwf-hermes", version: OWN_VERSION },
+      clientInfo: { name: "uwf", version: "0.1.0" },
      capabilities: {},
    });

@@ -1,12 +1,5 @@
 #!/usr/bin/env node

-// eslint-disable-next-line -- dynamic import for version
-const pkg = await import("../package.json", { with: { type: "json" } });
-if (process.argv.includes("--version") || process.argv.includes("-V")) {
-  process.stdout.write(`${pkg.default.version}\n`);
-  process.exit(0);
-}
-
 import { createHermesAgent } from "./hermes.js";
 import { isResumeDisabled } from "./session-cache.js";

@@ -1,5 +1,4 @@
 import type { Store } from "@ocas/core";
-import type { Usage } from "@united-workforce/protocol";
 import { createLogger } from "@united-workforce/util";
 import {
  type AgentContext,
@@ -8,50 +7,13 @@ import {
  buildRolePrompt,
  createAgent,
 } from "@united-workforce/util-agent";
-import type { AcpUsage } from "./acp-client.js";
+
 import { HermesAcpClient } from "./acp-client.js";
 import { getCachedSessionId, setCachedSessionId } from "./session-cache.js";
 import { loadHermesSession, storeHermesSessionDetail } from "./session-detail.js";
-import type { HermesSessionJson } from "./types.js";

 const log = createLogger({ sink: { kind: "stderr" } });

-/** Snapshot of session metrics taken before and after a prompt call. */
-type TurnsSnapshot = {
-  turns: number;
-};
-
-const ZERO_TURNS: TurnsSnapshot = { turns: 0 };
-
-/** Extract assistant turn count from a session. Returns zero for null sessions. */
-export function snapshotTurns(session: HermesSessionJson | null): TurnsSnapshot {
-  if (session === null) {
-    return ZERO_TURNS;
-  }
-  return {
-    turns: session.messages.filter((m) => m.role === "assistant").length,
-  };
-}
-
-/**
- * Build Usage from ACP token data + DB turn delta.
- * Tokens come from ACP PromptResponse (synchronous, accurate).
- * Turns come from DB before/after snapshots (may have WAL lag, but acceptable).
- */
-export function buildUsage(
-  acpUsage: AcpUsage | null,
-  beforeTurns: TurnsSnapshot,
-  afterTurns: TurnsSnapshot,
-  durationSec: number,
-): Usage {
-  return {
-    turns: Math.max(0, afterTurns.turns - beforeTurns.turns) || 1,
-    inputTokens: acpUsage?.inputTokens ?? 0,
-    outputTokens: acpUsage?.outputTokens ?? 0,
-    duration: Math.round(durationSec),
-  };
-}
-
 /** Assemble system prompt, task, and prior step outputs for Hermes. */
 export function buildHermesPrompt(ctx: AgentContext): string {
  const parts: string[] = [];
@@ -146,45 +108,25 @@ export function createHermesAgent(resumeDisabled: boolean): () => Promise<void>
    void client.close();
  });

-  async function runPrompt(
-    ctx: AgentContext,
-    useContinuation: boolean,
-    beforeTurns: TurnsSnapshot,
-  ): Promise<AgentRunResult> {
+  async function runPrompt(ctx: AgentContext, useContinuation: boolean): Promise<AgentRunResult> {
    const effectiveCtx = useContinuation ? ctx : { ...ctx, isFirstVisit: true };
    const fullPrompt = buildHermesPrompt(effectiveCtx);
-    const startMs = Date.now();
-    const { text, sessionId, usage: acpUsage } = await client.prompt(fullPrompt);
-    const durationSec = (Date.now() - startMs) / 1000;
+    const { text, sessionId } = await client.prompt(fullPrompt);
    const { detailHash } = await storePromptResult(ctx.store, sessionId);

    if (!resumeDisabled) {
      await setCachedSessionId(ctx.threadId, ctx.role, sessionId, ctx.storageRoot);
    }

-    // Turns from DB (may lag slightly due to WAL, but acceptable)
-    const afterSession = await loadHermesSession(sessionId);
-    const afterTurns = snapshotTurns(afterSession);
-    const usage = buildUsage(acpUsage, beforeTurns, afterTurns, durationSec);
-
-    return { output: text, detailHash, sessionId, assembledPrompt: fullPrompt, usage };
+    return { output: text, detailHash, sessionId, assembledPrompt: fullPrompt };
  }

  async function runHermes(ctx: AgentContext): Promise<AgentRunResult> {
    const cwd = process.cwd();
    const attempt = await prepareSession(client, ctx, cwd, resumeDisabled);

-    // Snapshot before prompt: for resumed sessions, captures cumulative state
-    // so we can compute the turn delta. For new sessions, this is ZERO_TURNS.
-    const currentSessionId = client.getSessionId();
-    const beforeSession =
-      attempt.resumed && currentSessionId !== null
-        ? await loadHermesSession(currentSessionId)
-        : null;
-    const beforeTurns = snapshotTurns(beforeSession);
-
    try {
-      return await runPrompt(ctx, attempt.useContinuation, beforeTurns);
+      return await runPrompt(ctx, attempt.useContinuation);
    } catch (error) {
      if (!attempt.resumed) {
        throw error;
@@ -194,8 +136,7 @@ export function createHermesAgent(resumeDisabled: boolean): () => Promise<void>
      log("8FQW2R6N", `continuation prompt failed, retrying with initial prompt: ${message}`);
      await client.close();
      await client.connect(cwd);
-      // Fresh session after retry — reset snapshot to zero
-      return runPrompt(ctx, false, ZERO_TURNS);
+      return runPrompt(ctx, false);
    }
  }

@@ -206,22 +147,9 @@ export function createHermesAgent(resumeDisabled: boolean): () => Promise<void>
  ): Promise<AgentRunResult> {
    // Client is already connected from runHermes — same ACP session,
    // so the agent sees the full conversation history (crucial for retries).
-    // Snapshot turns before the continuation prompt for delta computation.
-    const currentSessionId = client.getSessionId();
-    const beforeSession =
-      currentSessionId !== null ? await loadHermesSession(currentSessionId) : null;
-    const beforeTurns = snapshotTurns(beforeSession);
-
-    const startMs = Date.now();
-    const { text, sessionId, usage: acpUsage } = await client.prompt(message);
-    const durationSec = (Date.now() - startMs) / 1000;
+    const { text, sessionId } = await client.prompt(message);
    const { detailHash } = await storePromptResult(store, sessionId);
-
-    const afterSession = await loadHermesSession(sessionId);
-    const afterTurns = snapshotTurns(afterSession);
-    const usage = buildUsage(acpUsage, beforeTurns, afterTurns, durationSec);
-
-    return { output: text, detailHash, sessionId, assembledPrompt: "", usage };
+    return { output: text, detailHash, sessionId, assembledPrompt: "" };
  }

  const agentMain = createAgent({
@@ -1,8 +1,2 @@
-export type { AcpUsage } from "./acp-client.js";
 export { HermesAcpClient } from "./acp-client.js";
-export {
-  buildHermesPrompt,
-  buildUsage,
-  createHermesAgent,
-  snapshotTurns,
-} from "./hermes.js";
+export { buildHermesPrompt, createHermesAgent } from "./hermes.js";
@@ -106,7 +106,7 @@ function parseSessionJson(raw: unknown): HermesSessionJson | null {
      messages.push(msg);
    }
  }
-  return { session_id, model, session_start, messages, inputTokens: 0, outputTokens: 0 };
+  return { session_id, model, session_start, messages };
 }

 export function getHermesDbPath(): string {
@@ -117,8 +117,6 @@ type DbSessionRow = {
  id: string;
  model: string;
  started_at: number;
-  input_tokens: number;
-  output_tokens: number;
 };

 type DbMessageRow = {
@@ -158,9 +156,7 @@ export function loadHermesSessionFromDb(
  try {
    db = new DatabaseSync(resolvedPath, { readOnly: true });
    const session = db
-      .prepare(
-        "SELECT id, model, started_at, input_tokens, output_tokens FROM sessions WHERE id = ?",
-      )
+      .prepare("SELECT id, model, started_at FROM sessions WHERE id = ?")
      .get(sessionId) as DbSessionRow | null;
    if (session === null) {
      return null;
@@ -185,8 +181,6 @@ export function loadHermesSessionFromDb(
      model: session.model,
      session_start: new Date(session.started_at * 1000).toISOString(),
      messages,
-      inputTokens: session.input_tokens ?? 0,
-      outputTokens: session.output_tokens ?? 0,
    };
  } catch {
    return null;
@@ -40,6 +40,4 @@ export type HermesSessionJson = {
  model: string;
  session_start: string;
  messages: HermesSessionMessage[];
-  inputTokens: number;
-  outputTokens: number;
 };
@@ -1,6 +1,6 @@
 {
  "name": "@united-workforce/agent-mock",
-  "version": "0.1.1",
+  "version": "0.5.0",
  "files": [
    "src",
    "dist",
@@ -17,6 +17,7 @@
    }
  },
  "scripts": {
+    "prepublishOnly": "echo 'Use pnpm run release from repo root' && exit 1",
    "test": "vitest run __tests__/",
    "test:ci": "vitest run __tests__/"
  },
@@ -1,12 +1,5 @@
 #!/usr/bin/env node

-// eslint-disable-next-line -- dynamic import for version
-const pkg = await import("../package.json", { with: { type: "json" } });
-if (process.argv.includes("--version") || process.argv.includes("-V")) {
-  process.stdout.write(`${pkg.default.version}\n`);
-  process.exit(0);
-}
-
 import { createMockAgent } from "./mock-agent.js";

 const USAGE = "usage: uwf-mock --mock-data <path> --thread <id> --role <role> --prompt <text>";
@@ -103,7 +103,6 @@ export function createMockAgent(mockDataPath: string): () => Promise<void> {
      detailHash,
      sessionId,
      assembledPrompt: "",
-      usage: { turns: 1, inputTokens: 0, outputTokens: 0, duration: 0 },
    };
    lastResult = result;
    return result;
@@ -1,9 +0,0 @@
-# @united-workforce/cli
-
-## 0.1.1
-
-### Patch Changes
-
-- 850a3b2: fix: resolve --agent override via config alias before raw command
-
-  `resolveAgentConfig()` now checks `config.agents[alias]` first before falling back to `parseAgentOverride()`. Eval CLI default `--agent` changed from `"hermes"` to `"uwf-hermes"`.
@@ -1,6 +1,6 @@
 {
  "name": "@united-workforce/cli",
-  "version": "0.1.1",
+  "version": "0.5.0",
  "files": [
    "src",
    "dist",
@@ -22,6 +22,7 @@
    "yaml": "^2.8.4"
  },
  "scripts": {
+    "prepublishOnly": "echo 'Use pnpm run release from repo root' && exit 1",
    "test": "vitest run src/",
    "test:ci": "vitest run src/"
  },
@@ -6,7 +6,13 @@ import type { CasRef, ThreadId } from "@united-workforce/protocol";
 import { describe, expect, test } from "vitest";
 import { createMarker, deleteMarker } from "../background/index.js";
 import { cmdThreadList, cmdThreadShow, cmdThreadStart } from "../commands/thread.js";
-import { completeThread, createUwfStore, loadActiveThreads, setThread } from "../store.js";
+import {
+  addHistoryEntry,
+  createUwfStore,
+  deleteThread,
+  loadAllThreads,
+  setThread,
+} from "../store.js";

 const OUTPUT_SCHEMA = {
  type: "object" as const,
@@ -42,7 +48,7 @@ roles:
      type: object
      required: ["$status"]
      properties:
-        $status: { type: string, enum: ["done"] }
+        $status: { type: string }
 graph:
  $START:
    _:
@@ -59,7 +65,7 @@ graph:
      prompt: "Try again"
      location: null
  roleB:
-    done:
+    _:
      role: $END
      prompt: "Done"
      location: null
@@ -92,7 +98,7 @@ roles:
      type: object
      required: ["$status"]
      properties:
-        $status: { type: string, enum: ["done"] }
+        $status: { type: string }
  roleC:
    description: Fail role
    goal: Do C
@@ -104,7 +110,7 @@ roles:
      type: object
      required: ["$status"]
      properties:
-        $status: { type: string, enum: ["done"] }
+        $status: { type: string }
 graph:
  $START:
    _:
@@ -121,12 +127,12 @@ graph:
      prompt: "Do C (fail)"
      location: null
  roleB:
-    done:
+    _:
      role: $END
      prompt: "Done"
      location: null
  roleC:
-    done:
+    _:
      role: $END
      prompt: "Done"
      location: null
@@ -147,7 +153,7 @@ roles:
      type: object
      required: ["$status"]
      properties:
-        $status: { type: string, enum: ["done"] }
+        $status: { type: string }
 graph:
  $START:
    _:
@@ -155,7 +161,7 @@ graph:
      prompt: "Work"
      location: null
  worker:
-    done:
+    _:
      role: $END
      prompt: "Done"
      location: null
@@ -169,7 +175,7 @@ async function insertStepNode(
  outputPayload: Record<string, unknown>,
 ): Promise<void> {
  const uwf = await createUwfStore(storageRoot);
-  const index = loadActiveThreads(uwf.varStore);
+  const index = loadAllThreads(uwf.varStore);
  const headEntry = index[threadId];
  if (headEntry === undefined) throw new Error(`thread ${threadId} not in index`);
  const head = headEntry.head;
@@ -200,13 +206,7 @@ async function insertStepNode(
    assembledPrompt: null,
  })) as CasRef;

-  setThread(uwf.varStore, threadId, {
-    head: stepHash,
-    status: "idle",
-    suspendedRole: null,
-    suspendMessage: null,
-    completedAt: null,
-  });
+  setThread(uwf.varStore, threadId, { head: stepHash, suspendedRole: null, suspendMessage: null });
 }

 describe("currentRole field", () => {
@@ -282,12 +282,19 @@ describe("currentRole field", () => {
    try {
      const wf = join(tmpDir, "test-current-role.yaml");
      await writeFile(wf, SIMPLE_WORKFLOW_YAML, "utf8");
-      const { thread } = await cmdThreadStart(storageRoot, wf, "test", tmpDir);
+      const { thread, workflow } = await cmdThreadStart(storageRoot, wf, "test", tmpDir);
      const tid = thread as ThreadId;

      const uwfForIndex = await createUwfStore(storageRoot);
-      loadActiveThreads(uwfForIndex.varStore)[tid]!.head;
-      completeThread(uwfForIndex.varStore, tid, "completed");
+      const head = loadAllThreads(uwfForIndex.varStore)[tid]!.head;
+      deleteThread(uwfForIndex.varStore, tid);
+      addHistoryEntry(uwfForIndex.varStore, {
+        thread: tid,
+        workflow,
+        head,
+        completedAt: Date.now(),
+        reason: "completed",
+      });

      const result = await cmdThreadShow(storageRoot, tid);
      expect(result.status).toBe("completed");
@@ -303,12 +310,19 @@ describe("currentRole field", () => {
    try {
      const wf = join(tmpDir, "test-current-role.yaml");
      await writeFile(wf, SIMPLE_WORKFLOW_YAML, "utf8");
-      const { thread } = await cmdThreadStart(storageRoot, wf, "test", tmpDir);
+      const { thread, workflow } = await cmdThreadStart(storageRoot, wf, "test", tmpDir);
      const tid = thread as ThreadId;

      const uwfForIndex = await createUwfStore(storageRoot);
-      loadActiveThreads(uwfForIndex.varStore)[tid]!.head;
-      completeThread(uwfForIndex.varStore, tid, "cancelled");
+      const head = loadAllThreads(uwfForIndex.varStore)[tid]!.head;
+      deleteThread(uwfForIndex.varStore, tid);
+      addHistoryEntry(uwfForIndex.varStore, {
+        thread: tid,
+        workflow,
+        head,
+        completedAt: Date.now(),
+        reason: "cancelled",
+      });

      const result = await cmdThreadShow(storageRoot, tid);
      expect(result.status).toBe("cancelled");
@@ -361,8 +375,15 @@ describe("currentRole field", () => {
      const comp = await cmdThreadStart(storageRoot, wf, "completed", tmpDir);
      const compId = comp.thread as ThreadId;
      const uwfForIndex = await createUwfStore(storageRoot);
-      const _compHead = loadActiveThreads(uwfForIndex.varStore)[compId]!.head;
-      completeThread(uwfForIndex.varStore, compId, "completed");
+      const compHead = loadAllThreads(uwfForIndex.varStore)[compId]!.head;
+      deleteThread(uwfForIndex.varStore, compId);
+      addHistoryEntry(uwfForIndex.varStore, {
+        thread: compId,
+        workflow: comp.workflow,
+        head: compHead,
+        completedAt: Date.now(),
+        reason: "completed",
+      });

      const list = await cmdThreadList(storageRoot, null, null, null, 0, 100);

@@ -426,8 +447,8 @@ describe("currentRole field", () => {
      await writeFile(wf, SINGLE_ROLE_WORKFLOW_YAML, "utf8");

      const { thread } = await cmdThreadStart(storageRoot, wf, "test", tmpDir);
-      // worker → done maps to $END
-      await insertStepNode(storageRoot, thread as ThreadId, "worker", { $status: "done" });
+      // worker → _ maps to $END
+      await insertStepNode(storageRoot, thread as ThreadId, "worker", {});

      const result = await cmdThreadShow(storageRoot, thread as ThreadId);
      expect(result.currentRole).toBe(null);
@@ -10,7 +10,7 @@ import { afterEach, beforeAll, beforeEach, describe, expect, test } from "vitest
 import { stringify } from "yaml";
 import { cmdThreadStart } from "../commands/thread.js";
 import { cmdWorkflowAdd } from "../commands/workflow.js";
-import { createUwfStore, getThread } from "../store.js";
+import { createUwfStore, findHistoryEntry, getThread } from "../store.js";

 // ── paths ──────────────────────────────────────────────────────────────────

@@ -106,13 +106,9 @@ async function addWorkflow(workflowFixture: string, workflowName: string): Promi

 type ExecResult = { stdout: string; stderr: string; exitCode: number };

-function runExec(threadId: string, count: number | null = null): ExecResult {
-  const args = [CLI_PATH, "thread", "exec", threadId];
-  if (count !== null) {
-    args.push("--count", String(count));
-  }
+function runExec(threadId: string): ExecResult {
  try {
-    const stdout = execFileSync(process.execPath, args, {
+    const stdout = execFileSync(process.execPath, [CLI_PATH, "thread", "exec", threadId], {
      encoding: "utf8",
      stdio: ["ignore", "pipe", "pipe"],
      env: { ...process.env, UWF_HOME: uwfHome, OCAS_HOME: casDir },
@@ -130,38 +126,11 @@ function runExec(threadId: string, count: number | null = null): ExecResult {
  }
 }

-/** Invoke `uwf thread resume <threadId> -p <prompt>` through the built CLI. */
-function runResume(threadId: string, prompt: string): ExecResult {
-  try {
-    const stdout = execFileSync(
-      process.execPath,
-      [CLI_PATH, "thread", "resume", threadId, "-p", prompt],
-      {
-        encoding: "utf8",
-        stdio: ["ignore", "pipe", "pipe"],
-        env: { ...process.env, UWF_HOME: uwfHome, OCAS_HOME: casDir },
-        cwd: tmpDir,
-        timeout: 30000,
-      },
-    );
-    return { stdout, stderr: "", exitCode: 0 };
-  } catch (e: unknown) {
-    const err = e as NodeJS.ErrnoException & {
-      stdout?: string;
-      stderr?: string;
-      status?: number;
-    };
-    return { stdout: err.stdout ?? "", stderr: err.stderr ?? "", exitCode: err.status ?? 1 };
-  }
-}
-
 type StepOutputJson = {
  thread: string;
  head: string;
  status: string;
  currentRole: string | null;
-  suspendedRole: string | null;
-  suspendMessage: string | null;
  done: boolean;
 };

@@ -229,25 +198,19 @@ describe("E2E mock-agent: full uwf pipeline", () => {
    expect(getStatus(store, s1.output)).toBe("ready");
    expect(getStatus(store, s2.output)).toBe("done");

-    // Mock agent reports usage stats in step nodes.
-    expect(s1.usage).toEqual({ turns: 1, inputTokens: 0, outputTokens: 0, duration: 0 });
-    expect(s2.usage).toEqual({ turns: 1, inputTokens: 0, outputTokens: 0, duration: 0 });
-
    // The start node points at the registered workflow.
    const startNode = store.cas.get(startHash as CasRef);
    expect((startNode!.payload as StartNodePayload).workflow).toBe(workflowHash);

-    // Thread is completed: status changed to "completed", head updated.
+    // Thread is completed: removed from active index, present in history.
    const uwf = await createUwfStore(uwfHome);
-    const finalEntry = getThread(uwf.varStore, threadId);
-    expect(finalEntry).not.toBeNull();
-    expect(finalEntry!.status).toBe("completed");
-    expect(finalEntry!.head).toBe(step2.head);
+    expect(getThread(uwf.varStore, threadId)).toBeNull();
+    const hist = findHistoryEntry(uwf.varStore, threadId);
+    expect(hist).not.toBeNull();
+    expect(hist!.head).toBe(step2.head);
  });

-  test("2. branching workflow loops developer→reviewer→developer→reviewer→$END", {
-    timeout: 30_000,
-  }, async () => {
+  test("2. branching workflow loops developer→reviewer→developer→reviewer→$END", async () => {
    await writeMockConfig("e2e-loop.mock.yaml");
    const workflowHash = await addWorkflow("e2e-loop.workflow.yaml", "test-loop");

@@ -300,14 +263,11 @@ describe("E2E mock-agent: full uwf pipeline", () => {
    expect(getStatus(store, n4.output)).toBe("approved");

    const uwf = await createUwfStore(uwfHome);
-    const finalEntry = getThread(uwf.varStore, threadId);
-    expect(finalEntry).not.toBeNull();
-    expect(finalEntry!.status).toBe("completed");
+    expect(getThread(uwf.varStore, threadId)).toBeNull();
+    expect(findHistoryEntry(uwf.varStore, threadId)).not.toBeNull();
  });

-  test("3. role mismatch in mock data makes the agent exit with an error", {
-    timeout: 30_000,
-  }, async () => {
+  test("3. role mismatch in mock data makes the agent exit with an error", async () => {
    // Reuses the linear workflow but with a mock whose step[1].role is wrong.
    await writeMockConfig("e2e-mismatch.mock.yaml");
    const workflowHash = await addWorkflow("e2e-linear.workflow.yaml", "test-linear");
@@ -327,172 +287,7 @@ describe("E2E mock-agent: full uwf pipeline", () => {

    // The thread remains active (no step node was written for the failed step).
    const uwf = await createUwfStore(uwfHome);
-    const entry = getThread(uwf.varStore, threadId);
-    expect(entry).not.toBeNull();
-    expect(entry!.status).not.toBe("completed");
-    expect(entry!.head).toBe(step1.head);
-  });
-
-  test("4. planner $SUSPEND then resume re-runs planner and reaches $END", {
-    timeout: 30_000,
-  }, async () => {
-    await writeMockConfig("e2e-suspend.mock.yaml");
-    const workflowHash = await addWorkflow("e2e-suspend.workflow.yaml", "test-suspend");
-
-    const start = await cmdThreadStart(uwfHome, workflowHash, "Analyze the task", uwfHome, tmpDir);
-    const threadId = start.thread;
-
-    // Step 1 → planner emits insufficient_info → thread suspends.
-    const step1 = execStep(threadId);
-    expect(step1.status).toBe("suspended");
-    expect(step1.done).toBe(false);
-    expect(step1.currentRole).toBeNull();
-    expect(step1.suspendedRole).toBe("planner");
-    expect(step1.suspendMessage).toBe("Need more info: missing requirements");
-
-    // Thread index entry reflects the suspension with rendered metadata.
-    const suspendedEntry = getThread((await createUwfStore(uwfHome)).varStore, threadId);
-    expect(suspendedEntry).not.toBeNull();
-    expect(suspendedEntry!.status).toBe("suspended");
-    expect(suspendedEntry!.suspendedRole).toBe("planner");
-    expect(suspendedEntry!.suspendMessage).toBe("Need more info: missing requirements");
-
-    // Resume re-runs the planner role; the second scripted step is `ready` → $END.
-    const resume = runResume(threadId, "Here are the requirements");
-    expect(resume.exitCode).toBe(0);
-    const resumeOut = JSON.parse(resume.stdout.trim()) as StepOutputJson;
-    expect(resumeOut.status).toBe("completed");
-    expect(resumeOut.done).toBe(true);
-    expect(resumeOut.currentRole).toBeNull();
-    expect(resumeOut.suspendedRole).toBeNull();
-
-    // CAS chain: suspended planner step → resumed planner step.
-    const store = await openStore(casDir);
-    const s1 = getStepNode(store, step1.head);
-    const s2 = getStepNode(store, resumeOut.head);
-    expect(s1.role).toBe("planner");
-    expect(s2.role).toBe("planner");
-    expect(s2.prev).toBe(step1.head);
-    expect(getStatus(store, s1.output)).toBe("insufficient_info");
-    expect(getStatus(store, s2.output)).toBe("ready");
-
-    const finalEntry = getThread((await createUwfStore(uwfHome)).varStore, threadId);
-    expect(finalEntry).not.toBeNull();
-    expect(finalEntry!.status).toBe("completed");
-    expect(finalEntry!.head).toBe(resumeOut.head);
-  });
-
-  test("5. --count 3 runs the whole linear pipeline in one invocation", {
-    timeout: 30_000,
-  }, async () => {
-    await writeMockConfig("e2e-count.mock.yaml");
-    const workflowHash = await addWorkflow("e2e-count.workflow.yaml", "test-count");
-
-    const start = await cmdThreadStart(uwfHome, workflowHash, "Ship the feature", uwfHome, tmpDir);
-    const threadId = start.thread;
-
-    // Single invocation with --count 3 → moderator drives analyst → developer → reviewer → $END.
-    const { stdout, stderr, exitCode } = runExec(threadId, 3);
-    expect(exitCode, `stderr: ${stderr}`).toBe(0);
-
-    // Multi-step exec emits a JSON array (one entry per executed step).
-    const results = JSON.parse(stdout.trim()) as StepOutputJson[];
-    expect(Array.isArray(results)).toBe(true);
-    expect(results).toHaveLength(3);
-
-    expect(results[0].status).toBe("idle");
-    expect(results[0].currentRole).toBe("developer");
-    expect(results[1].status).toBe("idle");
-    expect(results[1].currentRole).toBe("reviewer");
-    expect(results[2].status).toBe("completed");
-    expect(results[2].done).toBe(true);
-
-    // Verify the CAS chain holds 3 step nodes in the correct order.
-    const store = await openStore(casDir);
-    const n1 = getStepNode(store, results[0].head);
-    const n2 = getStepNode(store, results[1].head);
-    const n3 = getStepNode(store, results[2].head);
-    expect([n1.role, n2.role, n3.role]).toEqual(["analyst", "developer", "reviewer"]);
-    expect(n1.prev).toBeNull();
-    expect(n2.prev).toBe(results[0].head);
-    expect(n3.prev).toBe(results[1].head);
-    expect(new Set([n1.start, n2.start, n3.start]).size).toBe(1);
-
-    const finalEntry = getThread((await createUwfStore(uwfHome)).varStore, threadId);
-    expect(finalEntry).not.toBeNull();
-    expect(finalEntry!.status).toBe("completed");
-    expect(finalEntry!.head).toBe(results[2].head);
-  });
-
-  test("6. mustache edge prompt renders planner variables into the worker step", {
-    timeout: 30_000,
-  }, async () => {
-    await writeMockConfig("e2e-mustache.mock.yaml");
-    const workflowHash = await addWorkflow("e2e-mustache.workflow.yaml", "test-mustache");
-
-    const start = await cmdThreadStart(uwfHome, workflowHash, "Plan the task", uwfHome, tmpDir);
-    const threadId = start.thread;
-
-    // Step 1 → planner emits branch + repoPath.
-    const step1 = execStep(threadId);
-    expect(step1.status).toBe("idle");
-    expect(step1.currentRole).toBe("worker");
-
-    // Step 2 → worker; the moderator renders the templated edge prompt before spawning it.
-    const step2 = execStep(threadId);
-    expect(step2.done).toBe(true);
-    expect(step2.status).toBe("completed");
-
-    const store = await openStore(casDir);
-    const plannerStep = getStepNode(store, step1.head);
-    expect(getStatus(store, plannerStep.output)).toBe("ready");
-
-    // The worker step's edgePrompt is the mustache-rendered template.
-    const workerStep = getStepNode(store, step2.head);
-    expect(workerStep.role).toBe("worker");
-    expect(workerStep.edgePrompt).toContain("fix/42-auth");
-    expect(workerStep.edgePrompt).toContain("/tmp/my-repo");
-    expect(workerStep.edgePrompt).toBe("Work on branch fix/42-auth in /tmp/my-repo");
-  });
-
-  test("7. completed thread can be resumed (衔尾蛇: end → start)", {
-    timeout: 30_000,
-  }, async () => {
-    // Reuse the suspend workflow (planner with ready → $END), but mock data
-    // goes straight to ready on first run, then ready again after resume.
-    await writeMockConfig("e2e-completed-resume.mock.yaml");
-    const workflowHash = await addWorkflow("e2e-suspend.workflow.yaml", "test-suspend");
-
-    const start = await cmdThreadStart(uwfHome, workflowHash, "Do the work", uwfHome, tmpDir);
-    const threadId = start.thread;
-
-    // Step 1: planner outputs ready → $END → thread completed.
-    const step1 = execStep(threadId);
-    expect(step1.done).toBe(true);
-    expect(step1.status).toBe("completed");
-
-    const uwf1 = await createUwfStore(uwfHome);
-    const entry1 = getThread(uwf1.varStore, threadId);
-    expect(entry1).not.toBeNull();
-    expect(entry1!.status).toBe("completed");
-
-    // Resume the completed thread — should re-evaluate $START → planner.
-    const resumeResult = runResume(threadId, "Additional context for round 2");
-    expect(resumeResult.exitCode).toBe(0);
-
-    // After resume step, planner ran again (step index 1 in mock) → ready → $END.
-    const uwf2 = await createUwfStore(uwfHome);
-    const entry2 = getThread(uwf2.varStore, threadId);
-    expect(entry2).not.toBeNull();
-    expect(entry2!.status).toBe("completed");
-    // Head should have advanced (not the same as step1).
-    expect(entry2!.head).not.toBe(step1.head);
-
-    // CAS chain: step2.prev === step1 head (chain is preserved across resume).
-    const store = await openStore(casDir);
-    const resumeOutput = JSON.parse(resumeResult.stdout.trim());
-    const step2Node = getStepNode(store, resumeOutput.head);
-    expect(step2Node.role).toBe("planner");
-    expect(step2Node.prev).toBe(step1.head);
+    expect(getThread(uwf.varStore, threadId)).not.toBeNull();
+    expect(getThread(uwf.varStore, threadId)!.head).toBe(step1.head);
  });
 });
@@ -1,15 +0,0 @@
-steps:
-  # Step 0: planner → ready → $END (thread completes)
-  - role: planner
-    output: |
-      ---
-      $status: ready
-      ---
-      Initial plan complete.
-  # Step 1: after resume, planner runs again from $START → ready → $END again
-  - role: planner
-    output: |
-      ---
-      $status: ready
-      ---
-      Revised plan after resume.
@@ -1,19 +0,0 @@
-steps:
-  - role: analyst
-    output: |
-      ---
-      $status: analyzed
-      ---
-      Analysis complete.
-  - role: developer
-    output: |
-      ---
-      $status: implemented
-      ---
-      Implementation complete.
-  - role: reviewer
-    output: |
-      ---
-      $status: approved
-      ---
-      Approved.
@@ -1,45 +0,0 @@
-name: test-count
-description: 3-step linear pipeline (analyst -> developer -> reviewer -> $END)
-roles:
-  analyst:
-    description: Analyzes the task
-    goal: Analyze the task
-    capabilities: []
-    procedure: Analyze it
-    output: Output the analysis and set $status to analyzed
-    frontmatter:
-      oneOf:
-        - properties:
-            $status: { const: analyzed }
-          required: [$status]
-  developer:
-    description: Implements the change
-    goal: Implement the change
-    capabilities: []
-    procedure: Write code
-    output: Output the implementation and set $status to implemented
-    frontmatter:
-      oneOf:
-        - properties:
-            $status: { const: implemented }
-          required: [$status]
-  reviewer:
-    description: Reviews the change
-    goal: Review the change
-    capabilities: []
-    procedure: Review code
-    output: Approve and set $status to approved
-    frontmatter:
-      oneOf:
-        - properties:
-            $status: { const: approved }
-          required: [$status]
-graph:
-  $START:
-    _: { role: analyst, prompt: 'Analyze the task' }
-  analyst:
-    analyzed: { role: developer, prompt: 'Implement the change' }
-  developer:
-    implemented: { role: reviewer, prompt: 'Review the change' }
-  reviewer:
-    approved: { role: '$END', prompt: 'Done' }
@@ -1,15 +0,0 @@
-steps:
-  - role: planner
-    output: |
-      ---
-      $status: ready
-      branch: fix/42-auth
-      repoPath: /tmp/my-repo
-      ---
-      Planned the work.
-  - role: worker
-    output: |
-      ---
-      $status: done
-      ---
-      Work complete.
@@ -1,34 +0,0 @@
-name: test-mustache
-description: Planner emits template variables consumed by the worker edge prompt
-roles:
-  planner:
-    description: Plans work and emits branch + repo path
-    goal: Plan the task
-    capabilities: []
-    procedure: Decide the branch and repo path
-    output: Set $status to ready and emit branch and repoPath
-    frontmatter:
-      oneOf:
-        - properties:
-            $status: { const: ready }
-            branch: { type: string }
-            repoPath: { type: string }
-          required: [$status, branch, repoPath]
-  worker:
-    description: Works on the planned branch
-    goal: Do the work
-    capabilities: []
-    procedure: Do it
-    output: Output the result and set $status to done
-    frontmatter:
-      oneOf:
-        - properties:
-            $status: { const: done }
-          required: [$status]
-graph:
-  $START:
-    _: { role: planner, prompt: 'Plan the task' }
-  planner:
-    ready: { role: worker, prompt: 'Work on branch {{{branch}}} in {{{repoPath}}}' }
-  worker:
-    done: { role: '$END', prompt: 'Complete' }
@@ -1,14 +0,0 @@
-steps:
-  - role: planner
-    output: |
-      ---
-      $status: insufficient_info
-      reason: missing requirements
-      ---
-      I need more information before I can plan this.
-  - role: planner
-    output: |
-      ---
-      $status: ready
-      ---
-      I now have what I need. Ready to proceed.
@@ -1,24 +0,0 @@
-name: test-suspend
-description: Planner can suspend for more info or finish when ready
-roles:
-  planner:
-    description: Plans work and may request more info
-    goal: Analyze the task
-    capabilities: []
-    procedure: Analyze the task and decide if more info is needed
-    output: Set $status to insufficient_info (with reason) or ready
-    frontmatter:
-      oneOf:
-        - properties:
-            $status: { const: insufficient_info }
-            reason: { type: string }
-          required: [$status, reason]
-        - properties:
-            $status: { const: ready }
-          required: [$status]
-graph:
-  $START:
-    _: { role: planner, prompt: 'Analyze the task' }
-  planner:
-    insufficient_info: { role: '$SUSPEND', prompt: 'Need more info: {{{reason}}}' }
-    ready: { role: '$END', prompt: 'Done' }
@@ -8,10 +8,10 @@ const solveIssueGraph: WorkflowPayload["graph"] = {
    _: { role: "planner", prompt: "Start planning from the issue in the task.", location: null },
  },
  planner: {
-    planned: { role: "developer", prompt: "Implement the plan: {{plan}}", location: null },
+    _: { role: "developer", prompt: "Implement the plan: {{plan}}", location: null },
  },
  developer: {
-    implemented: { role: "reviewer", prompt: "Review the changes: {{summary}}", location: null },
+    _: { role: "reviewer", prompt: "Review the changes: {{summary}}", location: null },
  },
  reviewer: {
    approved: { role: "$END", prompt: "Done.", location: null },
@@ -112,7 +112,7 @@ describe("evaluate", () => {

  test("mustache template rendering with simple fields", () => {
    const result = evaluate(solveIssueGraph, "planner", {
-      $status: "planned",
+      $status: "_",
      plan: "Add auth middleware",
    });
    expect(result).toEqual({
@@ -139,11 +139,11 @@ describe("evaluate", () => {
  test("triple mustache also works for unescaped output", () => {
    const graph: Record<string, Record<string, Target>> = {
      reviewer: {
-        rejected: { role: "developer", prompt: "Fix: {{{comments}}}", location: null },
+        _: { role: "developer", prompt: "Fix: {{{comments}}}", location: null },
      },
    };
    const result = evaluate(graph, "reviewer", {
-      $status: "rejected",
+      $status: "_",
      comments: "<script>alert(1)</script>",
    });
    expect(result).toEqual({
@@ -152,22 +152,24 @@ describe("evaluate", () => {
    });
  });

-  test("missing $status → error (no unit fallback)", () => {
+  test("missing $status defaults to _ (unit routing)", () => {
    const result = evaluate(solveIssueGraph, "planner", {
      plan: "Add auth middleware",
    });
-    expect(result.ok).toBe(false);
-    if (!result.ok) {
-      expect(result.error.message).toBe(
-        'agent output for role "planner" is missing required "$status" string',
-      );
-    }
+    expect(result).toEqual({
+      ok: true,
+      value: {
+        role: "developer",
+        prompt: "Implement the plan: Add auth middleware",
+        location: null,
+      },
+    });
  });

  test("mustache template with nested object paths", () => {
    const graph: Record<string, Record<string, Target>> = {
      reviewer: {
-        rejected: {
+        _: {
          role: "developer",
          prompt: "Address: {{review.comments}}",
          location: null,
@@ -175,7 +177,7 @@ describe("evaluate", () => {
      },
    };
    const result = evaluate(graph, "reviewer", {
-      $status: "rejected",
+      $status: "_",
      review: { comments: "refactor the handler" },
    });
    expect(result).toEqual({
@@ -6,124 +6,101 @@ import { describe, expect, test } from "vitest";
 const __dirname = dirname(fileURLToPath(import.meta.url));

 import {
-  cmdPromptAdapterDeveloping,
-  cmdPromptBootstrap,
+  cmdPromptAdapter,
+  cmdPromptAuthor,
+  cmdPromptDeveloper,
  cmdPromptList,
  cmdPromptSetup,
  cmdPromptUsage,
-  cmdPromptUsageReference,
-  cmdPromptWorkflowAuthoring,
+  cmdPromptUser,
 } from "../commands/prompt.js";

 describe("prompt commands", () => {
-  test("prompt list returns new prompt names", () => {
+  test("prompt list returns all prompt names", () => {
    const result = cmdPromptList();
    expect(result).toBeInstanceOf(Array);
-    expect(result).toContain("usage");
-    expect(result).toContain("workflow-authoring");
-    expect(result).toContain("adapter-developing");
-    expect(result).toContain("bootstrap");
-    expect(result).not.toContain("user");
-    expect(result).not.toContain("author");
-    expect(result).not.toContain("developer");
-    expect(result).not.toContain("adapter");
+    expect(result).toContain("user");
+    expect(result).toContain("author");
+    expect(result).toContain("developer");
+    expect(result).toContain("adapter");
    for (const name of result) {
      expect(name).toMatch(/^\S+$/);
    }
  });

-  test("prompt usage-reference returns non-empty markdown string with frontmatter", () => {
-    const result = cmdPromptUsageReference();
+  test("prompt user returns non-empty markdown string", () => {
+    const result = cmdPromptUser();
    expect(typeof result).toBe("string");
    expect(result).toContain("uwf");
    expect(result).toContain("thread");
    expect(result).toContain("workflow");
    expect(result).toContain("Quick Start");
-    expect(result).toContain("---");
-    expect(result).toContain("name:");
-    expect(result).toContain("version:");
    expect(result.length).toBeGreaterThan(500);
  });

-  test("prompt workflow-authoring returns non-empty markdown string with frontmatter", () => {
-    const result = cmdPromptWorkflowAuthoring();
+  test("prompt author returns non-empty markdown string", () => {
+    const result = cmdPromptAuthor();
    expect(typeof result).toBe("string");
    expect(result).toContain("frontmatter");
    expect(result).toContain("graph");
    expect(result).toContain("$START");
    expect(result).toContain("$END");
    expect(result).toContain("$status");
-    expect(result).toContain("---");
-    expect(result).toContain("name:");
-    expect(result).toContain("version:");
    expect(result.length).toBeGreaterThan(500);
  });

-  test("prompt adapter-developing returns non-empty markdown string with frontmatter", () => {
-    const result = cmdPromptAdapterDeveloping();
+  test("prompt developer returns non-empty markdown string", () => {
+    const result = cmdPromptDeveloper();
+    expect(typeof result).toBe("string");
+    expect(result).toContain("Monorepo");
+    expect(result).toContain("CAS");
+    expect(result).toContain("Biome");
+    expect(result.length).toBeGreaterThan(500);
+  });
+
+  test("prompt adapter returns non-empty markdown string", () => {
+    const result = cmdPromptAdapter();
    expect(typeof result).toBe("string");
    expect(result).toContain("createAgent");
    expect(result).toContain("AgentContext");
    expect(result).toContain("frontmatter");
-    expect(result).toContain("---");
-    expect(result).toContain("name:");
-    expect(result).toContain("version:");
    expect(result.length).toBeGreaterThan(500);
  });

-  test("prompt bootstrap returns non-empty skill with frontmatter", () => {
-    const result = cmdPromptBootstrap();
-    expect(typeof result).toBe("string");
-    expect(result).toContain("uwf");
-    expect(result).toContain("---");
-    expect(result.length).toBeGreaterThan(100);
-  });
-
-  test("prompt usage combines remaining references (no developer)", () => {
+  test("prompt usage combines all references", () => {
    const result = cmdPromptUsage();
    expect(typeof result).toBe("string");
-    expect(result).toContain("Usage Reference");
-    expect(result).toContain("Workflow Authoring Reference");
-    expect(result).toContain("Adapter Developing Reference");
-    expect(result).not.toContain("Developer Reference");
+    expect(result).toContain("User Reference");
+    expect(result).toContain("Author Reference");
+    expect(result).toContain("Developer Reference");
+    expect(result).toContain("Adapter Reference");
    expect(result).toContain("---");
    expect(result.length).toBeGreaterThan(2000);
  });

-  test("prompt setup returns simplified setup instructions", () => {
+  test("prompt setup returns setup instructions", () => {
    const result = cmdPromptSetup();
    expect(typeof result).toBe("string");
    expect(result).toContain("uwf Skill Setup");
-    expect(result).toContain("uwf prompt bootstrap");
+    expect(result).toContain("uwf prompt usage");
+    expect(result).toContain("uwf prompt setup");
    expect(result).toContain("SKILL.md");
    expect(result).toContain("version");
-    expect(result).not.toMatch(/\bbun (install|run|test|changeset|version|release)\b/);
  });

-  test("prompt setup references new subcommand names", () => {
-    const result = cmdPromptSetup();
-    expect(result).toContain("uwf prompt usage");
-    expect(result).toContain("uwf prompt workflow-authoring");
-    expect(result).toContain("uwf prompt adapter-developing");
-    expect(result).not.toContain("uwf prompt user");
-    expect(result).not.toContain("uwf prompt author");
-    expect(result).not.toContain("uwf prompt developer");
-    expect(result).not.toMatch(/uwf prompt adapter\b(?!-developing)/);
-  });
-
-  test("prompt help subcommand is suppressed", { timeout: 30_000 }, () => {
-    const cliPath = join(__dirname, "..", "..", "dist", "cli.js");
-    const output = execFileSync("node", [cliPath, "prompt", "--help"], {
+  test("prompt help subcommand is suppressed", () => {
+    const output = execFileSync("npx", ["tsx", "src/cli.ts", "prompt", "--help"], {
+      cwd: join(__dirname, "..", ".."),
      encoding: "utf-8",
-      env: { ...process.env },
+      env: { ...process.env, PATH: `/opt/homebrew/bin:${process.env.PATH}` },
    });
    expect(output).not.toMatch(/help\s+\[command\]/i);
    expect(output).toContain("usage");
    expect(output).toContain("setup");
-    expect(output).toContain("workflow-authoring");
-    expect(output).toContain("adapter-developing");
-    expect(output).toContain("bootstrap");
+    expect(output).toContain("user");
+    expect(output).toContain("author");
+    expect(output).toContain("developer");
+    expect(output).toContain("adapter");
    expect(output).toContain("list");
-    expect(output).not.toContain("developer");
  });
 });
@@ -4,7 +4,7 @@ import { join } from "node:path";
 import { type CasRef, createThreadIndexEntry, type ThreadId } from "@united-workforce/protocol";
 import { afterEach, beforeEach, describe, expect, test } from "vitest";
 import { resolveHeadHash } from "../commands/shared.js";
-import { completeThread, createUwfStore, setThread } from "../store.js";
+import { addHistoryEntry, createUwfStore, setThread } from "../store.js";

 let tmpDir: string;

@@ -31,13 +31,19 @@ describe("resolveHeadHash", () => {
    expect(result).toBe(headHash);
  });

-  test("finds completed thread", async () => {
+  test("falls back to history variable when thread not in active index", async () => {
    const threadId = "01JTEST0000000000000000002" as ThreadId;
+    const workflowHash = "workflow_hash_789" as CasRef;

    const uwf = await createUwfStore(tmpDir);
    const headHash = (await uwf.store.cas.put(uwf.schemas.text, "completed-head")) as CasRef;
-    setThread(uwf.varStore, threadId, createThreadIndexEntry(headHash));
-    completeThread(uwf.varStore, threadId, "completed");
+    addHistoryEntry(uwf.varStore, {
+      thread: threadId,
+      workflow: workflowHash,
+      head: headHash,
+      completedAt: Date.now(),
+      reason: null,
+    });

    const result = await resolveHeadHash(tmpDir, threadId);

@@ -48,36 +54,58 @@ describe("resolveHeadHash", () => {
  // calls fail() which does process.exit(1), terminating the test runner.
  // The error behavior is tested in integration tests below via CLI invocation.

-  test("prioritizes active thread", async () => {
+  test("prioritizes active thread over history when thread exists in both", async () => {
    const threadId = "01JTEST0000000000000000004" as ThreadId;
+    const workflowHash = "workflow_hash_xyz" as CasRef;

    const uwf = await createUwfStore(tmpDir);
    const activeHead = (await uwf.store.cas.put(uwf.schemas.text, "active-v2")) as CasRef;
+    const historicalHash = (await uwf.store.cas.put(uwf.schemas.text, "historical-v1")) as CasRef;
    setThread(uwf.varStore, threadId, createThreadIndexEntry(activeHead));
+    addHistoryEntry(uwf.varStore, {
+      thread: threadId,
+      workflow: workflowHash,
+      head: historicalHash,
+      completedAt: Date.now(),
+      reason: null,
+    });

    const result = await resolveHeadHash(tmpDir, threadId);

-    // Should return the active head
+    // Should return the active head, not the historical one
    expect(result).toBe(activeHead);
  });

-  test("finds thread from multiple completed threads", async () => {
+  test("finds thread from multiple history entries", async () => {
    const threadId1 = "01JTEST0000000000000000005" as ThreadId;
    const threadId2 = "01JTEST0000000000000000006" as ThreadId;
    const threadId3 = "01JTEST0000000000000000007" as ThreadId;
+    const workflowHash = "workflow_hash_abc" as CasRef;
    const uwf = await createUwfStore(tmpDir);
    const hash1 = (await uwf.store.cas.put(uwf.schemas.text, "hash-thread1")) as CasRef;
    const hash2 = (await uwf.store.cas.put(uwf.schemas.text, "hash-thread2")) as CasRef;
    const hash3 = (await uwf.store.cas.put(uwf.schemas.text, "hash-thread3")) as CasRef;
-
-    setThread(uwf.varStore, threadId1, createThreadIndexEntry(hash1));
-    completeThread(uwf.varStore, threadId1, "completed");
-
-    setThread(uwf.varStore, threadId2, createThreadIndexEntry(hash2));
-    completeThread(uwf.varStore, threadId2, "completed");
-
-    setThread(uwf.varStore, threadId3, createThreadIndexEntry(hash3));
-    completeThread(uwf.varStore, threadId3, "completed");
+    addHistoryEntry(uwf.varStore, {
+      thread: threadId1,
+      workflow: workflowHash,
+      head: hash1,
+      completedAt: Date.now() - 2000,
+      reason: null,
+    });
+    addHistoryEntry(uwf.varStore, {
+      thread: threadId2,
+      workflow: workflowHash,
+      head: hash2,
+      completedAt: Date.now() - 1000,
+      reason: null,
+    });
+    addHistoryEntry(uwf.varStore, {
+      thread: threadId3,
+      workflow: workflowHash,
+      head: hash3,
+      completedAt: Date.now(),
+      reason: null,
+    });

    const result = await resolveHeadHash(tmpDir, threadId2);

@@ -118,7 +118,6 @@ async function createTestStep(
    completedAtMs: Date.now() + 1000,
    assembledPrompt: null,
    cwd: "/tmp",
-    usage: null,
  };
  return store.cas.put(schemas.stepNode, stepPayload);
 }
@@ -96,7 +96,6 @@ describe("protocol types", () => {
      completedAtMs: 2000,
      assembledPrompt: null,
      cwd: "/test/path",
-      usage: null,
    };
    expect(record.startedAtMs).toBe(1000);
    expect(record.completedAtMs).toBe(2000);
@@ -111,7 +110,6 @@ describe("protocol types", () => {
      agent: "uwf-test",
      timestamp: 123,
      durationMs: 5000,
-      usage: null,
    };
    expect(entry.durationMs).toBe(5000);
  });
@@ -254,7 +252,7 @@ describe("thread read timing", () => {
      },
      graph: {
        $START: { _: { role: "worker", prompt: "go", location: null } },
-        worker: { done: { role: "$END", prompt: "", location: null } },
+        worker: { _: { role: "$END", prompt: "", location: null } },
      },
    });

@@ -320,7 +318,7 @@ describe("thread read timing", () => {
      },
      graph: {
        $START: { _: { role: "worker", prompt: "go", location: null } },
-        worker: { done: { role: "$END", prompt: "", location: null } },
+        worker: { _: { role: "$END", prompt: "", location: null } },
      },
    });

@@ -226,15 +226,19 @@ describe("Global CAS directory", () => {
    const uwf = await createUwfStore(storageRoot);
    const threadId = "thread-123" as ThreadId;
    const headHash = await uwf.store.cas.put(uwf.schemas.text, "history-head");
-    const { completeThread, setThread, getThread } = await import("../store.js");
-    const { createThreadIndexEntry } = await import("@united-workforce/protocol");
+    const { addHistoryEntry, findHistoryEntry } = await import("../store.js");
+    addHistoryEntry(uwf.varStore, {
+      thread: threadId,
+      workflow: "workflow-456",
+      head: headHash,
+      completedAt: Date.now(),
+      reason: "completed",
+    });

-    setThread(uwf.varStore, threadId, createThreadIndexEntry(headHash));
-    completeThread(uwf.varStore, threadId, "completed");
-
-    const entry = getThread(uwf.varStore, threadId);
+    const entry = findHistoryEntry(uwf.varStore, threadId);
+    expect(entry?.thread).toBe(threadId);
+    expect(entry?.workflow).toBe("workflow-456");
    expect(entry?.head).toBe(headHash);
-    expect(entry?.status).toBe("completed");

    const { access } = await import("node:fs/promises");
    await access(join(globalCasDir, "vars"));
@@ -270,12 +274,15 @@ describe("Global CAS directory", () => {
    );

    const uwf = await createUwfStore(storageRoot);
-    const { getThread } = await import("../store.js");
-    const entry = getThread(uwf.varStore, threadId);
-    expect(entry).not.toBeNull();
-    expect(entry?.head).toBe(headHash);
-    expect(entry?.status).toBe("cancelled");
-    expect(entry?.completedAt).toBe(completedAt);
+    const { findHistoryEntry } = await import("../store.js");
+    const entry = findHistoryEntry(uwf.varStore, threadId);
+    expect(entry).toEqual({
+      thread: threadId,
+      workflow: workflowHash,
+      head: headHash,
+      completedAt,
+      reason: "cancelled",
+    });

    await expect(access(historyPath)).rejects.toThrow();
    const migratedContent = await readFile(`${historyPath}.migrated`, "utf8");
@@ -1,235 +0,0 @@
-import { mkdir, mkdtemp } from "node:fs/promises";
-import { tmpdir } from "node:os";
-import { join } from "node:path";
-import type { CasRef, ThreadId } from "@united-workforce/protocol";
-import { describe, expect, test } from "vitest";
-import {
-  completeThread,
-  createUwfStore,
-  getThread,
-  loadActiveThreads,
-  loadHistoryThreads,
-  setThread,
-} from "../store.js";
-
-async function makeUwfStore(storageRoot: string) {
-  const casDir = join(storageRoot, "cas");
-  await mkdir(casDir, { recursive: true });
-  process.env.OCAS_HOME = casDir;
-  return createUwfStore(storageRoot);
-}
-
-async function seedThreadHead(
-  uwf: Awaited<ReturnType<typeof createUwfStore>>,
-  label: string,
-): Promise<CasRef> {
-  return (await uwf.store.cas.put(uwf.schemas.text, label)) as CasRef;
-}
-
-describe("unified thread storage", () => {
-  test("loadActiveThreads excludes completed threads", async () => {
-    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-active-test-"));
-    const uwf = await makeUwfStore(tmpDir);
-
-    const threadId1 = "01JTEST000000000000ACTIVE1" as ThreadId;
-    const threadId2 = "01JTEST000000000000ACTIVE2" as ThreadId;
-    const head1 = await seedThreadHead(uwf, "active-head");
-    const head2 = await seedThreadHead(uwf, "completed-head");
-
-    setThread(uwf.varStore, threadId1, {
-      head: head1,
-      status: "idle",
-      suspendedRole: null,
-      suspendMessage: null,
-      completedAt: null,
-    });
-
-    setThread(uwf.varStore, threadId2, {
-      head: head2,
-      status: "completed",
-      suspendedRole: null,
-      suspendMessage: null,
-      completedAt: Date.now(),
-    });
-
-    const active = loadActiveThreads(uwf.varStore);
-    expect(Object.keys(active)).toHaveLength(1);
-    expect(active[threadId1]).toBeDefined();
-    expect(active[threadId2]).toBeUndefined();
-  });
-
-  test("loadActiveThreads excludes cancelled threads", async () => {
-    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-active-test-"));
-    const uwf = await makeUwfStore(tmpDir);
-
-    const threadId1 = "01JTEST000000000000ACTIVE3" as ThreadId;
-    const threadId2 = "01JTEST000000000000ACTIVE4" as ThreadId;
-    const head1 = await seedThreadHead(uwf, "active-head");
-    const head2 = await seedThreadHead(uwf, "cancelled-head");
-
-    setThread(uwf.varStore, threadId1, {
-      head: head1,
-      status: "idle",
-      suspendedRole: null,
-      suspendMessage: null,
-      completedAt: null,
-    });
-
-    setThread(uwf.varStore, threadId2, {
-      head: head2,
-      status: "cancelled",
-      suspendedRole: null,
-      suspendMessage: null,
-      completedAt: Date.now(),
-    });
-
-    const active = loadActiveThreads(uwf.varStore);
-    expect(Object.keys(active)).toHaveLength(1);
-    expect(active[threadId1]).toBeDefined();
-    expect(active[threadId2]).toBeUndefined();
-  });
-
-  test("loadHistoryThreads only returns completed and cancelled", async () => {
-    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-history-test-"));
-    const uwf = await makeUwfStore(tmpDir);
-
-    const threadId1 = "01JTEST000000000000HISTOR1" as ThreadId;
-    const threadId2 = "01JTEST000000000000HISTOR2" as ThreadId;
-    const threadId3 = "01JTEST000000000000HISTOR3" as ThreadId;
-    const head1 = await seedThreadHead(uwf, "active-head");
-    const head2 = await seedThreadHead(uwf, "completed-head");
-    const head3 = await seedThreadHead(uwf, "cancelled-head");
-
-    setThread(uwf.varStore, threadId1, {
-      head: head1,
-      status: "idle",
-      suspendedRole: null,
-      suspendMessage: null,
-      completedAt: null,
-    });
-
-    setThread(uwf.varStore, threadId2, {
-      head: head2,
-      status: "completed",
-      suspendedRole: null,
-      suspendMessage: null,
-      completedAt: Date.now(),
-    });
-
-    setThread(uwf.varStore, threadId3, {
-      head: head3,
-      status: "cancelled",
-      suspendedRole: null,
-      suspendMessage: null,
-      completedAt: Date.now(),
-    });
-
-    const history = loadHistoryThreads(uwf.varStore);
-    expect(Object.keys(history)).toHaveLength(2);
-    expect(history[threadId1]).toBeUndefined();
-    expect(history[threadId2]).toBeDefined();
-    expect(history[threadId3]).toBeDefined();
-  });
-
-  test("completeThread marks thread as completed", async () => {
-    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-complete-test-"));
-    const uwf = await makeUwfStore(tmpDir);
-    const threadId = "01JTEST000000000000COMPLE1" as ThreadId;
-    const head = await seedThreadHead(uwf, "active-head");
-
-    setThread(uwf.varStore, threadId, {
-      head,
-      status: "idle",
-      suspendedRole: null,
-      suspendMessage: null,
-      completedAt: null,
-    });
-
-    completeThread(uwf.varStore, threadId, "completed");
-
-    const entry = getThread(uwf.varStore, threadId);
-    expect(entry).not.toBeNull();
-    expect(entry?.status).toBe("completed");
-    expect(entry?.completedAt).toBeDefined();
-    expect(entry?.completedAt).toBeGreaterThan(0);
-  });
-
-  test("completeThread marks thread as cancelled", async () => {
-    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-complete-test-"));
-    const uwf = await makeUwfStore(tmpDir);
-    const threadId = "01JTEST000000000000COMPLE2" as ThreadId;
-    const head = await seedThreadHead(uwf, "active-head");
-
-    setThread(uwf.varStore, threadId, {
-      head,
-      status: "idle",
-      suspendedRole: null,
-      suspendMessage: null,
-      completedAt: null,
-    });
-
-    completeThread(uwf.varStore, threadId, "cancelled");
-
-    const entry = getThread(uwf.varStore, threadId);
-    expect(entry).not.toBeNull();
-    expect(entry?.status).toBe("cancelled");
-    expect(entry?.completedAt).toBeDefined();
-    expect(entry?.completedAt).toBeGreaterThan(0);
-  });
-
-  test("completeThread clears suspend metadata", async () => {
-    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-complete-test-"));
-    const uwf = await makeUwfStore(tmpDir);
-    const threadId = "01JTEST000000000000COMPLE3" as ThreadId;
-    const head = await seedThreadHead(uwf, "suspended-head");
-
-    setThread(uwf.varStore, threadId, {
-      head,
-      status: "suspended",
-      suspendedRole: "test-role",
-      suspendMessage: "test message",
-      completedAt: null,
-    });
-
-    completeThread(uwf.varStore, threadId, "completed");
-
-    const entry = getThread(uwf.varStore, threadId);
-    expect(entry).not.toBeNull();
-    expect(entry?.status).toBe("completed");
-    expect(entry?.suspendedRole).toBeNull();
-    expect(entry?.suspendMessage).toBeNull();
-  });
-
-  test("completeThread handles non-existent thread gracefully", async () => {
-    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-complete-test-"));
-    const uwf = await makeUwfStore(tmpDir);
-    const threadId = "01JTEST000000000000NOEXIST" as ThreadId;
-
-    // Should not throw
-    completeThread(uwf.varStore, threadId, "completed");
-
-    const entry = getThread(uwf.varStore, threadId);
-    expect(entry).toBeNull();
-  });
-
-  test("status and completedAt tags are persisted and loaded", async () => {
-    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-tags-test-"));
-    const uwf = await makeUwfStore(tmpDir);
-    const threadId = "01JTEST000000000000TAGTEST" as ThreadId;
-    const head = await seedThreadHead(uwf, "test-head");
-    const now = Date.now();
-
-    setThread(uwf.varStore, threadId, {
-      head,
-      status: "completed",
-      suspendedRole: null,
-      suspendMessage: null,
-      completedAt: now,
-    });
-
-    const entry = getThread(uwf.varStore, threadId);
-    expect(entry).not.toBeNull();
-    expect(entry?.status).toBe("completed");
-    expect(entry?.completedAt).toBe(now);
-  });
-});
@@ -3,13 +3,7 @@ import { tmpdir } from "node:os";
 import { join } from "node:path";
 import type { CasRef, ThreadId } from "@united-workforce/protocol";
 import { describe, expect, test } from "vitest";
-import {
-  completeThread,
-  createUwfStore,
-  getThread,
-  loadHistoryThreads,
-  setThread,
-} from "../store.js";
+import { addHistoryEntry, createUwfStore, loadAllHistory } from "../store.js";

 async function makeUwfStore(storageRoot: string) {
  const casDir = join(storageRoot, "cas");
@@ -26,113 +20,88 @@ async function seedHistoryHead(
 }

 describe("thread cancel status", () => {
-  test("cancelled thread has status 'cancelled'", async () => {
+  test("cancelled history entry has reason 'cancelled'", async () => {
    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-cancel-test-"));
    const threadId = "01JTEST000000000000CANCEL1" as ThreadId;
    const uwf = await makeUwfStore(tmpDir);
    const head = await seedHistoryHead(uwf, "cancelled-head");

-    setThread(uwf.varStore, threadId, {
+    addHistoryEntry(uwf.varStore, {
+      thread: threadId,
+      workflow: "test-workflow",
      head,
-      status: "idle",
-      suspendedRole: null,
-      suspendMessage: null,
-      completedAt: null,
+      completedAt: Date.now(),
+      reason: "cancelled",
    });

-    completeThread(uwf.varStore, threadId, "cancelled");
-
-    const entry = getThread(uwf.varStore, threadId);
-    expect(entry).not.toBeNull();
-    expect(entry?.status).toBe("cancelled");
+    const history = loadAllHistory(uwf.varStore);
+    expect(history).toHaveLength(1);
+    expect(history[0]?.reason).toBe("cancelled");
  });

-  test("completed thread has status 'completed'", async () => {
+  test("completed history entry has reason 'completed'", async () => {
    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-cancel-test-"));
    const threadId = "01JTEST000000000000CANCEL2" as ThreadId;
    const uwf = await makeUwfStore(tmpDir);
    const head = await seedHistoryHead(uwf, "completed-head");

-    setThread(uwf.varStore, threadId, {
+    addHistoryEntry(uwf.varStore, {
+      thread: threadId,
+      workflow: "test-workflow",
      head,
-      status: "idle",
-      suspendedRole: null,
-      suspendMessage: null,
-      completedAt: null,
+      completedAt: Date.now(),
+      reason: "completed",
    });

-    completeThread(uwf.varStore, threadId, "completed");
-
-    const entry = getThread(uwf.varStore, threadId);
-    expect(entry).not.toBeNull();
-    expect(entry?.status).toBe("completed");
+    const history = loadAllHistory(uwf.varStore);
+    expect(history).toHaveLength(1);
+    expect(history[0]?.reason).toBe("completed");
  });

-  test("loadHistoryThreads returns completed and cancelled", async () => {
+  test("history entry with null reason is stored as completed", async () => {
+    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-cancel-test-"));
+    const threadId = "01JTEST000000000000CANCEL3" as ThreadId;
+    const uwf = await makeUwfStore(tmpDir);
+    const head = await seedHistoryHead(uwf, "legacy-head");
+
+    addHistoryEntry(uwf.varStore, {
+      thread: threadId,
+      workflow: "test-workflow",
+      head,
+      completedAt: Date.now(),
+      reason: null,
+    });
+
+    const history = loadAllHistory(uwf.varStore);
+    expect(history).toHaveLength(1);
+    expect(history[0]?.reason).toBe("completed");
+  });
+
+  test("mixed completed and cancelled entries preserve distinct reasons", async () => {
    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-cancel-test-"));
    const uwf = await makeUwfStore(tmpDir);
    const head1 = await seedHistoryHead(uwf, "head1");
    const head2 = await seedHistoryHead(uwf, "head2");

-    const threadId1 = "01JTEST000000000000CANCEL4" as ThreadId;
-    setThread(uwf.varStore, threadId1, {
+    addHistoryEntry(uwf.varStore, {
+      thread: "01JTEST000000000000CANCEL4" as ThreadId,
+      workflow: "test-workflow",
      head: head1,
-      status: "idle",
-      suspendedRole: null,
-      suspendMessage: null,
-      completedAt: null,
+      completedAt: Date.now(),
+      reason: "completed",
    });
-    completeThread(uwf.varStore, threadId1, "completed");

-    const threadId2 = "01JTEST000000000000CANCEL5" as ThreadId;
-    setThread(uwf.varStore, threadId2, {
+    addHistoryEntry(uwf.varStore, {
+      thread: "01JTEST000000000000CANCEL5" as ThreadId,
+      workflow: "test-workflow",
      head: head2,
-      status: "idle",
-      suspendedRole: null,
-      suspendMessage: null,
-      completedAt: null,
+      completedAt: Date.now(),
+      reason: "cancelled",
    });
-    completeThread(uwf.varStore, threadId2, "cancelled");

-    const history = loadHistoryThreads(uwf.varStore);
-    expect(Object.keys(history)).toHaveLength(2);
-    const statuses = Object.values(history)
-      .map((entry) => entry.status)
-      .sort();
-    expect(statuses).toEqual(["cancelled", "completed"]);
-  });
-
-  test("mixed completed and cancelled entries preserve distinct statuses", async () => {
-    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-cancel-test-"));
-    const uwf = await makeUwfStore(tmpDir);
-    const head1 = await seedHistoryHead(uwf, "head1");
-    const head2 = await seedHistoryHead(uwf, "head2");
-
-    const threadId1 = "01JTEST000000000000CANCEL6" as ThreadId;
-    setThread(uwf.varStore, threadId1, {
-      head: head1,
-      status: "idle",
-      suspendedRole: null,
-      suspendMessage: null,
-      completedAt: null,
-    });
-    completeThread(uwf.varStore, threadId1, "completed");
-
-    const threadId2 = "01JTEST000000000000CANCEL7" as ThreadId;
-    setThread(uwf.varStore, threadId2, {
-      head: head2,
-      status: "idle",
-      suspendedRole: null,
-      suspendMessage: null,
-      completedAt: null,
-    });
-    completeThread(uwf.varStore, threadId2, "cancelled");
-
-    const history = loadHistoryThreads(uwf.varStore);
-    expect(Object.keys(history)).toHaveLength(2);
-    const statuses = Object.values(history)
-      .map((entry) => entry.status)
-      .sort();
-    expect(statuses).toEqual(["cancelled", "completed"]);
+    const history = loadAllHistory(uwf.varStore);
+    expect(history).toHaveLength(2);
+    const reasons = history.map((entry) => entry.reason).sort();
+    expect(reasons).toEqual(["cancelled", "completed"]);
  });
 });
@@ -10,8 +10,9 @@ import { cmdThreadList } from "../commands/thread.js";
 import { parseTimeInput } from "../commands/thread-time-parser.js";
 import type { UwfStore } from "../store.js";
 import {
-  completeThread as completeThreadInStore,
+  addHistoryEntry,
  createUwfStore,
+  deleteThread,
  loadAllThreads,
  setThread,
 } from "../store.js";
@@ -72,11 +73,18 @@ async function markThreadRunning(storageRoot: string, threadId: ThreadId, workfl
 async function completeThread(
  storageRoot: string,
  threadId: ThreadId,
-  _workflowHash: CasRef,
-  _headHash: CasRef,
+  workflowHash: CasRef,
+  headHash: CasRef,
 ) {
  const uwfIdx = await createUwfStore(storageRoot);
-  completeThreadInStore(uwfIdx.varStore, threadId, "completed");
+  deleteThread(uwfIdx.varStore, threadId);
+  addHistoryEntry(uwfIdx.varStore, {
+    thread: threadId,
+    workflow: workflowHash,
+    head: headHash,
+    completedAt: Date.now(),
+    reason: null,
+  });
 }

 // ── test setup ────────────────────────────────────────────────────────────────
@@ -492,10 +500,8 @@ describe("edge cases", () => {
    )) as CasRef;
    index["INVALID_ULID_FORMAT_HERE" as ThreadId] = {
      head: placeholderHead,
-      status: "idle",
      suspendedRole: null,
      suspendMessage: null,
-      completedAt: null,
    };
    for (const [tid, ent] of Object.entries(index)) {
      setThread(uwfIdx.varStore, tid as ThreadId, ent);
@@ -54,7 +54,7 @@ roles:
      type: object
      required: ["$status"]
      properties:
-        $status: { type: string, enum: ["ready"] }
+        $status: { type: string }
 graph:
  $START:
    _:
@@ -62,7 +62,7 @@ graph:
      prompt: "Plan the work"
      location: null
  planner:
-    ready:
+    _:
      role: $END
      prompt: "Done"
      location: null
@@ -110,7 +110,7 @@ roles:
      type: object
      required: ["$status"]
      properties:
-        $status: { type: string, enum: ["ready"] }
+        $status: { type: string }
 graph:
  $START:
    _:
@@ -118,7 +118,7 @@ graph:
      prompt: "Plan"
      location: null
  planner:
-    ready:
+    _:
      role: $END
      prompt: "Done"
      location: null
@@ -153,7 +153,7 @@ roles:
      type: object
      required: ["$status"]
      properties:
-        $status: { type: string, enum: ["ready"] }
+        $status: { type: string }
 graph:
  $START:
    _:
@@ -161,7 +161,7 @@ graph:
      prompt: "Plan"
      location: null
  planner:
-    ready:
+    _:
      role: $END
      prompt: "Done"
      location: null
@@ -79,7 +79,7 @@ async function setupSuspendedThread(mode: MockAgentMode): Promise<{
        },
        ok: { role: "reviewer", prompt: "Review the work", location: null },
      },
-      reviewer: { done: { role: "$END", prompt: "Done", location: null } },
+      reviewer: { _: { role: "$END", prompt: "Done", location: null } },
    },
  });

@@ -118,10 +118,8 @@ async function setupSuspendedThread(mode: MockAgentMode): Promise<{
  await seedThreads(tmpDir, {
    [THREAD_ID]: {
      head: stepHash,
-      status: "suspended",
      suspendedRole: "worker",
      suspendMessage: SUSPEND_MESSAGE,
-      completedAt: null,
    },
  });

@@ -234,7 +232,7 @@ describe("uwf thread resume", () => {
      },
      graph: {
        $START: { _: { role: "worker", prompt: "Start", location: null } },
-        worker: { done: { role: "$END", prompt: "Done", location: null } },
+        worker: { _: { role: "$END", prompt: "Done", location: null } },
      },
    });

@@ -249,7 +247,7 @@ describe("uwf thread resume", () => {

    const result = runUwf(["thread", "resume", THREAD_ID], casDir);
    expect(result.status).not.toBe(0);
-    expect(result.stderr).toContain("thread cannot be resumed");
+    expect(result.stderr).toContain("thread is not suspended");
  });

  test("resume suspended thread executes step and becomes idle", async () => {
@@ -349,10 +347,8 @@ describe("uwf thread resume", () => {
      const uwfAfterFirst = await createUwfStore(tmpDir);
      expect(getThread(uwfAfterFirst.varStore, THREAD_ID)).toEqual({
        head: firstResume.head,
-        status: "suspended",
        suspendedRole: "worker",
        suspendMessage: SUSPEND_MESSAGE,
-        completedAt: null,
      });

      const { mockAgentPath: okMockAgentPath } = await setupOkMockAgent(
@@ -448,263 +444,3 @@ echo '${adapterJson}'

  return { mockAgentPath };
 }
-
-describe("uwf thread resume - completed threads", () => {
-  test("resume completed thread starts from $START role", async () => {
-    const casDir = join(tmpDir, "cas");
-    await mkdir(casDir, { recursive: true });
-    const store = await openStore(casDir);
-    const schemas = await registerUwfSchemas(store);
-    const outputSchemaHash = await putSchema(store, OUTPUT_SCHEMA);
-
-    const workflowHash = await store.cas.put(schemas.workflow, {
-      name: "test-completed-resume",
-      description: "completed thread resume test",
-      roles: {
-        worker: {
-          description: "Worker role",
-          goal: "Work",
-          capabilities: [],
-          procedure: "work",
-          output: "result",
-          frontmatter: outputSchemaHash,
-        },
-        reviewer: {
-          description: "Reviewer role",
-          goal: "Review",
-          capabilities: [],
-          procedure: "review",
-          output: "result",
-          frontmatter: outputSchemaHash,
-        },
-      },
-      graph: {
-        $START: { _: { role: "worker", prompt: "Start work", location: null } },
-        worker: { done: { role: "reviewer", prompt: "Review the work", location: null } },
-        reviewer: { done: { role: "$END", prompt: "Done", location: null } },
-      },
-    });
-
-    const startHash = await store.cas.put(schemas.startNode, {
-      workflow: workflowHash,
-      prompt: "Initial task",
-      cwd: tmpDir,
-    });
-
-    process.env.OCAS_HOME = casDir;
-
-    const workerOutputHash = await store.cas.put(outputSchemaHash, { $status: "done" });
-    const reviewerOutputHash = await store.cas.put(outputSchemaHash, { $status: "done" });
-    const detailHash = await store.cas.put(schemas.text, "mock detail");
-
-    const workerStepHash = await store.cas.put(schemas.stepNode, {
-      start: startHash,
-      prev: null,
-      role: "worker",
-      output: workerOutputHash,
-      detail: detailHash,
-      agent: "uwf-mock",
-      edgePrompt: "Start work",
-      startedAtMs: 1716600000000,
-      completedAtMs: 1716600001000,
-      cwd: tmpDir,
-      assembledPrompt: null,
-    });
-
-    const reviewerStepHash = await store.cas.put(schemas.stepNode, {
-      start: startHash,
-      prev: workerStepHash,
-      role: "reviewer",
-      output: reviewerOutputHash,
-      detail: detailHash,
-      agent: "uwf-mock",
-      edgePrompt: "Review the work",
-      startedAtMs: 1716600001000,
-      completedAtMs: 1716600002000,
-      cwd: tmpDir,
-      assembledPrompt: null,
-    });
-
-    await seedThreads(tmpDir, {
-      [THREAD_ID]: {
-        head: reviewerStepHash,
-        status: "completed",
-        suspendedRole: null,
-        suspendMessage: null,
-        completedAt: 1716600002000,
-      },
-    });
-
-    // Verify the status was actually set
-    const { createUwfStore, getThread } = await import("../store.js");
-    const verifyUwf = await createUwfStore(tmpDir);
-    const verifyEntry = getThread(verifyUwf.varStore, THREAD_ID);
-    console.log("Seeded entry status:", verifyEntry?.status);
-    console.log("Seeded entry:", JSON.stringify(verifyEntry, null, 2));
-
-    const promptCapturePath = join(tmpDir, "captured-prompt-completed.txt");
-    const mockAgentPath = join(tmpDir, "mock-agent-completed.sh");
-
-    const newWorkerStepHash = await store.cas.put(schemas.stepNode, {
-      start: startHash,
-      prev: reviewerStepHash,
-      role: "worker",
-      output: workerOutputHash,
-      detail: detailHash,
-      agent: "uwf-mock",
-      edgePrompt: "Start work",
-      startedAtMs: 1716600003000,
-      completedAtMs: 1716600004000,
-      cwd: tmpDir,
-      assembledPrompt: null,
-    });
-
-    const adapterJson = JSON.stringify({
-      stepHash: newWorkerStepHash,
-      detailHash,
-      role: "worker",
-      frontmatter: { $status: "done" },
-      body: "",
-      startedAtMs: 1716600003000,
-      completedAtMs: 1716600004000,
-    });
-
-    await writeFile(
-      mockAgentPath,
-      `#!/bin/sh
-prompt=""
-while [ $# -gt 0 ]; do
-  if [ "$1" = "--prompt" ]; then
-    prompt="$2"
-    shift 2
-  else
-    shift
-  fi
-done
-printf '%s' "$prompt" > '${promptCapturePath}'
-echo '${adapterJson}'
-`,
-      { mode: 0o755 },
-    );
-
-    const configPath = join(tmpDir, "config.yaml");
-    await writeFile(
-      configPath,
-      `defaultAgent: uwf-hermes\ndefaultModel: test-model\nagentOverrides: null\nagents: {}\nproviders: {}\nmodels: {}\n`,
-    );
-
-    const result = runUwf(
-      ["thread", "resume", THREAD_ID, "-p", "Additional context", "--agent", mockAgentPath],
-      casDir,
-    );
-
-    if (result.status !== 0) {
-      console.error("Command failed:", result.stderr);
-    }
-
-    expect(result.status).toBe(0);
-
-    const cliOutput = JSON.parse(result.stdout.trim());
-    expect(cliOutput.status).toBe("idle");
-    expect(cliOutput.currentRole).toBe("reviewer");
-    expect(cliOutput.done).toBe(false);
-
-    const capturedPrompt = await readFile(promptCapturePath, "utf8");
-    expect(capturedPrompt).toContain("Previous run completed");
-    expect(capturedPrompt).toContain("Additional context");
-
-    const storeModule = await import("../store.js");
-    const uwf2 = await storeModule.createUwfStore(tmpDir);
-    const entry2 = storeModule.getThread(uwf2.varStore, THREAD_ID);
-    expect(entry2?.status).toBe("idle");
-    expect(entry2?.completedAt).toBeNull();
-  });
-
-  test("resume cancelled thread returns error", async () => {
-    const casDir = join(tmpDir, "cas");
-    await mkdir(casDir, { recursive: true });
-    const store = await openStore(casDir);
-    const schemas = await registerUwfSchemas(store);
-
-    const workflowHash = await store.cas.put(schemas.workflow, {
-      name: "cancelled-workflow",
-      description: "cancelled thread",
-      roles: {
-        worker: {
-          description: "Worker",
-          goal: "Work",
-          capabilities: [],
-          procedure: "work",
-          output: "result",
-          frontmatter: await putSchema(store, OUTPUT_SCHEMA),
-        },
-      },
-      graph: {
-        $START: { _: { role: "worker", prompt: "Start", location: null } },
-        worker: { done: { role: "$END", prompt: "Done", location: null } },
-      },
-    });
-
-    const startHash = await store.cas.put(schemas.startNode, {
-      workflow: workflowHash,
-      prompt: "task",
-      cwd: tmpDir,
-    });
-
-    process.env.OCAS_HOME = casDir;
-    await seedThreads(tmpDir, {
-      [THREAD_ID]: {
-        head: startHash,
-        status: "cancelled",
-        suspendedRole: null,
-        suspendMessage: null,
-        completedAt: null,
-      },
-    });
-
-    const result = runUwf(["thread", "resume", THREAD_ID], casDir);
-    expect(result.status).not.toBe(0);
-    expect(result.stderr).toContain("thread cannot be resumed");
-    expect(result.stderr).toContain("cancelled");
-  });
-
-  test("resume idle thread returns error", async () => {
-    const casDir = join(tmpDir, "cas");
-    await mkdir(casDir, { recursive: true });
-    const store = await openStore(casDir);
-    const schemas = await registerUwfSchemas(store);
-
-    const workflowHash = await store.cas.put(schemas.workflow, {
-      name: "idle-workflow",
-      description: "idle thread",
-      roles: {
-        worker: {
-          description: "Worker",
-          goal: "Work",
-          capabilities: [],
-          procedure: "work",
-          output: "result",
-          frontmatter: await putSchema(store, OUTPUT_SCHEMA),
-        },
-      },
-      graph: {
-        $START: { _: { role: "worker", prompt: "Start", location: null } },
-        worker: { done: { role: "$END", prompt: "Done", location: null } },
-      },
-    });
-
-    const startHash = await store.cas.put(schemas.startNode, {
-      workflow: workflowHash,
-      prompt: "task",
-      cwd: tmpDir,
-    });
-
-    process.env.OCAS_HOME = casDir;
-    await seedThreads(tmpDir, { [THREAD_ID]: startHash });
-
-    const result = runUwf(["thread", "resume", THREAD_ID], casDir);
-    expect(result.status).not.toBe(0);
-    expect(result.stderr).toContain("thread cannot be resumed");
-    expect(result.stderr).toContain("idle");
-  });
-});
@@ -6,7 +6,13 @@ import type { CasRef, ThreadId } from "@united-workforce/protocol";
 import { describe, expect, test } from "vitest";
 import { createMarker, deleteMarker } from "../background/index.js";
 import { cmdThreadShow, cmdThreadStart } from "../commands/thread.js";
-import { completeThread, createUwfStore, loadAllThreads, setThread } from "../store.js";
+import {
+  addHistoryEntry,
+  createUwfStore,
+  deleteThread,
+  loadAllThreads,
+  setThread,
+} from "../store.js";

 const OUTPUT_SCHEMA = {
  type: "object" as const,
@@ -31,7 +37,7 @@ roles:
      type: object
      required: ["$status"]
      properties:
-        $status: { type: string, enum: ["ready"] }
+        $status: { type: string }
 graph:
  $START:
    _:
@@ -39,7 +45,7 @@ graph:
      prompt: "Plan the work"
      location: null
  planner:
-    ready:
+    _:
      role: $END
      prompt: "Done"
      location: null
@@ -112,13 +118,7 @@ async function insertStepNode(
    assembledPrompt: null,
  })) as CasRef;

-  setThread(uwf.varStore, threadId, {
-    head: stepHash,
-    status: "idle",
-    suspendedRole: null,
-    suspendMessage: null,
-    completedAt: null,
-  });
+  setThread(uwf.varStore, threadId, { head: stepHash, suspendedRole: null, suspendMessage: null });
 }

 describe("thread show status field", () => {
@@ -200,7 +200,7 @@ describe("thread show status field", () => {
    // Create a thread
    const startResult = await cmdThreadStart(storageRoot, workflowPath, "test prompt", tmpDir);
    const threadId = startResult.thread as ThreadId;
-    const _workflow = startResult.workflow;
+    const workflow = startResult.workflow;

    // Get the head hash before moving to history
    const uwfForIndex = await createUwfStore(storageRoot);
@@ -208,7 +208,15 @@ describe("thread show status field", () => {
    const head = index[threadId]!.head;
    if (!head) throw new Error("Thread not found in index");

-    completeThread(uwfForIndex.varStore, threadId, "completed");
+    deleteThread(uwfForIndex.varStore, threadId);
+
+    addHistoryEntry(uwfForIndex.varStore, {
+      thread: threadId,
+      workflow,
+      head,
+      completedAt: Date.now(),
+      reason: "completed",
+    });

    const result = await cmdThreadShow(storageRoot, threadId);

@@ -229,7 +237,7 @@ describe("thread show status field", () => {
    // Create a thread
    const startResult = await cmdThreadStart(storageRoot, workflowPath, "test prompt", tmpDir);
    const threadId = startResult.thread as ThreadId;
-    const _workflow = startResult.workflow;
+    const workflow = startResult.workflow;

    // Get the head hash before moving to history
    const uwfForIndex = await createUwfStore(storageRoot);
@@ -237,7 +245,15 @@ describe("thread show status field", () => {
    const head = index[threadId]!.head;
    if (!head) throw new Error("Thread not found in index");

-    completeThread(uwfForIndex.varStore, threadId, "cancelled");
+    deleteThread(uwfForIndex.varStore, threadId);
+
+    addHistoryEntry(uwfForIndex.varStore, {
+      thread: threadId,
+      workflow,
+      head,
+      completedAt: Date.now(),
+      reason: "cancelled",
+    });

    const result = await cmdThreadShow(storageRoot, threadId);

@@ -258,7 +274,7 @@ describe("thread show status field", () => {
    // Create a thread
    const startResult = await cmdThreadStart(storageRoot, workflowPath, "test prompt", tmpDir);
    const threadId = startResult.thread as ThreadId;
-    const _workflow = startResult.workflow;
+    const workflow = startResult.workflow;

    // Get the head hash before moving to history
    const uwfForIndex = await createUwfStore(storageRoot);
@@ -266,7 +282,15 @@ describe("thread show status field", () => {
    const head = index[threadId]!.head;
    if (!head) throw new Error("Thread not found in index");

-    completeThread(uwfForIndex.varStore, threadId, "completed");
+    deleteThread(uwfForIndex.varStore, threadId);
+
+    addHistoryEntry(uwfForIndex.varStore, {
+      thread: threadId,
+      workflow,
+      head,
+      completedAt: Date.now(),
+      reason: null,
+    });

    const result = await cmdThreadShow(storageRoot, threadId);

@@ -54,7 +54,7 @@ roles:
      type: object
      required: ["$status"]
      properties:
-        $status: { type: string, enum: ["ready"] }
+        $status: { type: string }
 graph:
  $START:
    _:
@@ -62,7 +62,7 @@ graph:
      prompt: "Plan the work"
      location: null
  planner:
-    ready:
+    _:
      role: $END
      prompt: "Done"
      location: null
@@ -2,28 +2,19 @@ import { execFileSync } from "node:child_process";
 import { dirname, join } from "node:path";
 import { fileURLToPath } from "node:url";
 import { describe, expect, test } from "vitest";
-import { validateCount } from "../commands/thread.js";

-const CLI_PATH = join(dirname(fileURLToPath(import.meta.url)), "..", "..", "dist", "cli.js");
+const CLI_PATH = join(dirname(fileURLToPath(import.meta.url)), "..", "cli.js");

-function runCli(args: string[]): {
-  stdout: string;
-  stderr: string;
-  exitCode: number;
-} {
+function runCli(args: string[]): { stdout: string; stderr: string; exitCode: number } {
  try {
-    const stdout = execFileSync("node", [CLI_PATH, ...args], {
+    const stdout = execFileSync("npx", ["tsx", CLI_PATH, ...args], {
      encoding: "utf8",
      env: { ...process.env, UWF_HOME: "/tmp/uwf-test-nonexistent" },
      stdio: ["ignore", "pipe", "pipe"],
    });
    return { stdout, stderr: "", exitCode: 0 };
  } catch (e: unknown) {
-    const err = e as NodeJS.ErrnoException & {
-      stdout?: string;
-      stderr?: string;
-      status?: number;
-    };
+    const err = e as NodeJS.ErrnoException & { stdout?: string; stderr?: string; status?: number };
    return {
      stdout: err.stdout ?? "",
      stderr: err.stderr ?? "",
@@ -32,39 +23,50 @@ function runCli(args: string[]): {
  }
 }

-describe("thread exec --count CLI parsing", { timeout: 30_000 }, () => {
+describe("thread exec --count CLI parsing", () => {
  test("--help shows -c/--count option", () => {
    const result = runCli(["thread", "exec", "--help"]);
-    const combined = result.stdout + result.stderr;
-    expect(combined).toContain("--count");
-    expect(combined).toContain("-c");
+    expect(result.stdout).toContain("--count");
+    expect(result.stdout).toContain("-c");
  });

  test("description says 'one or more steps'", () => {
    const result = runCli(["thread", "exec", "--help"]);
-    const combined = result.stdout + result.stderr;
-    expect(combined).toContain("one or more steps");
+    expect(result.stdout).toContain("one or more steps");
  });
 });

-describe("validateCount", () => {
-  test("count=0 throws validation error", () => {
-    expect(() => validateCount(0)).toThrow("positive integer");
+describe("cmdThreadExec count logic", () => {
+  test("count=0 fails with validation error", () => {
+    const result = runCli(["thread", "exec", "FAKE_THREAD_ID", "-c", "0"]);
+    expect(result.exitCode).not.toBe(0);
+    expect(result.stderr).toContain("positive integer");
  });

-  test("negative count throws validation error", () => {
-    expect(() => validateCount(-1)).toThrow("positive integer");
+  test("negative count fails with validation error", () => {
+    const result = runCli(["thread", "exec", "FAKE_THREAD_ID", "-c", "-1"]);
+    expect(result.exitCode).not.toBe(0);
+    expect(result.stderr).toContain("positive integer");
  });

-  test("non-integer count throws validation error", () => {
-    expect(() => validateCount(1.5)).toThrow("positive integer");
+  test("non-integer count fails with validation error", () => {
+    const result = runCli(["thread", "exec", "FAKE_THREAD_ID", "-c", "1.5"]);
+    expect(result.exitCode).not.toBe(0);
+    expect(result.stderr).toContain("positive integer");
  });

-  test("count=1 passes validation", () => {
-    expect(() => validateCount(1)).not.toThrow();
+  test("count=1 is the default (no -c flag)", () => {
+    // Without -c, it should attempt to run 1 step (failing on missing thread, not on count validation)
+    const result = runCli(["thread", "exec", "FAKE_THREAD_ID"]);
+    expect(result.exitCode).not.toBe(0);
+    // Should NOT contain "positive integer" error — should fail on thread lookup instead
+    expect(result.stderr).not.toContain("positive integer");
  });

-  test("count=3 passes validation", () => {
-    expect(() => validateCount(3)).not.toThrow();
+  test("count=3 passes validation (fails on thread lookup)", () => {
+    const result = runCli(["thread", "exec", "FAKE_THREAD_ID", "-c", "3"]);
+    expect(result.exitCode).not.toBe(0);
+    // Should NOT contain "positive integer" error — should fail on thread/storage lookup
+    expect(result.stderr).not.toContain("positive integer");
  });
 });
@@ -160,10 +160,8 @@ describe("suspend step CAS chain and threads.yaml metadata", () => {
      const threadEntry = getThread(uwf.varStore, threadId);
      expect(threadEntry).toEqual({
        head: stepHash,
-        status: "suspended",
        suspendedRole: "worker",
        suspendMessage: "Please clarify: Which API?",
-        completedAt: null,
      });

      const showResult = await cmdThreadShow(tmpDir, threadId);
@@ -11,7 +11,7 @@ import {
  THREAD_READ_DEFAULT_QUOTA,
 } from "../commands/thread.js";
 import type { UwfStore } from "../store.js";
-import { completeThread, createUwfStore, setThread } from "../store.js";
+import { addHistoryEntry, createUwfStore } from "../store.js";
 import { seedThreads } from "./thread-test-helpers.js";

 // ── schemas used in tests ────────────────────────────────────────────────────
@@ -745,14 +745,13 @@ describe("cmdStepList with completed threads", () => {
    const threadId = "01JTEST0000000000000000A2" as ThreadId;
    // Thread is NOT in active index (simulating completed thread)
    // But it IS in history variable store
-    setThread(uwf.varStore, threadId, {
+    addHistoryEntry(uwf.varStore, {
+      thread: threadId,
+      workflow: workflowHash,
      head: step2Hash,
-      status: "idle",
-      suspendedRole: null,
-      suspendMessage: null,
-      completedAt: null,
+      completedAt: Date.now(),
+      reason: null,
    });
-    completeThread(uwf.varStore, threadId, "completed");

    const result = await cmdStepList(tmpDir, threadId);

@@ -873,15 +872,14 @@ describe("cmdStepShow with completed threads", () => {

    const threadId = "01JTEST0000000000000000B2" as ThreadId;
    // Thread is NOT in active index
-    // But it IS in the unified store with completed status
-    setThread(uwf.varStore, threadId, {
+    // But it IS in history variable store
+    addHistoryEntry(uwf.varStore, {
+      thread: threadId,
+      workflow: workflowHash,
      head: stepHash,
-      status: "idle",
-      suspendedRole: null,
-      suspendMessage: null,
-      completedAt: null,
+      completedAt: Date.now(),
+      reason: null,
    });
-    completeThread(uwf.varStore, threadId, "completed");

    const result = await cmdStepShow(tmpDir, stepHash);

@@ -936,15 +934,15 @@ describe("cmdThreadRead with completed threads", () => {
    });

    const threadId = "01JTEST0000000000000000C1" as ThreadId;
-    // Thread is in store with completed status
-    setThread(uwf.varStore, threadId, {
+    // Thread is NOT in active index
+    // But it IS in history variable store
+    addHistoryEntry(uwf.varStore, {
+      thread: threadId,
+      workflow: workflowHash,
      head: stepHash,
-      status: "idle",
-      suspendedRole: null,
-      suspendMessage: null,
-      completedAt: null,
+      completedAt: Date.now(),
+      reason: null,
    });
-    completeThread(uwf.varStore, threadId, "completed");

    const markdown = await cmdThreadRead(tmpDir, threadId, THREAD_READ_DEFAULT_QUOTA, null, false);

@@ -1000,14 +998,13 @@ describe("cmdThreadRead with completed threads", () => {
    });

    const threadId = "01JTEST0000000000000000C2" as ThreadId;
-    setThread(uwf.varStore, threadId, {
+    addHistoryEntry(uwf.varStore, {
+      thread: threadId,
+      workflow: workflowHash,
      head: step3Hash,
-      status: "idle",
-      suspendedRole: null,
-      suspendMessage: null,
-      completedAt: null,
+      completedAt: Date.now(),
+      reason: null,
    });
-    completeThread(uwf.varStore, threadId, "completed");

    const markdown = await cmdThreadRead(
      tmpDir,
@@ -17,7 +17,7 @@ function makeWorkflow(overrides?: Partial<WorkflowPayload>): WorkflowPayload {
        frontmatter: {
          type: "object",
          properties: {
-            $status: { enum: ["done"] },
+            $status: { enum: ["_"] },
            plan: { type: "string" },
          },
          required: ["$status", "plan"],
@@ -52,7 +52,7 @@ function makeWorkflow(overrides?: Partial<WorkflowPayload>): WorkflowPayload {
    },
    graph: {
      $START: { _: { role: "writer", prompt: "Begin writing", location: null } },
-      writer: { done: { role: "reviewer", prompt: "Review this: {{{plan}}}", location: null } },
+      writer: { _: { role: "reviewer", prompt: "Review this: {{{plan}}}", location: null } },
      reviewer: {
        approved: { role: "$END", prompt: "Done: {{{summary}}}", location: null },
        rejected: { role: "writer", prompt: "Fix: {{{reason}}}", location: null },
@@ -82,7 +82,7 @@ describe("Suite 1: Role Reference Integrity", () => {
      output: "None",
      frontmatter: {
        type: "object",
-        properties: { $status: { enum: ["done"] } },
+        properties: { $status: { enum: ["_"] } },
        required: ["$status"],
      } as unknown as string,
    };
@@ -173,11 +173,11 @@ describe("Suite 2: Graph Structure", () => {
      output: "Isolated",
      frontmatter: {
        type: "object",
-        properties: { $status: { enum: ["done"] } },
+        properties: { $status: { enum: ["_"] } },
        required: ["$status"],
      } as unknown as string,
    };
-    wf.graph.isolated = { done: { role: "$END", prompt: "done", location: null } };
+    wf.graph.isolated = { _: { role: "$END", prompt: "done", location: null } };
    const errors = validateWorkflow(wf);
    expect(errors.some((e) => e.includes('role "isolated" is not reachable from $START'))).toBe(
      true,
@@ -186,34 +186,34 @@ describe("Suite 2: Graph Structure", () => {

  test("2.6 edge target references invalid role", () => {
    const wf = makeWorkflow();
-    wf.graph.writer = { done: { role: "ghost", prompt: "Go to ghost", location: null } };
+    wf.graph.writer = { _: { role: "ghost", prompt: "Go to ghost", location: null } };
    const errors = validateWorkflow(wf);
    expect(errors.some((e) => e.includes('unknown target role "ghost"'))).toBe(true);
  });
 });

 describe("Suite 3: Status-Edge Consistency", () => {
-  test("3.1 user role using _ graph key is rejected", () => {
+  test("3.1 single-exit role with multiple graph keys", () => {
    const wf = makeWorkflow();
-    wf.graph.writer = { _: { role: "reviewer", prompt: "Review", location: null } };
+    wf.graph.writer = {
+      _: { role: "reviewer", prompt: "Review", location: null },
+      extra: { role: "$END", prompt: "Done", location: null },
+    };
    const errors = validateWorkflow(wf);
    expect(
      errors.some((e) =>
-        e.includes('role "writer" must use explicit $status keys in graph, not "_"'),
+        e.includes('role "writer" is single-exit but has status keys other than "_"'),
      ),
    ).toBe(true);
  });

-  test("3.2 user role graph key not matching $status enum", () => {
+  test("3.2 single-exit role missing _ key", () => {
    const wf = makeWorkflow();
-    wf.graph.writer = { wrong: { role: "reviewer", prompt: "Review", location: null } };
+    wf.graph.writer = { done: { role: "reviewer", prompt: "Review", location: null } };
    const errors = validateWorkflow(wf);
-    expect(errors.some((e) => e.includes('role "writer" graph has extra status keys: wrong'))).toBe(
-      true,
-    );
-    expect(errors.some((e) => e.includes('role "writer" graph is missing status keys: done'))).toBe(
-      true,
-    );
+    expect(
+      errors.some((e) => e.includes('role "writer" is single-exit but graph has no "_" key')),
+    ).toBe(true);
  });

  test("3.3 multi-exit role with extra statuses", () => {
@@ -244,11 +244,9 @@ describe("Suite 3: Status-Edge Consistency", () => {
    const wf = makeWorkflow();
    wf.graph.reviewer = { _: { role: "$END", prompt: "Done", location: null } };
    const errors = validateWorkflow(wf);
-    expect(
-      errors.some((e) =>
-        e.includes('role "reviewer" must use explicit $status keys in graph, not "_"'),
-      ),
-    ).toBe(true);
+    expect(errors.some((e) => e.includes('role "reviewer" is multi-exit but graph uses "_"'))).toBe(
+      true,
+    );
  });
 });

@@ -316,20 +314,20 @@ describe("Suite 3b: Enum-Based Multi-Exit", () => {
    expect(errors.some((e) => e.includes("missing status keys: rejected"))).toBe(true);
  });

-  test("3b.4 enum with single explicit value passes", () => {
+  test("3b.4 enum with single value (not multi-exit) treated as single-exit", () => {
    const wf = makeWorkflow();
    wf.roles.writer = {
      ...wf.roles.writer,
      frontmatter: {
        type: "object",
        properties: {
-          $status: { enum: ["ready"] },
+          $status: { enum: ["_"] },
          plan: { type: "string" },
        },
        required: ["$status", "plan"],
      } as unknown as string,
    };
-    wf.graph.writer = { ready: { role: "reviewer", prompt: "Review: {{{plan}}}", location: null } };
+    wf.graph.writer = { _: { role: "reviewer", prompt: "Review: {{{plan}}}", location: null } };
    const errors = validateWorkflow(wf);
    expect(errors).toEqual([]);
  });
@@ -357,15 +355,13 @@ describe("Suite 3b: Enum-Based Multi-Exit", () => {
 });

 describe("Suite 4: Mustache Template Variable Existence", () => {
-  test("4.1 prompt references nonexistent variable (enum status)", () => {
+  test("4.1 prompt references nonexistent variable (single-exit)", () => {
    const wf = makeWorkflow();
-    wf.graph.writer = {
-      done: { role: "reviewer", prompt: "Review: {{{branch}}}", location: null },
-    };
+    wf.graph.writer = { _: { role: "reviewer", prompt: "Review: {{{branch}}}", location: null } };
    const errors = validateWorkflow(wf);
    expect(
-      errors.some(
-        (e) => e.includes('prompt variable "branch"') && e.includes('role "writer" frontmatter'),
+      errors.some((e) =>
+        e.includes('prompt variable "branch" not found in role "writer" frontmatter'),
      ),
    ).toBe(true);
  });
@@ -392,7 +388,7 @@ describe("Suite 4: Mustache Template Variable Existence", () => {

  test("4.4 $status variable is always valid", () => {
    const wf = makeWorkflow();
-    wf.graph.writer = { done: { role: "reviewer", prompt: "Status: {{$status}}", location: null } };
+    wf.graph.writer = { _: { role: "reviewer", prompt: "Status: {{$status}}", location: null } };
    const errors = validateWorkflow(wf);
    expect(errors).toEqual([]);
  });
@@ -460,14 +456,14 @@ describe("Suite 6: Multiple Errors Collection", () => {
      output: "None",
      frontmatter: {
        type: "object",
-        properties: { $status: { enum: ["done"] } },
+        properties: { $status: { enum: ["_"] } },
        required: ["$status"],
      } as unknown as string,
    };
    // unknown graph reference
-    wf.graph.nonexistent = { done: { role: "$END", prompt: "done", location: null } };
+    wf.graph.nonexistent = { _: { role: "$END", prompt: "done", location: null } };
    // bad mustache var
-    wf.graph.writer = { done: { role: "reviewer", prompt: "{{{badvar}}}", location: null } };
+    wf.graph.writer = { _: { role: "reviewer", prompt: "{{{badvar}}}", location: null } };
    const errors = validateWorkflow(wf);
    expect(errors.length).toBeGreaterThanOrEqual(3);
  });
@@ -31,7 +31,7 @@ function makeMinimalPayload(name: string, description: string): WorkflowPayload
        frontmatter: {
          type: "object",
          properties: {
-            $status: { type: "string", enum: ["done"] },
+            $status: { type: "string" },
          },
          required: ["$status"],
        } as unknown as CasRef,
@@ -39,7 +39,7 @@ function makeMinimalPayload(name: string, description: string): WorkflowPayload
    },
    graph: {
      $START: { _: { role: "worker", prompt: "start working", location: null } },
-      worker: { done: { role: "$END", prompt: "done", location: null } },
+      worker: { _: { role: "$END", prompt: "done", location: null } },
    },
  };
 }
@@ -5,13 +5,14 @@ import { Command } from "commander";
 import { cmdConfigGet, cmdConfigList, cmdConfigSet } from "./commands/config.js";
 import { cmdLogClean, cmdLogList, cmdLogShow } from "./commands/log.js";
 import {
-  cmdPromptAdapterDeveloping,
+  cmdPromptAdapter,
+  cmdPromptAuthor,
  cmdPromptBootstrap,
+  cmdPromptDeveloper,
  cmdPromptList,
  cmdPromptSetup,
  cmdPromptUsage,
-  cmdPromptUsageReference,
-  cmdPromptWorkflowAuthoring,
+  cmdPromptUser,
 } from "./commands/prompt.js";
 import { cmdSetup, cmdSetupInteractive } from "./commands/setup.js";
 import { cmdStepFork, cmdStepList, cmdStepRead, cmdStepShow } from "./commands/step.js";
@@ -522,24 +523,31 @@ prompt
  });

 prompt
-  .command("usage-reference")
-  .description("Print the usage reference (CLI guide + typical workflows)")
+  .command("adapter")
+  .description("Print the adapter reference (building agent adapters)")
  .action(() => {
-    console.log(cmdPromptUsageReference());
+    console.log(cmdPromptAdapter());
  });

 prompt
-  .command("workflow-authoring")
-  .description("Print the workflow authoring reference (YAML design guide)")
+  .command("author")
+  .description("Print the author reference (workflow YAML design guide)")
  .action(() => {
-    console.log(cmdPromptWorkflowAuthoring());
+    console.log(cmdPromptAuthor());
  });

 prompt
-  .command("adapter-developing")
-  .description("Print the adapter developing reference (building agent adapters)")
+  .command("developer")
+  .description("Print the developer reference (coding conventions + architecture)")
  .action(() => {
-    console.log(cmdPromptAdapterDeveloping());
+    console.log(cmdPromptDeveloper());
+  });
+
+prompt
+  .command("user")
+  .description("Print the user reference (CLI guide + typical workflows)")
+  .action(() => {
+    console.log(cmdPromptUser());
  });

 prompt
@@ -1,21 +1,24 @@
 import {
-  generateAdapterDevelopingReference,
+  generateAdapterReference,
+  generateAuthorReference,
  generateBootstrapReference,
-  generateUsageReference,
-  generateWorkflowAuthoringReference,
+  generateDeveloperReference,
+  generateUserReference,
 } from "@united-workforce/util";

 export {
-  generateAdapterDevelopingReference as cmdPromptAdapterDeveloping,
+  generateAdapterReference as cmdPromptAdapter,
+  generateAuthorReference as cmdPromptAuthor,
  generateBootstrapReference as cmdPromptBootstrap,
-  generateUsageReference as cmdPromptUsageReference,
-  generateWorkflowAuthoringReference as cmdPromptWorkflowAuthoring,
+  generateDeveloperReference as cmdPromptDeveloper,
+  generateUserReference as cmdPromptUser,
 };

 const PROMPT_ENTRIES: ReadonlyArray<{ name: string; generate: () => string }> = [
-  { name: "usage", generate: generateUsageReference },
-  { name: "workflow-authoring", generate: generateWorkflowAuthoringReference },
-  { name: "adapter-developing", generate: generateAdapterDevelopingReference },
+  { name: "user", generate: generateUserReference },
+  { name: "author", generate: generateAuthorReference },
+  { name: "developer", generate: generateDeveloperReference },
+  { name: "adapter", generate: generateAdapterReference },
  { name: "bootstrap", generate: generateBootstrapReference },
 ];

@@ -24,9 +27,7 @@ export function cmdPromptList(): ReadonlyArray<string> {
 }

 export function cmdPromptUsage(): string {
-  return PROMPT_ENTRIES.filter((e) => e.name !== "bootstrap")
-    .map((e) => e.generate())
-    .join("\n\n---\n\n");
+  return PROMPT_ENTRIES.map((e) => e.generate()).join("\n\n---\n\n");
 }

 export function cmdPromptSetup(): string {
@@ -41,7 +42,7 @@ so that you know how to use the \`uwf\` CLI for workflow orchestration.
   \`\`\`bash
   uwf --help
   \`\`\`
-   If not installed: \`npm install -g @united-workforce/cli\`
+   If not installed: \`bun add -g @united-workforce/cli\`

 2. **Clean up old versions of the skill** (if any exist):
   - Look for any existing uwf/workflow skill files in your skill directories
@@ -50,24 +51,43 @@ so that you know how to use the \`uwf\` CLI for workflow orchestration.
   - If the version matches the current CLI version (\`uwf --version\`), you're done — no update needed.
   - If the version is older, **delete the old skill file** before proceeding.

-3. **Install the bootstrap skill:**
+3. **Get the usage skill content:**
   \`\`\`bash
-   uwf prompt bootstrap > ~/.hermes/skills/uwf/SKILL.md
+   uwf prompt usage
   \`\`\`
-   The bootstrap prompt already includes complete YAML frontmatter — no editing needed.

-4. **Verify** the skill is loadable by your agent framework.
+4. **Create a SKILL.md file** in your skill directory (e.g. \`~/.hermes/skills/uwf/SKILL.md\`).
+   The output of \`uwf prompt usage\` is the skill body WITHOUT frontmatter.
+   You must prepend the following YAML frontmatter:
+
+   \`\`\`yaml
+   ---
+   name: uwf
+   description: "Use when orchestrating multi-agent workflows with uwf (United Workforce). Covers CLI usage, workflow YAML authoring, moderator logic, and agent development."
+   version: <VERSION>
+   author: United Workforce
+   license: MIT
+   metadata:
+     hermes:
+       tags: [workflow, agents, orchestration, cli, yaml]
+   ---
+   \`\`\`
+
+   Replace \`<VERSION>\` with the installed CLI version (from \`uwf --version\`).
+
+5. **Verify** the skill is loadable by your agent framework.

 ## Individual prompts

-Each prompt outputs a complete SKILL.md with frontmatter — pipe directly to a file:
+You can also get individual reference sections:

 \`\`\`bash
-uwf prompt list                                              # list available prompt names
-uwf prompt usage > ~/.hermes/skills/uwf-usage/SKILL.md      # CLI usage guide
-uwf prompt workflow-authoring > ~/.hermes/skills/uwf-workflow-authoring/SKILL.md
-uwf prompt adapter-developing > ~/.hermes/skills/uwf-adapter-developing/SKILL.md
-uwf prompt bootstrap > ~/.hermes/skills/uwf/SKILL.md        # bootstrap skill
+uwf prompt list                # list available prompt names
+uwf prompt user                # user reference (CLI guide + typical workflows)
+uwf prompt author              # author reference (workflow YAML design guide)
+uwf prompt developer           # developer reference (coding conventions + architecture)
+uwf prompt adapter             # adapter reference (building agent adapters)
+uwf prompt bootstrap           # bootstrap skill YAML for Hermes agents
 \`\`\`

 ## Notes
@@ -6,7 +6,7 @@ import type {
  StepNodePayload,
  ThreadId,
 } from "@united-workforce/protocol";
-import { createUwfStore, getThread, type UwfStore } from "../store.js";
+import { createUwfStore, findHistoryEntry, getThread, type UwfStore } from "../store.js";

 type ChainState = {
  startHash: CasRef;
@@ -207,6 +207,10 @@ async function resolveHeadHash(storageRoot: string, threadId: ThreadId): Promise
  if (entry !== null) {
    return entry.head;
  }
+  const hist = findHistoryEntry(uwf.varStore, threadId);
+  if (hist !== null) {
+    return hist.head;
+  }
  fail(`thread not found: ${threadId}`);
 }

@@ -66,7 +66,6 @@ export async function cmdStepList(
      agent: item.payload.agent,
      timestamp: item.timestamp,
      durationMs: item.payload.completedAtMs - item.payload.startedAtMs,
-      usage: item.payload.usage ?? null,
    });
  }

@@ -115,10 +114,8 @@ export async function cmdStepFork(
  const newThreadId = generateUlid(Date.now()) as ThreadId;
  setThread(uwf.varStore, newThreadId, {
    head: stepHash,
-    status: "idle",
    suspendedRole: null,
    suspendMessage: null,
-    completedAt: null,
  });

  return {
@@ -38,14 +38,17 @@ import { createMarker, deleteMarker, isThreadRunning } from "../background/index
 import { createIncludeTag } from "../include.js";
 import { evaluate, isSuspendResult } from "../moderator/index.js";
 import {
-  completeThread,
+  addHistoryEntry,
  createUwfStore,
+  deleteThread,
+  findHistoryEntry,
  getThread,
-  loadActiveThreads,
-  loadHistoryThreads,
+  loadAllHistory,
+  loadAllThreads,
  loadWorkflowRegistry,
  resolveWorkflowHash,
  setThread,
+  type ThreadHistoryLine,
  type UwfStore,
 } from "../store.js";
 import { checkWorkflowFilenameConsistency, isCasRef, parseWorkflowPayload } from "../validate.js";
@@ -482,55 +485,61 @@ export async function cmdThreadShow(
 ): Promise<ThreadShowOutput> {
  const uwf = await createUwfStore(storageRoot);
  const entry = getThread(uwf.varStore, threadId);
-  if (entry === null) {
-    fail(`thread not found: ${threadId}`);
-  }
+  if (entry !== null) {
+    const activeHead = entry.head;
+    const workflow = resolveWorkflowFromHead(uwf, activeHead);
+    if (workflow === null) {
+      fail(`failed to resolve workflow from head: ${activeHead}`);
+    }

-  const activeHead = entry.head;
-  const workflow = resolveWorkflowFromHead(uwf, activeHead);
-  if (workflow === null) {
-    fail(`failed to resolve workflow from head: ${activeHead}`);
-  }
+    const status = await resolveActiveThreadStatus(
+      storageRoot,
+      threadId,
+      uwf,
+      activeHead,
+      workflow,
+    );
+    const currentRole = resolveCurrentRole(uwf, activeHead, workflow);
+    const suspendFields = resolveSuspendFieldsForShow(entry, status, uwf, activeHead, workflow);
+
+    const hint =
+      status === "suspended"
+        ? `Thread is suspended. Resume with: uwf thread resume ${threadId}`
+        : null;

-  // Determine if this is a completed/cancelled thread
-  if (entry.status === "completed" || entry.status === "cancelled") {
-    const hint = null;
    return {
      workflow,
      thread: threadId,
      head: activeHead,
-      status: entry.status,
-      currentRole: null,
-      suspendedRole: null,
-      suspendMessage: null,
-      done: true,
+      status,
+      currentRole,
+      suspendedRole: suspendFields.suspendedRole,
+      suspendMessage: suspendFields.suspendMessage,
+      done: false,
      background: null,
      hint,
    };
  }

-  // Active thread
-  const status = await resolveActiveThreadStatus(storageRoot, threadId, uwf, activeHead, workflow);
-  const currentRole = resolveCurrentRole(uwf, activeHead, workflow);
-  const suspendFields = resolveSuspendFieldsForShow(entry, status, uwf, activeHead, workflow);
+  const hist = findHistoryEntry(uwf.varStore, threadId);
+  if (hist !== null) {
+    const status: ThreadStatus = hist.reason === "cancelled" ? "cancelled" : "completed";

-  const hint =
-    status === "suspended"
-      ? `Thread is suspended. Resume with: uwf thread resume ${threadId}`
-      : null;
+    return {
+      workflow: hist.workflow,
+      thread: threadId,
+      head: hist.head,
+      status,
+      currentRole: null,
+      suspendedRole: null,
+      suspendMessage: null,
+      done: true,
+      background: null,
+      hint: null,
+    };
+  }

-  return {
-    workflow,
-    thread: threadId,
-    head: activeHead,
-    status,
-    currentRole,
-    suspendedRole: suspendFields.suspendedRole,
-    suspendMessage: suspendFields.suspendMessage,
-    done: false,
-    background: null,
-    hint,
-  };
+  fail(`thread not found: ${threadId}`);
 }

 export type ThreadListItemWithStatus = ThreadListItem & {
@@ -585,20 +594,19 @@ async function collectActiveThreads(
 }

 function collectCompletedThreads(
-  uwf: UwfStore,
+  varStore: VarStore,
  activeIds: Set<ThreadId>,
 ): ThreadListItemWithStatus[] {
  const items: ThreadListItemWithStatus[] = [];
-  const history = loadHistoryThreads(uwf.varStore);
+  const history = loadAllHistory(varStore);
  const seen = new Set<ThreadId>(); // Deduplication (issue #470)
-  for (const [threadId, entry] of Object.entries(history)) {
-    if (!activeIds.has(threadId as ThreadId) && !seen.has(threadId as ThreadId)) {
-      seen.add(threadId as ThreadId);
-      const status = entry.status;
-      const workflow = resolveWorkflowFromHead(uwf, entry.head);
+  for (const entry of history) {
+    if (!activeIds.has(entry.thread) && !seen.has(entry.thread)) {
+      seen.add(entry.thread);
+      const status = entry.reason === "cancelled" ? "cancelled" : "completed";
      items.push({
-        thread: threadId as ThreadId,
-        workflow: workflow ?? "",
+        thread: entry.thread,
+        workflow: entry.workflow,
        head: entry.head,
        status,
        currentRole: null,
@@ -651,7 +659,7 @@ export async function cmdThreadList(
  take: number | null,
 ): Promise<ThreadListItemWithStatus[]> {
  const uwf = await createUwfStore(storageRoot);
-  const index = loadActiveThreads(uwf.varStore);
+  const index = loadAllThreads(uwf.varStore);

  // Collect active threads
  let items = await collectActiveThreads(storageRoot, uwf, index);
@@ -663,7 +671,7 @@ export async function cmdThreadList(
    statusFilter.includes("cancelled");
  if (includeCompleted) {
    const activeIds = new Set(items.map((i) => i.thread));
-    const completedItems = collectCompletedThreads(uwf, activeIds);
+    const completedItems = collectCompletedThreads(uwf.varStore, activeIds);
    items = items.concat(completedItems);
  }

@@ -961,12 +969,6 @@ function resolveAgentConfig(
  agentOverride: string | null,
 ): AgentConfig {
  if (agentOverride !== null) {
-    // Try config alias first (e.g. "hermes" → config.agents.hermes),
-    // then fall back to raw command name (e.g. "uwf-hermes" or "/usr/bin/agent").
-    const fromAlias = config.agents[agentOverride as AgentAlias];
-    if (fromAlias !== undefined) {
-      return fromAlias;
-    }
    return parseAgentOverride(agentOverride);
  }

@@ -1033,11 +1035,17 @@ function spawnAgent(
  return obj as unknown as AdapterOutput;
 }

-function archiveThread(uwf: UwfStore, threadId: ThreadId, _workflow: CasRef, _head: CasRef): void {
-  completeThread(uwf.varStore, threadId, "completed");
+function archiveThread(uwf: UwfStore, threadId: ThreadId, workflow: CasRef, head: CasRef): void {
+  deleteThread(uwf.varStore, threadId);
+  addHistoryEntry(uwf.varStore, {
+    thread: threadId,
+    workflow,
+    head,
+    completedAt: Date.now(),
+    reason: "completed",
+  });
 }

-// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: orchestration function with inherent branching
 export async function cmdThreadResume(
  storageRoot: string,
  threadId: ThreadId,
@@ -1059,87 +1067,43 @@ export async function cmdThreadResume(
  const chain = walkChain(uwf, headHash);
  const workflowHash = chain.start.workflow;

-  // Check entry.status first for completed/cancelled (like in cmdThreadShow)
-  let status: ThreadStatus;
-  if (entry.status === "completed" || entry.status === "cancelled") {
-    status = entry.status;
-  } else {
-    status = await resolveActiveThreadStatus(storageRoot, threadId, uwf, headHash, workflowHash);
+  const status = await resolveActiveThreadStatus(
+    storageRoot,
+    threadId,
+    uwf,
+    headHash,
+    workflowHash,
+  );
+  if (status !== "suspended") {
+    fail(`thread is not suspended: ${threadId} (status: ${status})`);
  }

-  if (status !== "suspended" && status !== "completed") {
-    fail(`thread cannot be resumed: ${threadId} (status: ${status})`);
+  const suspendFields = resolveSuspendFieldsForShow(entry, status, uwf, headHash, workflowHash);
+  if (suspendFields.suspendedRole === null) {
+    fail(`thread is suspended but suspendedRole is missing: ${threadId}`);
+  }
+  if (suspendFields.suspendMessage === null) {
+    fail(`thread is suspended but suspendMessage is missing: ${threadId}`);
  }

+  const resumePrompt = buildResumePrompt(suspendFields.suspendMessage, supplement);
  const plog = createProcessLogger({
    storageRoot,
    context: { thread: threadId, workflow: workflowHash },
  });

-  if (status === "suspended") {
-    const suspendFields = resolveSuspendFieldsForShow(entry, status, uwf, headHash, workflowHash);
-    if (suspendFields.suspendedRole === null) {
-      fail(`thread is suspended but suspendedRole is missing: ${threadId}`);
-    }
-    if (suspendFields.suspendMessage === null) {
-      fail(`thread is suspended but suspendMessage is missing: ${threadId}`);
-    }
-
-    const resumePrompt = buildResumePrompt(suspendFields.suspendMessage, supplement);
-
-    plog.log(
-      PL_THREAD_RESUME,
-      `resume role=${suspendFields.suspendedRole} supplement=${supplement !== null}`,
-      null,
-    );
-
-    return cmdThreadStepOnce(storageRoot, threadId, agentOverride, plog, {
-      role: suspendFields.suspendedRole,
-      prompt: resumePrompt,
-    });
-  }
-
-  // status === "completed"
-  const workflow = loadWorkflowPayload(uwf, workflowHash);
-  const startResult = evaluate(workflow.graph, START_ROLE, {});
-  if (!startResult.ok) {
-    fail(`failed to evaluate $START: ${startResult.error.message}`);
-  }
-  if (isSuspendResult(startResult.value)) {
-    fail("workflow cannot start with $SUSPEND");
-  }
-  if (startResult.value.role === END_ROLE) {
-    fail("workflow cannot start with $END");
-  }
-
-  const startRole = startResult.value.role;
-  const completedPromptPrefix = "Previous run completed. Resuming with additional context.";
-  const completedResumePrompt =
-    supplement !== null && supplement !== ""
-      ? `${completedPromptPrefix}\n\n${supplement}`
-      : completedPromptPrefix;
-
-  const updatedEntry = { ...entry, status: "idle" as const, completedAt: null };
-  setThread(uwf.varStore, threadId, updatedEntry);
-
  plog.log(
    PL_THREAD_RESUME,
-    `resume completed role=${startRole} supplement=${supplement !== null}`,
+    `resume role=${suspendFields.suspendedRole} supplement=${supplement !== null}`,
    null,
  );

  return cmdThreadStepOnce(storageRoot, threadId, agentOverride, plog, {
-    role: startRole,
-    prompt: completedResumePrompt,
+    role: suspendFields.suspendedRole,
+    prompt: resumePrompt,
  });
 }

-export function validateCount(count: number): void {
-  if (count < 1 || !Number.isInteger(count)) {
-    throw new Error(`--count must be a positive integer, got: ${count}`);
-  }
-}
-
 export async function cmdThreadExec(
  storageRoot: string,
  threadId: ThreadId,
@@ -1148,7 +1112,9 @@ export async function cmdThreadExec(
  background: boolean,
  backgroundWorker: boolean,
 ): Promise<StepOutput[]> {
-  validateCount(count);
+  if (count < 1 || !Number.isInteger(count)) {
+    fail(`--count must be a positive integer, got: ${count}`);
+  }

  // Check if thread is already running in background (unless we ARE the background worker)
  if (!backgroundWorker) {
@@ -1283,7 +1249,7 @@ function resolveResumeStepTarget(
 }

 async function resolveModeratorStepTarget(
-  _storageRoot: string,
+  storageRoot: string,
  threadId: ThreadId,
  entry: ThreadIndexEntry,
  headHash: CasRef,
@@ -1352,7 +1318,7 @@ async function resolveModeratorStepTarget(
 }

 async function finalizeAgentStep(
-  _storageRoot: string,
+  storageRoot: string,
  threadId: ThreadId,
  workflowHash: CasRef,
  workflow: WorkflowPayload,
@@ -1484,6 +1450,10 @@ async function resolveHeadHash(storageRoot: string, threadId: ThreadId): Promise
  if (entry !== null) {
    return entry.head;
  }
+  const hist = findHistoryEntry(uwf.varStore, threadId);
+  if (hist !== null) {
+    return hist.head;
+  }
  fail(`thread not found: ${threadId}`);
 }

@@ -1563,6 +1533,7 @@ export async function cmdThreadCancel(
  if (entry === null) {
    fail(`thread not active: ${threadId}`);
  }
+  const head = entry.head;

  // Check if thread is running in background and terminate it
  const runningMarker = await isThreadRunning(storageRoot, threadId);
@@ -1575,7 +1546,21 @@ export async function cmdThreadCancel(
    await deleteMarker(storageRoot, threadId);
  }

-  completeThread(uwf.varStore, threadId, "cancelled");
+  const workflow = resolveWorkflowFromHead(uwf, head);
+  if (workflow === null) {
+    fail(`failed to resolve workflow from head: ${head}`);
+  }
+
+  deleteThread(uwf.varStore, threadId);
+
+  const historyEntry: ThreadHistoryLine = {
+    thread: threadId,
+    workflow,
+    head,
+    completedAt: Date.now(),
+    reason: "cancelled",
+  };
+  addHistoryEntry(uwf.varStore, historyEntry);

  return { thread: threadId, cancelled: true };
 }
@@ -8,8 +8,7 @@ mustache.escape = (text: string) => text;

 const START_ROLE = "$START";
 const SUSPEND_ROLE = "$SUSPEND";
-// $START is a special entry node with no agent output — it always uses this key.
-const START_STATUS = "_";
+const UNIT_STATUS = "_";

 type LastOutput = Record<string, unknown>;

@@ -20,17 +19,12 @@ export function evaluate(
  lastRole: string,
  lastOutput: LastOutput,
 ): Result<EvaluateResult, Error> {
-  let status: string;
-  if (lastRole === START_ROLE) {
-    status = START_STATUS;
-  } else if (typeof lastOutput[STATUS_KEY] === "string") {
-    status = lastOutput[STATUS_KEY] as string;
-  } else {
-    return {
-      ok: false,
-      error: new Error(`agent output for role "${lastRole}" is missing required "$status" string`),
-    };
-  }
+  const status =
+    lastRole === START_ROLE
+      ? UNIT_STATUS
+      : typeof lastOutput[STATUS_KEY] === "string"
+        ? (lastOutput[STATUS_KEY] as string)
+        : UNIT_STATUS;

  const roleTargets = graph[lastRole];
  if (roleTargets === undefined) {
@@ -6,7 +6,13 @@ import { join } from "node:path";

 import { bootstrap, type Hash, type Store, type VarStore } from "@ocas/core";
 import { createFsStore, createSqliteVarStore } from "@ocas/fs";
-import type { CasRef, ThreadId, ThreadIndexEntry, ThreadsIndex } from "@united-workforce/protocol";
+import type {
+  CasRef,
+  ThreadId,
+  ThreadIndexEntry,
+  ThreadListItem,
+  ThreadsIndex,
+} from "@united-workforce/protocol";
 import { parseThreadsIndex } from "@united-workforce/protocol";
 import { parse } from "yaml";

@@ -20,6 +26,9 @@ export const REGISTRY_VAR_PREFIX = "@uwf/registry/";
 /** Variable name prefix for active thread entries (`@uwf/thread/<thread-id>`). */
 export const THREAD_VAR_PREFIX = "@uwf/thread/";

+/** Variable name prefix for completed/cancelled thread history (`@uwf/history/<thread-id>`). */
+export const HISTORY_VAR_PREFIX = "@uwf/history/";
+
 /** A workflow entry discovered from the project-local .workflows/ directory. */
 export type ProjectWorkflowEntry = {
  /** Workflow name (from YAML `name` field, equals filename stem). */
@@ -147,6 +156,11 @@ export function getThreadsPath(storageRoot: string): string {
  return join(storageRoot, "threads.yaml");
 }

+export type ThreadHistoryLine = ThreadListItem & {
+  completedAt: number; // epoch milliseconds; writers in this change pass Date.now()
+  reason: "completed" | "cancelled" | null; // null when the stored reason is neither value
+};
+
 export type UwfStore = {
  storageRoot: string;
  store: Store;
@@ -165,7 +179,6 @@ export async function createUwfStore(storageRoot: string): Promise<UwfStore> {
  await migrateWorkflowRegistryIfNeeded(storageRoot, varStore);
  await migrateThreadsIndexIfNeeded(storageRoot, varStore);
  await migrateHistoryIfNeeded(storageRoot, varStore);
-  migrateHistoryVarsToThreadVars(varStore);
  return { storageRoot, store, schemas, varStore };
 }

@@ -286,10 +299,8 @@ function threadVarName(threadId: ThreadId): string {
 function entryFromVariable(v: { value: string; tags: Record<string, string> }): ThreadIndexEntry {
  return {
    head: v.value as CasRef,
-    status: (v.tags.status ?? "idle") as ThreadIndexEntry["status"],
    suspendedRole: v.tags.suspendedRole ?? null,
    suspendMessage: v.tags.suspendMessage ?? null,
-    completedAt: v.tags.completedAt !== undefined ? Number(v.tags.completedAt) : null,
  };
 }

@@ -320,74 +331,21 @@ export function setThread(varStore: VarStore, threadId: ThreadId, entry: ThreadI
  // Head CAS nodes may use different schemas (StartNode vs StepNode) — clear all variants first.
  varStore.remove(name);
  const tags: Record<string, string> = {};
-  if (entry.status !== "idle") {
-    tags.status = entry.status;
-  }
  if (entry.suspendedRole !== null) {
    tags.suspendedRole = entry.suspendedRole;
  }
  if (entry.suspendMessage !== null) {
    tags.suspendMessage = entry.suspendMessage;
  }
-  if (entry.completedAt !== null) {
-    tags.completedAt = String(entry.completedAt);
-  }
  varStore.set(name, entry.head, { tags });
 }

-/** Load only active threads (status not in completed/cancelled). */
-export function loadActiveThreads(varStore: VarStore): ThreadsIndex {
-  const all = loadAllThreads(varStore);
-  const active: ThreadsIndex = {};
-  for (const [threadId, entry] of Object.entries(all)) {
-    if (entry.status !== "completed" && entry.status !== "cancelled") {
-      active[threadId as ThreadId] = entry;
-    }
-  }
-  return active;
+/** Remove an active thread entry (on complete/cancel); does not write history itself. */
+export function deleteThread(varStore: VarStore, threadId: ThreadId): void {
+  varStore.remove(threadVarName(threadId));
 }

-/** Load only completed/cancelled threads (history). */
-export function loadHistoryThreads(varStore: VarStore): ThreadsIndex {
-  const all = loadAllThreads(varStore);
-  const history: ThreadsIndex = {};
-  for (const [threadId, entry] of Object.entries(all)) {
-    if (entry.status === "completed" || entry.status === "cancelled") {
-      history[threadId as ThreadId] = entry;
-    }
-  }
-  return history;
-}
-
-/** Complete a thread by marking it completed or cancelled. */
-export function completeThread(
-  varStore: VarStore,
-  threadId: ThreadId,
-  reason: "completed" | "cancelled",
-): void {
-  const entry = getThread(varStore, threadId);
-  if (entry === null) {
-    return;
-  }
-  const completed = {
-    head: entry.head,
-    status: reason,
-    suspendedRole: null,
-    suspendMessage: null,
-    completedAt: Date.now(),
-  } as ThreadIndexEntry;
-  setThread(varStore, threadId, completed);
-}
-
-type LegacyHistoryEntry = {
-  thread: ThreadId;
-  workflow: CasRef;
-  head: CasRef;
-  completedAt: number;
-  reason: "completed" | "cancelled" | null;
-};
-
-function parseLegacyHistoryJsonlLine(trimmed: string): LegacyHistoryEntry | null {
+function parseHistoryJsonlLine(trimmed: string): ThreadHistoryLine | null {
  let raw: unknown;
  try {
    raw = JSON.parse(trimmed) as unknown;
@@ -421,7 +379,7 @@ function parseLegacyHistoryJsonlLine(trimmed: string): LegacyHistoryEntry | null
  return null;
 }

-/** One-time migration: `~/.uwf/history.jsonl` → `@uwf/thread/*` variables with status tags. */
+/** One-time migration: `~/.uwf/history.jsonl` → `@uwf/history/*` variables. */
 export async function migrateHistoryIfNeeded(
  storageRoot: string,
  varStore: VarStore,
@@ -437,43 +395,47 @@ export async function migrateHistoryIfNeeded(
    if (trimmed === "") {
      continue;
    }
-    const entry = parseLegacyHistoryJsonlLine(trimmed);
+    const entry = parseHistoryJsonlLine(trimmed);
    if (entry !== null) {
-      const status = entry.reason === "cancelled" ? "cancelled" : "completed";
-      const threadEntry: ThreadIndexEntry = {
-        head: entry.head,
-        status: status as ThreadIndexEntry["status"],
-        suspendedRole: null,
-        suspendMessage: null,
-        completedAt: entry.completedAt,
-      };
-      setThread(varStore, entry.thread, threadEntry);
+      addHistoryEntry(varStore, entry);
    }
  }

  await rename(path, `${path}.migrated`);
 }

-/** Migrate `@uwf/history/*` variables to `@uwf/thread/*` with status tags. */
-export function migrateHistoryVarsToThreadVars(varStore: VarStore): void {
-  const LEGACY_HISTORY_VAR_PREFIX = "@uwf/history/";
-  const vars = varStore.list({ namePrefix: LEGACY_HISTORY_VAR_PREFIX });
-
-  for (const v of vars) {
-    const threadId = v.name.slice(LEGACY_HISTORY_VAR_PREFIX.length) as ThreadId;
-    const reason = v.tags.reason;
-    const status = reason === "cancelled" ? "cancelled" : "completed";
-    const completedAt = Number(v.tags.completedAt ?? Date.now());
-
-    const threadEntry: ThreadIndexEntry = {
-      head: v.value as CasRef,
-      status: status as ThreadIndexEntry["status"],
-      suspendedRole: null,
-      suspendMessage: null,
-      completedAt,
-    };
-
-    setThread(varStore, threadId, threadEntry);
-    varStore.remove(v.name);
-  }
+/** Decode one history variable: the value is the head and the tags carry the other fields. */
+function historyLineFromVariable(v: ReturnType<VarStore["list"]>[number]): ThreadHistoryLine {
+  const reason = v.tags.reason;
+  return {
+    thread: v.name.slice(HISTORY_VAR_PREFIX.length) as ThreadId,
+    workflow: v.tags.workflow ?? "",
+    head: v.value as CasRef,
+    completedAt: Number(v.tags.completedAt ?? "0"),
+    reason: reason === "completed" || reason === "cancelled" ? reason : null,
+  };
+}
+
+/** List every entry stored under `HISTORY_VAR_PREFIX`. */
+export function loadAllHistory(varStore: VarStore): ThreadHistoryLine[] {
+  const vars = varStore.list({ namePrefix: HISTORY_VAR_PREFIX });
+  return vars.map((v) => historyLineFromVariable(v));
+}
+
+/** Return the history entry for `threadId`, or null when none is stored. */
+export function findHistoryEntry(varStore: VarStore, threadId: ThreadId): ThreadHistoryLine | null {
+  const name = `${HISTORY_VAR_PREFIX}${threadId}`;
+  // The prefix query may also return longer names, so keep only the exact match.
+  const v = varStore.list({ namePrefix: name }).find((entry) => entry.name === name);
+  return v === undefined ? null : historyLineFromVariable(v);
+}
+
+/** Store `entry` under `HISTORY_VAR_PREFIX` + thread id: head as the value, the rest as tags. */
+export function addHistoryEntry(varStore: VarStore, entry: ThreadHistoryLine): void {
+  const tags = {
+    workflow: entry.workflow,
+    completedAt: String(entry.completedAt),
+    reason: entry.reason ?? "completed", // a null reason is persisted as "completed"
+  };
+  varStore.set(`${HISTORY_VAR_PREFIX}${entry.thread}`, entry.head, { tags });
 }
@@ -24,13 +24,17 @@ function isOneOfSchema(fm: unknown): fm is SchemaObj & { oneOf: SchemaObj[] } {
  return Array.isArray(obj.oneOf);
 }

-/** Check if a frontmatter schema declares "$status" as an enum (the required form for user roles). */
-function hasStatusEnum(fm: unknown): boolean {
+/** True when the schema's `$status` enum has more than one value besides "_" (enum multi-exit). */
+function isEnumMultiExit(fm: unknown): boolean {
  if (typeof fm !== "object" || fm === null) return false;
  const obj = fm as SchemaObj;
  const props = obj.properties as Record<string, SchemaObj> | undefined;
  if (!props?.$status) return false;
-  return Array.isArray(props.$status.enum);
+  const statusDef = props.$status;
+  if (!Array.isArray(statusDef.enum)) return false;
+  // "_" is excluded from the count: multi-exit requires more than one other enum value.
+  const statuses = (statusDef.enum as string[]).filter((s) => s !== "_");
+  return statuses.length > 1;
 }

 /** Extract status values from an enum-based $status field. */
@@ -39,7 +43,7 @@ function getEnumStatuses(fm: SchemaObj): string[] {
  if (!props?.$status) return [];
  const statusDef = props.$status;
  if (!Array.isArray(statusDef.enum)) return [];
-  return statusDef.enum as string[];
+  return (statusDef.enum as string[]).filter((s) => s !== "_");
 }

 /** Get property names from a schema object. */
@@ -190,19 +194,15 @@ function checkOneOfDiscriminant(
  }
 }

-/** Check status-edge consistency for a user role. "_" is reserved for $START and rejected here. */
-function checkStatusEdges(
+/** Check status-edge consistency for a multi-exit role. */
+function checkMultiExitEdges(
  roleName: string,
  graphKeys: Set<string>,
  statusSet: Set<string>,
  errors: string[],
 ): void {
  if (graphKeys.has("_")) {
-    errors.push(`role "${roleName}" must use explicit $status keys in graph, not "_"`);
-    return;
-  }
-  if (statusSet.has("_")) {
-    errors.push(`role "${roleName}" $status enum must use explicit values, not "_"`);
+    errors.push(`role "${roleName}" is multi-exit but graph uses "_"`);
    return;
  }

@@ -255,23 +255,50 @@ function checkRoleConsistency(payload: WorkflowPayload, errors: string[]): void
      const statuses = getOneOfStatuses(variants);

      checkOneOfDiscriminant(roleName, variants, statuses, errors);
-      checkStatusEdges(roleName, graphKeys, new Set(statuses), errors);
+      checkMultiExitEdges(roleName, graphKeys, new Set(statuses), errors);
      checkMultiExitMustache(roleName, graphEntry, variants, errors);
-    } else if (hasStatusEnum(fm)) {
+    } else if (isEnumMultiExit(fm)) {
      const statuses = getEnumStatuses(fm as SchemaObj);
-      checkStatusEdges(roleName, graphKeys, new Set(statuses), errors);
+      checkMultiExitEdges(roleName, graphKeys, new Set(statuses), errors);
      // For enum-based schemas, mustache vars come from the flat properties
-      checkEnumMustache(roleName, graphEntry, fm as SchemaObj, errors);
+      checkSingleExitMustache(roleName, graphEntry, fm as SchemaObj, errors);
    } else {
-      errors.push(
-        `role "${roleName}" must define "$status" as an enum (or oneOf const) in frontmatter`,
-      );
+      checkSingleExitRole(roleName, graphKeys, graphEntry, fm as SchemaObj | null, errors);
+    }
+  }
+}
+
+/** Validate a single-exit role: graph keys may only be "_", and "_" prompt vars must be declared. */
+function checkSingleExitRole(
+  roleName: string,
+  graphKeys: Set<string>,
+  graphEntry: Record<string, { role: string; prompt: string }>,
+  fm: SchemaObj | null,
+  errors: string[],
+): void {
+  // An empty key set and a lone "_" key are the two shapes that pass without an error.
+  const hasUnitKey = graphKeys.has("_");
+  if (!hasUnitKey && graphKeys.size > 0) {
+    errors.push(`role "${roleName}" is single-exit but graph has no "_" key`);
+  } else if (hasUnitKey && graphKeys.size > 1) {
+    errors.push(`role "${roleName}" is single-exit but has status keys other than "_"`);
+  }
+
+  const target = graphEntry._;
+  if (!target) return;
+
+  const vars = extractMustacheVars(target.prompt);
+  const known = fm ? getPropertyNames(fm) : new Set<string>();
+  for (const name of vars) {
+    if (name === "$status") continue;
+    if (!known.has(name)) {
+      errors.push(`prompt variable "${name}" not found in role "${roleName}" frontmatter`);
     }
   }
 }

 /** Check mustache vars in all edge prompts against flat schema properties. */
-function checkEnumMustache(
+function checkSingleExitMustache(
  roleName: string,
  graphEntry: Record<string, { role: string; prompt: string }>,
  fm: SchemaObj,
@@ -57,18 +57,9 @@ function isGraph(value: unknown): boolean {
  if (!isRecord(value)) {
    return false;
  }
-  return Object.entries(value).every(([node, statusMap]) => {
-    if (!isRecord(statusMap)) {
-      return false;
-    }
-    return Object.entries(statusMap).every(([status, target]) => {
-      // "_" is only valid as a status key for the $START entry node.
-      if (status === "_" && node !== "$START") {
-        return false;
-      }
-      return isTarget(target);
-    });
-  });
+  return Object.values(value).every(
+    (statusMap) => isRecord(statusMap) && Object.values(statusMap).every((t) => isTarget(t)),
+  );
 }

 /**
@@ -105,7 +96,6 @@ export function checkWorkflowFilenameConsistency(
 }

 /** Validate YAML-parsed workflow document shape (outputSchema may be inline JSON Schema). */
-// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: validation function with many field checks
 export function parseWorkflowPayload(raw: unknown): WorkflowPayload | null {
  if (!isRecord(raw)) {
    return null;
@@ -1,6 +1,6 @@
 {
  "name": "@united-workforce/dashboard",
-  "version": "0.1.0",
+  "version": "0.5.0-alpha.4",
  "private": true,
  "type": "module",
  "scripts": {
@@ -1,9 +0,0 @@
-# @united-workforce/eval
-
-## 0.1.2
-
-### Patch Changes
-
- 850a3b2: fix: resolve --agent override via config alias before raw command
-
-  `resolveAgentConfig()` now checks `config.agents[alias]` first before falling back to `parseAgentOverride()`. Eval CLI default `--agent` changed from `"hermes"` to `"uwf-hermes"`.
@@ -1,219 +0,0 @@
-import type { StepEntry } from "@united-workforce/protocol";
-import { beforeEach, describe, expect, test, vi } from "vitest";
-
-import {
-  runFrontmatterJudge,
-  runHallucinationJudge,
-  runTokenStatsJudge,
-  runUpstreamJudge,
-} from "../src/judge/builtin/index.js";
-
-// Mock the shared read-steps helper so the judges never shell out to `uwf`.
-vi.mock("../src/judge/builtin/read-steps.js", () => ({
-  readThreadSteps: vi.fn(),
-}));
-
-import { readThreadSteps } from "../src/judge/builtin/read-steps.js";
-
-const mockedReadSteps = vi.mocked(readThreadSteps);
-
-function makeStep(overrides: Partial<StepEntry>): StepEntry {
-  return {
-    hash: "HASH000000000",
-    role: "worker",
-    output: "---\n$status: done\n---\n\nbody",
-    detail: "DETAIL0000000",
-    agent: "hermes",
-    timestamp: 0,
-    durationMs: 0,
-    usage: null,
-    ...overrides,
-  };
-}
-
-beforeEach(() => {
-  mockedReadSteps.mockReset();
-});
-
-describe("frontmatter-compliance judge", () => {
-  test("all steps have valid frontmatter → score 1.0", async () => {
-    mockedReadSteps.mockReturnValue([
-      makeStep({ role: "a", output: "---\n$status: done\n---\n\nwork" }),
-      makeStep({ role: "b", output: "---\n$status: needs_input\n---\nmore" }),
-    ]);
-
-    const result = await runFrontmatterJudge("T1");
-    const data = result.data as { stepsTotal: number; stepsValid: number; invalidSteps: unknown[] };
-
-    expect(result.score).toBe(1.0);
-    expect(data.stepsTotal).toBe(2);
-    expect(data.stepsValid).toBe(2);
-    expect(data.invalidSteps).toHaveLength(0);
-  });
-
-  test("some steps missing $status → partial score", async () => {
-    mockedReadSteps.mockReturnValue([
-      makeStep({ role: "a", output: "---\n$status: done\n---\nok" }),
-      makeStep({ role: "b", output: "---\nfoo: bar\n---\nmissing status" }),
-      makeStep({ role: "c", output: "no frontmatter at all" }),
-    ]);
-
-    const result = await runFrontmatterJudge("T2");
-    const data = result.data as {
-      stepsTotal: number;
-      stepsValid: number;
-      invalidSteps: Array<{ stepIndex: number; role: string; errors: string[] }>;
-    };
-
-    expect(result.score).toBeCloseTo(1 / 3, 10);
-    expect(data.stepsTotal).toBe(3);
-    expect(data.stepsValid).toBe(1);
-    expect(data.invalidSteps).toHaveLength(2);
-    expect(data.invalidSteps[0]).toMatchObject({ stepIndex: 1, role: "b" });
-    expect(data.invalidSteps[1]).toMatchObject({ stepIndex: 2, role: "c" });
-  });
-
-  test("no steps → score 0 (0/0 edge case)", async () => {
-    mockedReadSteps.mockReturnValue([]);
-
-    const result = await runFrontmatterJudge("T3");
-    const data = result.data as { stepsTotal: number; stepsValid: number; invalidSteps: unknown[] };
-
-    expect(result.score).toBe(0);
-    expect(data.stepsTotal).toBe(0);
-    expect(data.stepsValid).toBe(0);
-    expect(data.invalidSteps).toHaveLength(0);
-  });
-
-  test("empty-string $status counts as invalid", async () => {
-    mockedReadSteps.mockReturnValue([makeStep({ role: "a", output: '---\n$status: ""\n---\nx' })]);
-
-    const result = await runFrontmatterJudge("T4");
-    expect(result.score).toBe(0);
-  });
-
-  test("parsed object output with $status → score 1.0", async () => {
-    mockedReadSteps.mockReturnValue([
-      makeStep({ role: "a", output: { $status: "done", summary: "fixed" } as unknown as string }),
-      makeStep({ role: "b", output: { $status: "reviewed" } as unknown as string }),
-    ]);
-
-    const result = await runFrontmatterJudge("T5");
-    const data = result.data as { stepsTotal: number; stepsValid: number; invalidSteps: unknown[] };
-
-    expect(result.score).toBe(1.0);
-    expect(data.stepsTotal).toBe(2);
-    expect(data.stepsValid).toBe(2);
-  });
-
-  test("parsed object output missing $status → score 0", async () => {
-    mockedReadSteps.mockReturnValue([
-      makeStep({ role: "a", output: { summary: "no status field" } as unknown as string }),
-    ]);
-
-    const result = await runFrontmatterJudge("T6");
-    expect(result.score).toBe(0);
-  });
-});
-
-describe("token-stats judge", () => {
-  test("steps with usage → sums correctly", async () => {
-    mockedReadSteps.mockReturnValue([
-      makeStep({
-        role: "a",
-        usage: { turns: 2, inputTokens: 100, outputTokens: 50, duration: 1.5 },
-      }),
-      makeStep({
-        role: "b",
-        usage: { turns: 3, inputTokens: 200, outputTokens: 75, duration: 2.0 },
-      }),
-    ]);
-
-    const result = await runTokenStatsJudge("T1");
-    const data = result.data as {
-      totalInput: number;
-      totalOutput: number;
-      totalTurns: number;
-      perStep: Array<{ role: string; inputTokens: number; outputTokens: number; turns: number }>;
-    };
-
-    expect(result.score).toBe(1.0);
-    expect(data.totalInput).toBe(300);
-    expect(data.totalOutput).toBe(125);
-    expect(data.totalTurns).toBe(5);
-    expect(data.perStep).toHaveLength(2);
-    expect(data.perStep[0]).toEqual({
-      role: "a",
-      inputTokens: 100,
-      outputTokens: 50,
-      turns: 2,
-      duration: 1.5,
-    });
-  });
-
-  test("steps with null usage → zeros", async () => {
-    mockedReadSteps.mockReturnValue([
-      makeStep({ role: "a", usage: null }),
-      makeStep({ role: "b", usage: null }),
-    ]);
-
-    const result = await runTokenStatsJudge("T2");
-    const data = result.data as {
-      totalInput: number;
-      totalOutput: number;
-      totalTurns: number;
-      perStep: Array<{
-        inputTokens: number;
-        outputTokens: number;
-        turns: number;
-        duration: number;
-      }>;
-    };
-
-    expect(result.score).toBe(1.0);
-    expect(data.totalInput).toBe(0);
-    expect(data.totalOutput).toBe(0);
-    expect(data.totalTurns).toBe(0);
-    expect(data.perStep[0]).toEqual({
-      role: "a",
-      inputTokens: 0,
-      outputTokens: 0,
-      turns: 0,
-      duration: 0,
-    });
-  });
-
-  test("empty steps → all zeros, score 1.0", async () => {
-    mockedReadSteps.mockReturnValue([]);
-
-    const result = await runTokenStatsJudge("T3");
-    const data = result.data as {
-      totalInput: number;
-      totalOutput: number;
-      totalTurns: number;
-      perStep: unknown[];
-    };
-
-    expect(result.score).toBe(1.0);
-    expect(data.totalInput).toBe(0);
-    expect(data.totalOutput).toBe(0);
-    expect(data.totalTurns).toBe(0);
-    expect(data.perStep).toHaveLength(0);
-  });
-});
-
-describe("LLM-as-judge stubs", () => {
-  test("upstream-consumption returns a stub", async () => {
-    const result = await runUpstreamJudge("T1");
-    expect(result.score).toBe(0);
-    expect(result.data).toEqual({ perStep: [] });
-    expect(result.schema.title).toBe("@uwf/eval-judge-upstream");
-  });
-
-  test("hallucination returns a stub", async () => {
-    const result = await runHallucinationJudge("T1");
-    expect(result.score).toBe(0);
-    expect(result.data).toEqual({ perStep: [] });
-    expect(result.schema.title).toBe("@uwf/eval-judge-hallucination");
-  });
-});
@@ -1,152 +0,0 @@
-import { bootstrap, createMemoryStore } from "@ocas/core";
-import { describe, expect, test } from "vitest";
-import type { JudgeRunner } from "../src/runner/index.js";
-import { collect, computeOverall } from "../src/runner/index.js";
-import type { EvalRunConfig, EvalStore } from "../src/storage/index.js";
-import type { JudgeEntry, TaskManifest } from "../src/task/index.js";
-
-function makeJudge(name: string, weight: number, builtin: boolean): JudgeEntry {
-  return {
-    name,
-    weight,
-    builtin,
-    entry: builtin ? null : `dist/judges/${name}.js`,
-    schema: null,
-  };
-}
-
-function makeManifest(judges: JudgeEntry[]): TaskManifest {
-  return {
-    name: "fix-off-by-one",
-    description: "test task",
-    workflow: "solve-issue",
-    prompt: "Fix the bug",
-    limits: { maxSteps: 10, timeoutMinutes: 30 },
-    judges,
-  };
-}
-
-function makeEvalStore(): EvalStore {
-  const store = createMemoryStore();
-  bootstrap(store);
-  return { store, varStore: store.var };
-}
-
-const CONFIG: EvalRunConfig = {
-  agent: "hermes",
-  model: "claude-sonnet-4",
-  engineVersion: "test",
-};
-
-/** Returns a fixed score per judge name. */
-function scriptedRunner(scores: Record<string, number>): JudgeRunner {
-  return async (_taskDir, _workDir, _threadId, judge) => ({
-    score: scores[judge.name] ?? 0,
-    data: { judged: judge.name },
-    schema: { type: "object" },
-  });
-}
-
-describe("computeOverall", () => {
-  test("computes the weighted average correctly", () => {
-    const overall = computeOverall([
-      { score: 0.8, weight: 0.3 },
-      { score: 0.6, weight: 0.3 },
-      { score: 1.0, weight: 0.4 },
-    ]);
-    // 0.24 + 0.18 + 0.4 = 0.82
-    expect(overall).toBeCloseTo(0.82, 10);
-  });
-
-  test("a weight-0 judge does not affect the result", () => {
-    const withInformational = computeOverall([
-      { score: 1.0, weight: 1.0 },
-      { score: 0.0, weight: 0.0 },
-    ]);
-    expect(withInformational).toBe(1.0);
-  });
-
-  test("returns 0 when total weight is 0", () => {
-    expect(computeOverall([{ score: 0.5, weight: 0 }])).toBe(0);
-  });
-});
-
-describe("collect", () => {
-  test("computes weighted score correctly across judges", async () => {
-    const evalStore = makeEvalStore();
-    const manifest = makeManifest([
-      makeJudge("test-pass", 0.6, false),
-      makeJudge("code-quality", 0.4, false),
-    ]);
-    const runJudge = scriptedRunner({ "test-pass": 1.0, "code-quality": 0.5 });
-
-    const result = await collect(
-      {
-        evalStore,
-        taskDir: "/tmp/task",
-        workDir: "/tmp/work",
-        threadId: "THREAD123",
-        manifest,
-        config: CONFIG,
-      },
-      runJudge,
-    );
-
-    // 1.0 * 0.6 + 0.5 * 0.4 = 0.8
-    expect(result.overall).toBeCloseTo(0.8, 10);
-    expect(result.runHash).toBeTruthy();
-    expect(result.judges).toHaveLength(2);
-    expect(result.judges[0]).toEqual({ name: "test-pass", score: 1.0, weight: 0.6 });
-
-    const latest = evalStore.varStore.list({
-      exactName: "@uwf/eval/fix-off-by-one/latest",
-    });
-    expect(latest[0]?.value).toBe(result.runHash);
-  });
-
-  test("handles a judge with weight 0 (informational)", async () => {
-    const evalStore = makeEvalStore();
-    const manifest = makeManifest([
-      makeJudge("test-pass", 1.0, false),
-      makeJudge("token-stats", 0, true),
-    ]);
-    // token-stats is builtin → default runner would score 0; give scripted score
-    // that would skew the result if it were counted.
-    const runJudge = scriptedRunner({ "test-pass": 0.5, "token-stats": 1.0 });
-
-    const result = await collect(
-      {
-        evalStore,
-        taskDir: "/tmp/task",
-        workDir: "/tmp/work",
-        threadId: "THREAD123",
-        manifest,
-        config: CONFIG,
-      },
-      runJudge,
-    );
-
-    // Only test-pass (weight 1.0) counts → overall = 0.5
-    expect(result.overall).toBeCloseTo(0.5, 10);
-    expect(result.judges).toHaveLength(2);
-    const tokenStats = result.judges.find((j) => j.name === "token-stats");
-    expect(tokenStats?.weight).toBe(0);
-  });
-
-  test("unknown builtin judge name throws via the default runner", async () => {
-    const evalStore = makeEvalStore();
-    const manifest = makeManifest([makeJudge("not-a-real-judge", 1.0, true)]);
-
-    // Use the default runner (no injected runner) → builtin dispatch → unknown name throws.
-    await expect(
-      collect({
-        evalStore,
-        taskDir: "/tmp/task",
-        workDir: "/tmp/work",
-        threadId: "THREAD123",
-        manifest,
-        config: CONFIG,
-      }),
-    ).rejects.toThrow(/unknown builtin judge/);
-  });
-});
@@ -1,171 +0,0 @@
-import { bootstrap, createMemoryStore, putSchema } from "@ocas/core";
-import type { CasRef } from "@united-workforce/protocol";
-import { describe, expect, test } from "vitest";
-
-import {
-  formatDiff,
-  formatList,
-  formatReport,
-  readEvalEntries,
-  readEvalRun,
-  selectEntries,
-} from "../src/commands/index.js";
-import type { EvalRunPayload, EvalStore } from "../src/storage/index.js";
-import { EVAL_RUN_SCHEMA, setEvalLatest } from "../src/storage/index.js";
-
-/** Create an in-memory store, bootstrap it, and pair it with its variable store. */
-function makeEvalStore(): EvalStore {
-  const memory = createMemoryStore();
-  bootstrap(memory);
-  const evalStore: EvalStore = { store: memory, varStore: memory.var };
-  return evalStore;
-}
-
-/**
- * Build an eval-run payload for tests.
- *
- * `judges` and `config` fall back to the fixed defaults below when the caller
- * passes `undefined`; `threadId` is always the constant "THREAD0123456789".
- */
-function makePayload(
-  task: string,
-  overall: number,
-  timestamp: number,
-  judges: EvalRunPayload["judges"] = [
-    {
-      name: "frontmatter-compliance",
-      score: 1.0,
-      weight: 0.6,
-      dataHash: "AAAAAAAAAAAAA" as CasRef,
-    },
-    { name: "token-stats", score: 0.5, weight: 0, dataHash: "BBBBBBBBBBBBB" as CasRef },
-  ],
-  config: EvalRunPayload["config"] = {
-    agent: "hermes",
-    model: "claude-sonnet-4",
-    engineVersion: "1.0.0",
-  },
-): EvalRunPayload {
-  return { task, config, threadId: "THREAD0123456789", judges, overall, timestamp };
-}
-
-/**
- * Persist `payload` as an eval-run CAS node and point the task's
- * `@uwf/eval/<task>/latest` variable at it. Returns the hash of the new node.
- */
-function storeRun(evalStore: EvalStore, payload: EvalRunPayload): string {
-  const { store, varStore } = evalStore;
-  const runHash = store.cas.put(putSchema(store, EVAL_RUN_SCHEMA), payload);
-  setEvalLatest(varStore, payload.task, runHash);
-  return runHash;
-}
-
-// formatReport rendering, plus the readEvalRun lookups that feed it.
-describe("formatReport", () => {
-  test("includes task, overall, config and judges", () => {
-    const payload = makePayload("fix-off-by-one", 0.8, Date.UTC(2026, 0, 2, 3, 4, 5));
-    const output = formatReport(payload, "RUNHASH123456");
-
-    expect(output).toContain("fix-off-by-one");
-    // Scores are rendered with four decimals.
-    expect(output).toContain("0.8000");
-    expect(output).toContain("hermes");
-    expect(output).toContain("claude-sonnet-4");
-    expect(output).toContain("1.0.0");
-    expect(output).toContain("frontmatter-compliance");
-    expect(output).toContain("token-stats");
-    expect(output).toContain("THREAD0123456789");
-    expect(output).toContain("RUNHASH123456");
-  });
-
-  test("round-trips a stored run via readEvalRun", () => {
-    const evalStore = makeEvalStore();
-    const payload = makePayload("fix-off-by-one", 0.75, Date.now());
-    const hash = storeRun(evalStore, payload);
-
-    const loaded = readEvalRun(evalStore, hash);
-    expect(loaded).not.toBeNull();
-    const output = formatReport(loaded as EvalRunPayload, hash);
-    expect(output).toContain("fix-off-by-one");
-    expect(output).toContain("0.7500");
-  });
-
-  test("readEvalRun returns null for a missing hash", () => {
-    const evalStore = makeEvalStore();
-    // Nothing was stored, so any hash is absent.
-    expect(readEvalRun(evalStore, "NOPENOPENOPE0")).toBeNull();
-  });
-});
-
-// The list pipeline: readEvalEntries -> selectEntries -> formatList.
-describe("list", () => {
-  test("lists eval runs stored under different tasks", () => {
-    const evalStore = makeEvalStore();
-    storeRun(evalStore, makePayload("fix-off-by-one", 0.8, 2000));
-    storeRun(evalStore, makePayload("write-docs", 0.6, 1000));
-
-    const entries = readEvalEntries(evalStore);
-    expect(entries).toHaveLength(2);
-
-    const output = formatList(selectEntries(entries, null, 20));
-    expect(output).toContain("fix-off-by-one");
-    expect(output).toContain("write-docs");
-  });
-
-  test("sorts newest-first by timestamp", () => {
-    const evalStore = makeEvalStore();
-    // Stored oldest first, so the expected order is the reverse of insertion.
-    storeRun(evalStore, makePayload("old-task", 0.5, 1000));
-    storeRun(evalStore, makePayload("new-task", 0.5, 2000));
-
-    const selected = selectEntries(readEvalEntries(evalStore), null, 20);
-    expect(selected[0]?.task).toBe("new-task");
-    expect(selected[1]?.task).toBe("old-task");
-  });
-
-  test("--task filter only shows the matching task", () => {
-    const evalStore = makeEvalStore();
-    storeRun(evalStore, makePayload("fix-off-by-one", 0.8, 2000));
-    storeRun(evalStore, makePayload("write-docs", 0.6, 1000));
-
-    const output = formatList(selectEntries(readEvalEntries(evalStore), "write-docs", 20));
-    expect(output).toContain("write-docs");
-    expect(output).not.toContain("fix-off-by-one");
-  });
-
-  test("--limit caps the number of rows", () => {
-    const evalStore = makeEvalStore();
-    storeRun(evalStore, makePayload("task-a", 0.8, 3000));
-    storeRun(evalStore, makePayload("task-b", 0.6, 2000));
-    storeRun(evalStore, makePayload("task-c", 0.4, 1000));
-
-    // The limit is applied after sorting, so the two newest runs survive.
-    const selected = selectEntries(readEvalEntries(evalStore), null, 2);
-    expect(selected).toHaveLength(2);
-    expect(selected.map((e) => e.task)).toEqual(["task-a", "task-b"]);
-  });
-
-  test("empty store renders a placeholder", () => {
-    const evalStore = makeEvalStore();
-    const output = formatList(selectEntries(readEvalEntries(evalStore), null, 20));
-    expect(output).toContain("(no eval runs found)");
-  });
-});
-
-// formatDiff: delta direction markers and config comparison markers.
-describe("formatDiff", () => {
-  test("shows an upward delta when B scores higher", () => {
-    const a = makePayload("fix-off-by-one", 0.6, 1000);
-    const b = makePayload("fix-off-by-one", 0.8, 2000);
-    const output = formatDiff(a, "HASHA00000000", b, "HASHB00000000");
-
-    expect(output).toContain("▲");
-    expect(output).toContain("HASHA00000000");
-    expect(output).toContain("HASHB00000000");
-  });
-
-  test("shows a downward delta when B scores lower", () => {
-    const a = makePayload("fix-off-by-one", 0.9, 1000);
-    const b = makePayload("fix-off-by-one", 0.4, 2000);
-    const output = formatDiff(a, "HASHA00000000", b, "HASHB00000000");
-    expect(output).toContain("▼");
-  });
-
-  test("marks differing config values", () => {
-    // `undefined` for judges keeps makePayload's default judge list.
-    const a = makePayload("fix-off-by-one", 0.6, 1000, undefined, {
-      agent: "hermes",
-      model: "claude-sonnet-4",
-      engineVersion: "1.0.0",
-    });
-    const b = makePayload("fix-off-by-one", 0.6, 2000, undefined, {
-      agent: "claude-code",
-      model: "claude-sonnet-4",
-      engineVersion: "1.0.0",
-    });
-    const output = formatDiff(a, "HASHA00000000", b, "HASHB00000000");
-    expect(output).toContain("≠");
-    expect(output).toContain("claude-code");
-  });
-});
@@ -1,74 +0,0 @@
-import { mkdir, mkdtemp, readFile, rm, writeFile } from "node:fs/promises";
-import { tmpdir } from "node:os";
-import { join } from "node:path";
-
-import { afterEach, beforeEach, describe, expect, test } from "vitest";
-
-import { prepare } from "../src/runner/index.js";
-
-// Task manifest written into every temporary task directory used below.
-const TASK_YAML = `
-name: fix-off-by-one
-description: Fix an off-by-one error
-workflow: solve-issue
-prompt: "Fix the bug"
-limits:
-  maxSteps: 12
-  timeoutMinutes: 20
-judges:
-  - name: frontmatter-compliance
-    weight: 0.5
-    builtin: true
-  - name: test-pass
-    weight: 0.5
-    entry: dist/judges/test-pass.js
-`;
-
-// Temporary task directory, recreated before each test.
-let taskDir: string;
-
-// Lay out task.yaml plus a fixture/ tree (src/calc.ts and package.json).
-beforeEach(async () => {
-  taskDir = await mkdtemp(join(tmpdir(), "uwf-eval-task-"));
-  await writeFile(join(taskDir, "task.yaml"), TASK_YAML, "utf8");
-  const fixtureDir = join(taskDir, "fixture");
-  await mkdir(join(fixtureDir, "src"), { recursive: true });
-  await writeFile(join(fixtureDir, "src", "calc.ts"), "export const add = (a, b) => a + b + 1;\n");
-  await writeFile(join(fixtureDir, "package.json"), '{ "name": "fixture" }\n');
-});
-
-// Remove the task directory; the work dir returned by prepare() is not covered here.
-afterEach(async () => {
-  await rm(taskDir, { recursive: true, force: true });
-});
-
-// prepare(): manifest parsing and work-dir creation. Every test removes the
-// directories it creates in a `finally`, so a failed assertion leaves nothing behind.
-describe("prepare", () => {
-  /** Remove a directory tree; `force` makes an already-missing path a no-op. */
-  const removeDir = (dir: string): Promise<void> => rm(dir, { recursive: true, force: true });
-
-  test("returns the parsed manifest", async () => {
-    const result = await prepare(taskDir);
-    try {
-      expect(result.taskDir).toBe(taskDir);
-      expect(result.manifest.name).toBe("fix-off-by-one");
-      expect(result.manifest.workflow).toBe("solve-issue");
-      expect(result.manifest.limits.maxSteps).toBe(12);
-      expect(result.manifest.judges).toHaveLength(2);
-    } finally {
-      // prepare() returns a work dir on this path too; it used to be left on disk.
-      await removeDir(result.workDir);
-    }
-  });
-
-  test("copies fixture into a fresh temp work dir", async () => {
-    const result = await prepare(taskDir);
-    try {
-      expect(result.workDir).not.toBe(taskDir);
-      expect(result.workDir.startsWith(tmpdir())).toBe(true);
-
-      const calc = await readFile(join(result.workDir, "src", "calc.ts"), "utf8");
-      expect(calc).toContain("export const add");
-      const pkg = await readFile(join(result.workDir, "package.json"), "utf8");
-      expect(pkg).toContain("fixture");
-    } finally {
-      await removeDir(result.workDir);
-    }
-  });
-
-  test("creates an empty work dir when no fixture/ exists", async () => {
-    const noFixtureDir = await mkdtemp(join(tmpdir(), "uwf-eval-nofix-"));
-    // Stays null until prepare() succeeds, so cleanup only touches what exists.
-    let workDir: string | null = null;
-    try {
-      await writeFile(join(noFixtureDir, "task.yaml"), TASK_YAML, "utf8");
-
-      const result = await prepare(noFixtureDir);
-      workDir = result.workDir;
-      expect(result.workDir.startsWith(tmpdir())).toBe(true);
-    } finally {
-      await removeDir(noFixtureDir);
-      if (workDir !== null) {
-        await removeDir(workDir);
-      }
-    }
-  });
-});
@@ -1,63 +0,0 @@
-import { describe, expect, test } from "vitest";
-import {
-  EVAL_JUDGE_FRONTMATTER_SCHEMA,
-  EVAL_JUDGE_HALLUCINATION_SCHEMA,
-  EVAL_JUDGE_TOKEN_STATS_SCHEMA,
-  EVAL_JUDGE_UPSTREAM_SCHEMA,
-  EVAL_RUN_SCHEMA,
-} from "../src/storage/index.js";
-
-// Title, required-field and root-type checks for the exported eval schemas.
-describe("OCAS schema definitions", () => {
-  // Shared check: the schema's `required` list must name every given field.
-  const expectRequired = (schema: { required?: unknown }, fields: readonly string[]): void => {
-    const required = schema.required as string[];
-    for (const field of fields) {
-      expect(required).toContain(field);
-    }
-  };
-
-  test("eval-run schema has correct title and required fields", () => {
-    expect(EVAL_RUN_SCHEMA.title).toBe("@uwf/eval-run");
-    expectRequired(EVAL_RUN_SCHEMA, [
-      "task",
-      "config",
-      "threadId",
-      "judges",
-      "overall",
-      "timestamp",
-    ]);
-  });
-
-  test("frontmatter judge schema has correct title", () => {
-    expect(EVAL_JUDGE_FRONTMATTER_SCHEMA.title).toBe("@uwf/eval-judge-frontmatter");
-    expectRequired(EVAL_JUDGE_FRONTMATTER_SCHEMA, ["stepsTotal", "stepsValid", "invalidSteps"]);
-  });
-
-  test("upstream judge schema has correct title", () => {
-    expect(EVAL_JUDGE_UPSTREAM_SCHEMA.title).toBe("@uwf/eval-judge-upstream");
-    expectRequired(EVAL_JUDGE_UPSTREAM_SCHEMA, ["perStep"]);
-  });
-
-  test("hallucination judge schema has correct title", () => {
-    expect(EVAL_JUDGE_HALLUCINATION_SCHEMA.title).toBe("@uwf/eval-judge-hallucination");
-    expectRequired(EVAL_JUDGE_HALLUCINATION_SCHEMA, ["perStep"]);
-  });
-
-  test("token-stats judge schema has correct title", () => {
-    expect(EVAL_JUDGE_TOKEN_STATS_SCHEMA.title).toBe("@uwf/eval-judge-token-stats");
-    expectRequired(EVAL_JUDGE_TOKEN_STATS_SCHEMA, [
-      "totalInput",
-      "totalOutput",
-      "totalTurns",
-      "perStep",
-    ]);
-  });
-
-  test("all schemas have type object at root", () => {
-    const allSchemas = [
-      EVAL_RUN_SCHEMA,
-      EVAL_JUDGE_FRONTMATTER_SCHEMA,
-      EVAL_JUDGE_UPSTREAM_SCHEMA,
-      EVAL_JUDGE_HALLUCINATION_SCHEMA,
-      EVAL_JUDGE_TOKEN_STATS_SCHEMA,
-    ];
-    for (const schema of allSchemas) {
-      expect(schema.type).toBe("object");
-    }
-  });
-});
@@ -1,163 +0,0 @@
-import { describe, expect, test } from "vitest";
-import { parseTaskManifest } from "../src/task/index.js";
-
-// Complete manifest with one builtin judge and one custom judge (entry + schema).
-const VALID_YAML = `
-name: fix-off-by-one
-description: Fix an off-by-one error in a calculator
-workflow: solve-issue
-prompt: "Fix the bug: add(1,2) returns 4 instead of 3"
-limits:
-  maxSteps: 15
-  timeoutMinutes: 30
-judges:
-  - name: frontmatter-compliance
-    weight: 0.15
-    builtin: true
-  - name: test-pass
-    weight: 0.3
-    entry: dist/judges/test-pass.js
-    schema: schemas/test-pass.json
-`;
-
-// parseTaskManifest: happy path, defaults, and each validation error message.
-describe("parseTaskManifest", () => {
-  test("parses valid task.yaml", () => {
-    const manifest = parseTaskManifest(VALID_YAML);
-    expect(manifest.name).toBe("fix-off-by-one");
-    expect(manifest.description).toBe("Fix an off-by-one error in a calculator");
-    expect(manifest.workflow).toBe("solve-issue");
-    expect(manifest.prompt).toBe("Fix the bug: add(1,2) returns 4 instead of 3");
-    expect(manifest.limits).toEqual({ maxSteps: 15, timeoutMinutes: 30 });
-    expect(manifest.judges).toHaveLength(2);
-  });
-
-  test("parses builtin judge", () => {
-    const manifest = parseTaskManifest(VALID_YAML);
-    const builtin = manifest.judges[0];
-    expect(builtin).toBeDefined();
-    expect(builtin!.name).toBe("frontmatter-compliance");
-    expect(builtin!.weight).toBe(0.15);
-    expect(builtin!.builtin).toBe(true);
-    // A builtin judge declares no entry file; the parser reports that as null.
-    expect(builtin!.entry).toBeNull();
-  });
-
-  test("parses custom judge with entry + schema", () => {
-    const manifest = parseTaskManifest(VALID_YAML);
-    const custom = manifest.judges[1];
-    expect(custom).toBeDefined();
-    expect(custom!.name).toBe("test-pass");
-    expect(custom!.weight).toBe(0.3);
-    expect(custom!.builtin).toBe(false);
-    expect(custom!.entry).toBe("dist/judges/test-pass.js");
-    expect(custom!.schema).toBe("schemas/test-pass.json");
-  });
-
-  test("defaults limits when omitted", () => {
-    const yaml = `
-name: minimal
-workflow: solve-issue
-prompt: do something
-judges:
-  - name: check
-    builtin: true
-`;
-    const manifest = parseTaskManifest(yaml);
-    expect(manifest.limits).toEqual({ maxSteps: 20, timeoutMinutes: 30 });
-  });
-
-  test("defaults description to empty string", () => {
-    const yaml = `
-name: no-desc
-workflow: solve-issue
-prompt: do something
-judges:
-  - name: check
-    builtin: true
-`;
-    const manifest = parseTaskManifest(yaml);
-    expect(manifest.description).toBe("");
-  });
-
-  test("rejects missing name", () => {
-    const yaml = `
-workflow: solve-issue
-prompt: do something
-judges:
-  - name: check
-    builtin: true
-`;
-    expect(() => parseTaskManifest(yaml)).toThrow("name is required");
-  });
-
-  test("rejects missing workflow", () => {
-    const yaml = `
-name: test
-prompt: do something
-judges:
-  - name: check
-    builtin: true
-`;
-    expect(() => parseTaskManifest(yaml)).toThrow("workflow is required");
-  });
-
-  test("rejects missing prompt", () => {
-    const yaml = `
-name: test
-workflow: solve-issue
-judges:
-  - name: check
-    builtin: true
-`;
-    expect(() => parseTaskManifest(yaml)).toThrow("prompt is required");
-  });
-
-  test("rejects empty judges array", () => {
-    const yaml = `
-name: test
-workflow: solve-issue
-prompt: do something
-judges: []
-`;
-    expect(() => parseTaskManifest(yaml)).toThrow("at least one judge");
-  });
-
-  test("rejects non-builtin judge without entry", () => {
-    const yaml = `
-name: test
-workflow: solve-issue
-prompt: do something
-judges:
-  - name: custom-check
-    weight: 0.5
-`;
-    expect(() => parseTaskManifest(yaml)).toThrow("non-builtin judge must have entry");
-  });
-
-  test("rejects non-object YAML root", () => {
-    expect(() => parseTaskManifest("just a string")).toThrow("must be a YAML mapping");
-  });
-
-  test("rejects judge without name", () => {
-    // The manifest itself has a name here; only the judge entry lacks one.
-    const yaml = `
-name: test
-workflow: solve-issue
-prompt: do something
-judges:
-  - weight: 0.5
-    builtin: true
-`;
-    expect(() => parseTaskManifest(yaml)).toThrow("name is required");
-  });
-
-  test("defaults weight to 0 when omitted", () => {
-    const yaml = `
-name: test
-workflow: solve-issue
-prompt: do something
-judges:
-  - name: token-stats
-    builtin: true
-`;
-    const manifest = parseTaskManifest(yaml);
-    expect(manifest.judges[0]!.weight).toBe(0);
-  });
-});
@@ -1,45 +0,0 @@
-{
-  "name": "@united-workforce/eval",
-  "version": "0.1.3",
-  "private": false,
-  "files": [
-    "src",
-    "dist",
-    "package.json"
-  ],
-  "type": "module",
-  "bin": {
-    "uwf-eval": "./dist/cli.js"
-  },
-  "exports": {
-    ".": {
-      "types": "./dist/index.d.ts",
-      "import": "./dist/index.js"
-    }
-  },
-  "scripts": {
-    "test": "vitest run __tests__/",
-    "test:ci": "vitest run __tests__/"
-  },
-  "dependencies": {
-    "@ocas/core": "^0.3.0",
-    "@ocas/fs": "^0.3.0",
-    "@united-workforce/protocol": "workspace:^",
-    "@united-workforce/util": "workspace:^",
-    "commander": "^14.0.3",
-    "yaml": "^2.9.0"
-  },
-  "devDependencies": {
-    "typescript": "^5.8.3"
-  },
-  "repository": {
-    "type": "git",
-    "url": "https://git.shazhou.work/shazhou/united-workforce.git",
-    "directory": "packages/eval"
-  },
-  "homepage": "https://git.shazhou.work/shazhou/united-workforce#readme",
-  "bugs": {
-    "url": "https://git.shazhou.work/shazhou/united-workforce/issues"
-  },
-  "license": "MIT"
-}
@@ -1,25 +0,0 @@
-#!/usr/bin/env node
-import { Command } from "commander";
-import {
-  registerDiffCommand,
-  registerListCommand,
-  registerReportCommand,
-  registerRunCommand,
-} from "./commands/index.js";
-
-// CLI entry point: read the package version, register the four subcommands,
-// then parse process.argv.
-// eslint-disable-next-line -- dynamic import for version
-const pkg = await import("../package.json", { with: { type: "json" } });
-
-const program = new Command();
-
-program
-  .name("uwf-eval")
-  .description("Evaluate uwf workflow quality with real agents")
-  .version(pkg.default.version, "-V, --version");
-
-registerRunCommand(program);
-registerReportCommand(program);
-registerDiffCommand(program);
-registerListCommand(program);
-
-// All four commands register async action handlers, so parseAsync() is needed:
-// parse() returns without waiting for them, which leaves a rejected handler
-// outside this module's control flow.
-await program.parseAsync();
@@ -1,38 +0,0 @@
-import { createLogger } from "@united-workforce/util";
-import type { Command } from "commander";
-
-import { createEvalStore } from "../storage/index.js";
-import { formatDiff } from "./format.js";
-import { readEvalRun } from "./read.js";
-
-// Logger configured with a stderr sink; stdout is written directly by the command.
-const log = createLogger({ sink: { kind: "stderr" } });
-// First argument passed to log() for this command's single log call.
-const LOG_DIFF = "D3WZ8N5T";
-
-/**
- * Register `diff <hash1> <hash2>`: load both eval runs and write their
- * comparison to stdout. A missing run or a thrown error is written to stderr
- * and sets the process exit code to 1.
- */
-export function registerDiffCommand(program: Command): void {
-  // Write one line to stderr and mark the process as failed.
-  const fail = (text: string): void => {
-    process.stderr.write(`${text}\n`);
-    process.exitCode = 1;
-  };
-
-  const runDiff = async (hash1: string, hash2: string): Promise<void> => {
-    try {
-      const evalStore = await createEvalStore();
-
-      const payloadA = readEvalRun(evalStore, hash1);
-      if (payloadA === null) {
-        fail(`eval run not found: ${hash1}`);
-        return;
-      }
-
-      const payloadB = readEvalRun(evalStore, hash2);
-      if (payloadB === null) {
-        fail(`eval run not found: ${hash2}`);
-        return;
-      }
-
-      log(LOG_DIFF, `diff a=${hash1} b=${hash2}`);
-      process.stdout.write(formatDiff(payloadA, hash1, payloadB, hash2));
-    } catch (e) {
-      fail(e instanceof Error ? e.message : String(e));
-    }
-  };
-
-  program
-    .command("diff <hash1> <hash2>")
-    .description("Compare two eval runs side-by-side")
-    .action(runDiff);
-}
@@ -1,148 +0,0 @@
-import type { EvalRunPayload } from "../storage/index.js";
-import type { EvalListEntry } from "./types.js";
-
-// Column widths, in characters, handed to pad() when laying out table rows.
-const NAME_WIDTH = 28;
-const SCORE_WIDTH = 10;
-const TIMESTAMP_WIDTH = 26;
-
-/** Render a score or weight as a fixed-point string with four decimals. */
-function formatScore(value: number): string {
-  const decimals = 4;
-  return value.toFixed(decimals);
-}
-
-/** Convert epoch milliseconds to an ISO-8601 string via Date#toISOString. */
-function formatTimestamp(ms: number): string {
-  const instant = new Date(ms);
-  return instant.toISOString();
-}
-
-/**
- * Right-pad `value` with spaces up to `width`. A value that already fills or
- * overflows the column gets a single trailing space instead, so adjacent
- * columns never touch.
- */
-function pad(value: string, width: number): string {
-  if (value.length < width) {
-    return value.padEnd(width);
-  }
-  return `${value} `;
-}
-
-/**
- * Directional indicator for a score delta (B relative to A).
- *
- * The direction is decided at display precision: a delta whose magnitude
- * renders as 0.0000 (for example floating-point noise such as 1e-16) is shown
- * as "= 0.0000" rather than as an arrow next to a zero. NaN also renders as
- * "= 0.0000", because it is neither above nor below zero.
- */
-function formatDelta(delta: number): string {
-  const magnitude = formatScore(Math.abs(delta));
-  // False for NaN, so NaN falls through to the "=" branch.
-  const visible = Number(magnitude) > 0;
-  if (visible && delta > 0) {
-    return `▲ +${magnitude}`;
-  }
-  if (visible && delta < 0) {
-    return `▼ -${magnitude}`;
-  }
-  return `= ${formatScore(0)}`;
-}
-
-/**
- * Build the plain-text report for one eval run: header fields, config, a
- * judge table (name, score, weight), then the thread id and run hash.
- * The result ends with a newline.
- */
-export function formatReport(payload: EvalRunPayload, runHash: string): string {
-  const { config } = payload;
-  const report: string[] = [
-    "=== Eval Report ===",
-    `Task:       ${payload.task}`,
-    `Overall:    ${formatScore(payload.overall)}`,
-    `Timestamp:  ${formatTimestamp(payload.timestamp)}`,
-    "",
-    "Config:",
-    `  Agent:    ${config.agent}`,
-    `  Model:    ${config.model}`,
-    `  Engine:   ${config.engineVersion}`,
-    "",
-    "Judges:",
-    `  ${pad("NAME", NAME_WIDTH)}${pad("SCORE", SCORE_WIDTH)}WEIGHT`,
-    // One row per judge, in payload order.
-    ...payload.judges.map(
-      (judge) =>
-        `  ${pad(judge.name, NAME_WIDTH)}${pad(formatScore(judge.score), SCORE_WIDTH)}${formatScore(judge.weight)}`,
-    ),
-    "",
-    `Thread:     ${payload.threadId}`,
-    `Run:        ${runHash}`,
-  ];
-  return `${report.join("\n")}\n`;
-}
-
-/**
- * Build the plain-text comparison of two eval runs: overall scores with their
- * delta, a config section marking equal and differing values, and a judge
- * table over the union of both runs' judge names. A judge missing from one
- * run shows "—" in that column and no delta. The result ends with a newline.
- */
-export function formatDiff(
-  payloadA: EvalRunPayload,
-  hashA: string,
-  payloadB: EvalRunPayload,
-  hashB: string,
-): string {
-  const header = [
-    "=== Eval Diff ===",
-    `A: ${hashA}  (${payloadA.task})`,
-    `B: ${hashB}  (${payloadB.task})`,
-    "",
-  ];
-
-  const overall = [
-    "Overall:",
-    `  A=${formatScore(payloadA.overall)}  B=${formatScore(payloadB.overall)}  ${formatDelta(payloadB.overall - payloadA.overall)}`,
-    "",
-  ];
-
-  const config = [
-    "Config:",
-    configLine("Agent", payloadA.config.agent, payloadB.config.agent),
-    configLine("Model", payloadA.config.model, payloadB.config.model),
-    configLine("Engine", payloadA.config.engineVersion, payloadB.config.engineVersion),
-    "",
-  ];
-
-  // Score lookup by judge name for each side.
-  const byNameA = new Map(payloadA.judges.map((judge) => [judge.name, judge.score]));
-  const byNameB = new Map(payloadB.judges.map((judge) => [judge.name, judge.score]));
-  const judgeRows = unionJudgeNames(payloadA, payloadB).map((name) => {
-    const left = byNameA.get(name);
-    const right = byNameB.get(name);
-    const leftCell = left === undefined ? "—" : formatScore(left);
-    const rightCell = right === undefined ? "—" : formatScore(right);
-    // A delta only makes sense when both runs scored this judge.
-    const change = left === undefined || right === undefined ? "" : formatDelta(right - left);
-    return `  ${pad(name, NAME_WIDTH)}${pad(leftCell, SCORE_WIDTH)}${pad(rightCell, SCORE_WIDTH)}${change}`;
-  });
-  const judges = [
-    "Judges:",
-    `  ${pad("NAME", NAME_WIDTH)}${pad("A", SCORE_WIDTH)}${pad("B", SCORE_WIDTH)}DELTA`,
-    ...judgeRows,
-  ];
-
-  return `${[...header, ...overall, ...config, ...judges].join("\n")}\n`;
-}
-
-/**
- * Build the table of eval runs: a header row followed by one row per entry,
- * or a "(no eval runs found)" placeholder when `entries` is empty.
- * The result ends with a newline.
- */
-export function formatList(entries: ReadonlyArray<EvalListEntry>): string {
-  const header = `  ${pad("TASK", NAME_WIDTH)}${pad("OVERALL", SCORE_WIDTH)}${pad("TIMESTAMP", TIMESTAMP_WIDTH)}HASH`;
-  const rows =
-    entries.length === 0
-      ? ["  (no eval runs found)"]
-      : entries.map(
-          (entry) =>
-            `  ${pad(entry.task, NAME_WIDTH)}${pad(formatScore(entry.overall), SCORE_WIDTH)}${pad(formatTimestamp(entry.timestamp), TIMESTAMP_WIDTH)}${entry.hash}`,
-        );
-  return `${[header, ...rows].join("\n")}\n`;
-}
-
-/**
- * Return a new array of entries ordered by descending timestamp, keeping only
- * those whose task equals `task` (when non-null) and at most `limit` of them
- * (when non-null). The input array is not modified.
- */
-export function selectEntries(
-  entries: ReadonlyArray<EvalListEntry>,
-  task: string | null,
-  limit: number | null,
-): EvalListEntry[] {
-  const newestFirst = entries.slice().sort((left, right) => right.timestamp - left.timestamp);
-  const matching =
-    task === null ? newestFirst : newestFirst.filter((candidate) => candidate.task === task);
-  if (limit === null) {
-    return matching;
-  }
-  return matching.slice(0, limit);
-}
-
-/**
- * Distinct judge names across both runs, in first-seen order: A's judges
- * first, then any names that only appear in B.
- */
-function unionJudgeNames(payloadA: EvalRunPayload, payloadB: EvalRunPayload): string[] {
-  // A Set keeps insertion order, so de-duplicating through it preserves first-seen order.
-  const ordered = new Set<string>();
-  for (const payload of [payloadA, payloadB]) {
-    for (const judge of payload.judges) {
-      ordered.add(judge.name);
-    }
-  }
-  return [...ordered];
-}
-
-/** Format one config row, prefixed with `=` when both values match and `≠` when they differ. */
-function configLine(label: string, valueA: string, valueB: string): string {
-  const heading = pad(`${label}:`, SCORE_WIDTH);
-  let marker = "≠";
-  if (valueA === valueB) {
-    marker = "=";
-  }
-  return `  ${heading}${marker} A=${valueA}  B=${valueB}`;
-}
@@ -1,7 +0,0 @@
-export { registerDiffCommand } from "./diff.js";
-export { formatDiff, formatList, formatReport, selectEntries } from "./format.js";
-export { registerListCommand } from "./list.js";
-export { readEvalEntries, readEvalRun } from "./read.js";
-export { registerReportCommand } from "./report.js";
-export { registerRunCommand } from "./run.js";
-export type { EvalListEntry } from "./types.js";
@@ -1,43 +0,0 @@
-import { createLogger } from "@united-workforce/util";
-import type { Command } from "commander";
-
-import { createEvalStore } from "../storage/index.js";
-import { formatList, selectEntries } from "./format.js";
-import { readEvalEntries } from "./read.js";
-
-// Logger configured with a stderr sink; the table itself goes to stdout.
-const log = createLogger({ sink: { kind: "stderr" } });
-// First argument passed to log() for this command's single log call.
-const LOG_LIST = "L5KX9R2B";
-
-/** Options for `list` as handed to the action handler. */
-type ListCliOptions = {
-  // --task <name>; undefined when the flag is not given (no default is registered).
-  task: string | undefined;
-  // --limit <n>; still a string here, defaulting to "20".
-  limit: string;
-};
-
-/**
- * Register `list`: print a table of indexed eval runs to stdout, newest first,
- * optionally filtered by `--task` and capped by `--limit` (default 20).
- * An invalid `--limit` or a thrown error is written to stderr and sets the
- * process exit code to 1.
- */
-export function registerListCommand(program: Command): void {
-  program
-    .command("list")
-    .description("List past eval runs")
-    .option("--task <name>", "filter by task name")
-    .option("--limit <n>", "max results", "20")
-    .action(async (opts: ListCliOptions) => {
-      // Number.parseInt() stops at the first non-digit, so "5abc" or "2.5" would
-      // be accepted as 5 and 2. Require the whole argument to be decimal digits.
-      const limit = /^\+?\d+$/.test(opts.limit) ? Number.parseInt(opts.limit, 10) : Number.NaN;
-      if (!Number.isSafeInteger(limit) || limit < 1) {
-        process.stderr.write("--limit must be a positive integer\n");
-        process.exitCode = 1;
-        return;
-      }
-
-      try {
-        const evalStore = await createEvalStore();
-        const entries = readEvalEntries(evalStore);
-        const task = opts.task ?? null;
-        const selected = selectEntries(entries, task, limit);
-        log(LOG_LIST, `list task=${task ?? "*"} found=${entries.length} shown=${selected.length}`);
-        process.stdout.write(formatList(selected));
-      } catch (e) {
-        const message = e instanceof Error ? e.message : String(e);
-        process.stderr.write(`${message}\n`);
-        process.exitCode = 1;
-      }
-    });
-}
@@ -1,41 +0,0 @@
-import type { EvalRunPayload, EvalStore } from "../storage/index.js";
-import type { EvalListEntry } from "./types.js";
-
-/**
- * Prefix and suffix of eval run pointer variables (`@uwf/eval/<task>/latest`).
- * readEvalEntries() treats a variable as a pointer only when its name has both.
- */
-const EVAL_VAR_PREFIX = "@uwf/eval/";
-const EVAL_VAR_SUFFIX = "/latest";
-
-/**
- * Look up `hash` in CAS and return the node's payload typed as an eval run,
- * or null when no node exists. The payload is cast, not validated.
- */
-export function readEvalRun(evalStore: EvalStore, hash: string): EvalRunPayload | null {
-  const found = evalStore.store.cas.get(hash);
-  return found === null ? null : (found.payload as EvalRunPayload);
-}
-
-/**
- * Collect a summary row for every eval run pointer variable
- * (`@uwf/eval/<task>/latest`). Each pointer's value is looked up in CAS;
- * pointers whose node is missing are left out. Payloads are cast, not
- * validated. Rows come back in the order the variable store lists them.
- */
-export function readEvalEntries(evalStore: EvalStore): EvalListEntry[] {
-  const isEvalPointer = (name: string): boolean =>
-    name.startsWith(EVAL_VAR_PREFIX) && name.endsWith(EVAL_VAR_SUFFIX);
-
-  const rows: EvalListEntry[] = [];
-  for (const pointer of evalStore.varStore.list()) {
-    if (isEvalPointer(pointer.name)) {
-      const node = evalStore.store.cas.get(pointer.value);
-      if (node !== null) {
-        const { task, overall, timestamp } = node.payload as EvalRunPayload;
-        rows.push({ task, overall, timestamp, hash: pointer.value });
-      }
-    }
-  }
-  return rows;
-}
@@ -1,32 +0,0 @@
-import { createLogger } from "@united-workforce/util";
-import type { Command } from "commander";
-
-import { createEvalStore } from "../storage/index.js";
-import { formatReport } from "./format.js";
-import { readEvalRun } from "./read.js";
-
-// Logger configured with a stderr sink; the report itself goes to stdout.
-const log = createLogger({ sink: { kind: "stderr" } });
-// First argument passed to log() for this command's single log call.
-const LOG_REPORT = "R7QP2M4K";
-
-/**
- * Register `report <hash>`: load one eval run and write its report to stdout.
- * A missing run or a thrown error is written to stderr and sets the process
- * exit code to 1.
- */
-export function registerReportCommand(program: Command): void {
-  // Write one line to stderr and mark the process as failed.
-  const fail = (text: string): void => {
-    process.stderr.write(`${text}\n`);
-    process.exitCode = 1;
-  };
-
-  const showReport = async (hash: string): Promise<void> => {
-    try {
-      const evalStore = await createEvalStore();
-      const payload = readEvalRun(evalStore, hash);
-      if (payload !== null) {
-        log(LOG_REPORT, `report task=${payload.task} hash=${hash}`);
-        process.stdout.write(formatReport(payload, hash));
-        return;
-      }
-      fail(`eval run not found: ${hash}`);
-    } catch (e) {
-      fail(e instanceof Error ? e.message : String(e));
-    }
-  };
-
-  program.command("report <hash>").description("Show eval run results").action(showReport);
-}
@@ -1,84 +0,0 @@
-import { resolve } from "node:path";
-
-import type { Command } from "commander";
-import type { RunResult } from "../runner/index.js";
-import { collect, execute, getEngineVersion, prepare } from "../runner/index.js";
-import type { EvalRunConfig } from "../storage/index.js";
-import { createEvalStore } from "../storage/index.js";
-
-/** Options for `run` as handed to the action handler. */
-type RunCliOptions = {
-  // --agent <name>; defaults to "uwf-hermes".
-  agent: string;
-  // --model <model>; undefined when the flag is not given (no default is registered).
-  model: string | undefined;
-  // --count <n>; still a string here, defaulting to "1".
-  count: string;
-};
-
-/**
- * Run one evaluation: prepare the task, execute the workflow, then collect
- * judge results into the eval store. Returns the run hash, overall score,
- * task name and judge results.
- *
- * NOTE(review): `model` is recorded in the run config but is not among the
- * fields passed to execute() here. Confirm the override reaches the agent by
- * some other route.
- * NOTE(review): the work dir returned by prepare() is not removed in this
- * function. Confirm whether it is cleaned up elsewhere or kept on purpose.
- */
-async function runOnce(
-  taskDir: string,
-  agent: string,
-  model: string,
-  engineVersion: string,
-): Promise<RunResult> {
-  const prepared = await prepare(taskDir);
-
-  const executed = await execute({
-    workDir: prepared.workDir,
-    workflow: prepared.manifest.workflow,
-    prompt: prepared.manifest.prompt,
-    agent,
-    maxSteps: prepared.manifest.limits.maxSteps,
-  });
-
-  const evalStore = await createEvalStore();
-  const config: EvalRunConfig = { agent, model, engineVersion };
-  const { runHash, overall, judges } = await collect({
-    evalStore,
-    taskDir: prepared.taskDir,
-    workDir: prepared.workDir,
-    threadId: executed.threadId,
-    manifest: prepared.manifest,
-    config,
-  });
-
-  // Key order is kept as-is: this object is serialized to stdout by the caller.
-  return { runHash, overall, task: prepared.manifest.name, judges };
-}
-
-/**
- * Register `run <task>`: run the eval `--count` times in sequence and print
- * the result as one JSON line on stdout (a single object when count is 1,
- * otherwise an array). An invalid `--count` or a thrown error is written to
- * stderr and sets the process exit code to 1.
- */
-export function registerRunCommand(program: Command): void {
-  program
-    .command("run <task>")
-    .description("Run eval on a task directory or tarball")
-    .option("--agent <name>", "agent adapter to use", "uwf-hermes")
-    .option("--model <model>", "model override")
-    .option("--count <n>", "number of eval runs", "1")
-    .action(async (task: string, opts: RunCliOptions) => {
-      const taskDir = resolve(task);
-      const agent = opts.agent;
-      // An absent --model is recorded as the empty string.
-      const model = opts.model ?? "";
-      // Number.parseInt() stops at the first non-digit, so "3x" or "1.5" would
-      // be accepted as 3 and 1. Require the whole argument to be decimal digits.
-      const count = /^\+?\d+$/.test(opts.count) ? Number.parseInt(opts.count, 10) : Number.NaN;
-      if (!Number.isSafeInteger(count) || count < 1) {
-        process.stderr.write("--count must be a positive integer\n");
-        process.exitCode = 1;
-        return;
-      }
-
-      try {
-        // Inside the try so that a failure here is reported like any other
-        // error instead of escaping the handler as a rejection.
-        const engineVersion = getEngineVersion();
-
-        const results: RunResult[] = [];
-        // Runs are sequential; each one awaits the previous.
-        for (let i = 0; i < count; i++) {
-          results.push(await runOnce(taskDir, agent, model, engineVersion));
-        }
-        const output = count === 1 ? results[0] : results;
-        process.stdout.write(`${JSON.stringify(output)}\n`);
-      } catch (e) {
-        const message = e instanceof Error ? e.message : String(e);
-        process.stderr.write(`${message}\n`);
-        process.exitCode = 1;
-      }
-    });
-}
@@ -1,9 +0,0 @@
-import type { CasRef } from "@united-workforce/protocol";
-
-/** Summary row for the `list` command: one indexed eval run. */
-export type EvalListEntry = {
-  // Task name copied from the run payload.
-  task: string;
-  // Overall score copied from the run payload.
-  overall: number;
-  // Run timestamp in epoch milliseconds (rendered through `new Date(ms)`).
-  timestamp: number;
-  // CAS hash of the eval-run node this row summarizes.
-  hash: CasRef;
-};
@@ -1,34 +0,0 @@
-// Judge types
-export type { JudgeInput, JudgeOutput } from "./judge/index.js";
-export type {
-  CollectInput,
-  CollectResult,
-  ExecuteInput,
-  ExecuteResult,
-  JudgeRunner,
-  JudgeRunOutput,
-  JudgeSummary,
-  PrepareResult,
-  RunOptions,
-  RunResult,
-} from "./runner/index.js";
-// Runner (prepare → execute → collect)
-export { collect, computeOverall, execute, getEngineVersion, prepare } from "./runner/index.js";
-export type {
-  EvalJudgeRecord,
-  EvalRunConfig,
-  EvalRunPayload,
-  EvalStore,
-} from "./storage/index.js";
-// Storage schemas and types
-export {
-  createEvalStore,
-  EVAL_JUDGE_FRONTMATTER_SCHEMA,
-  EVAL_JUDGE_HALLUCINATION_SCHEMA,
-  EVAL_JUDGE_TOKEN_STATS_SCHEMA,
-  EVAL_JUDGE_UPSTREAM_SCHEMA,
-  EVAL_RUN_SCHEMA,
-  setEvalLatest,
-} from "./storage/index.js";
-export type { JudgeEntry, TaskLimits, TaskManifest } from "./task/index.js";
-export { loadTaskManifest, parseTaskManifest } from "./task/index.js";
@@ -1,105 +0,0 @@
-import { createLogger } from "@united-workforce/util";
-import { parse as parseYaml } from "yaml";
-
-import { EVAL_JUDGE_FRONTMATTER_SCHEMA } from "../../storage/index.js";
-import { readThreadSteps } from "./read-steps.js";
-import type { BuiltinJudgeOutput } from "./types.js";
-
-const log = createLogger({ sink: { kind: "stderr" } });
-
-const LOG_RESULT = "F2QH7R4M";
-
-const FENCE = "---";
-
-type InvalidStep = {
-  stepIndex: number;
-  role: string;
-  errors: string[];
-};
-
-/**
- * Extract the YAML frontmatter block from a step output. Returns the inner YAML
- * string when the output starts with a `---\n` block closed by a `\n---` fence,
- * otherwise null.
- */
-function extractFrontmatterYaml(output: unknown): string | null {
-  if (typeof output !== "string") {
-    return null;
-  }
-  if (!output.startsWith(`${FENCE}\n`)) {
-    return null;
-  }
-  const rest = output.slice(FENCE.length + 1);
-  const closeIndex = rest.indexOf(`\n${FENCE}`);
-  if (closeIndex === -1) {
-    return null;
-  }
-  return rest.slice(0, closeIndex);
-}
-
-/** Validate a single step's frontmatter, returning a list of errors (empty = valid). */
-function validateStepFrontmatter(output: unknown): string[] {
-  // CAS stores the extracted output as a JSON object after the extract pipeline.
-  // Accept both: parsed object (from step.output) or raw markdown string.
-  if (typeof output === "object" && output !== null && !Array.isArray(output)) {
-    const status = (output as Record<string, unknown>).$status;
-    if (typeof status !== "string" || status.trim() === "") {
-      return ["$status field is missing or not a non-empty string"];
-    }
-    return [];
-  }
-
-  const yaml = extractFrontmatterYaml(output);
-  if (yaml === null) {
-    return ["output does not begin with a valid '---' frontmatter block"];
-  }
-
-  let parsed: unknown;
-  try {
-    parsed = parseYaml(yaml);
-  } catch (e) {
-    const message = e instanceof Error ? e.message : String(e);
-    return [`frontmatter YAML failed to parse: ${message}`];
-  }
-
-  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
-    return ["frontmatter is not a YAML mapping"];
-  }
-
-  const status = (parsed as Record<string, unknown>).$status;
-  if (typeof status !== "string" || status.trim() === "") {
-    return ["$status field is missing or not a non-empty string"];
-  }
-
-  return [];
-}
-
-/**
- * Deterministic judge: every step's agent output must contain valid YAML
- * frontmatter with a non-empty `$status` field. Score = stepsValid / stepsTotal
- * (0 when there are no steps).
- */
-export async function runFrontmatterJudge(threadId: string): Promise<BuiltinJudgeOutput> {
-  const steps = readThreadSteps(threadId);
-
-  const invalidSteps: InvalidStep[] = [];
-  for (let i = 0; i < steps.length; i++) {
-    const step = steps[i];
-    const errors = validateStepFrontmatter(step.output);
-    if (errors.length > 0) {
-      invalidSteps.push({ stepIndex: i, role: step.role, errors });
-    }
-  }
-
-  const stepsTotal = steps.length;
-  const stepsValid = stepsTotal - invalidSteps.length;
-  const score = stepsTotal > 0 ? stepsValid / stepsTotal : 0;
-
-  log(LOG_RESULT, `frontmatter thread=${threadId} valid=${stepsValid}/${stepsTotal}`);
-
-  return {
-    score,
-    data: { stepsTotal, stepsValid, invalidSteps },
-    schema: EVAL_JUDGE_FRONTMATTER_SCHEMA,
-  };
-}
@@ -1,17 +0,0 @@
-import { EVAL_JUDGE_HALLUCINATION_SCHEMA } from "../../storage/index.js";
-import type { BuiltinJudgeOutput } from "./types.js";
-
-/**
- * LLM-as-judge: detects claims in each step's output that are not grounded in
- * the available context (hallucinations).
- *
- * TODO: LLM-as-judge — needs provider config to call LLM API. Returns a stub
- * (score 0, empty perStep) until the LLM call path is wired up.
- */
-export async function runHallucinationJudge(_threadId: string): Promise<BuiltinJudgeOutput> {
-  return {
-    score: 0,
-    data: { perStep: [] },
-    schema: EVAL_JUDGE_HALLUCINATION_SCHEMA,
-  };
-}
@@ -1,6 +0,0 @@
-export { runFrontmatterJudge } from "./frontmatter.js";
-export { runHallucinationJudge } from "./hallucination.js";
-export { readThreadSteps } from "./read-steps.js";
-export { runTokenStatsJudge } from "./token-stats.js";
-export type { BuiltinJudge, BuiltinJudgeOutput } from "./types.js";
-export { runUpstreamJudge } from "./upstream.js";
@@ -1,14 +0,0 @@
-import { execFileSync } from "node:child_process";
-
-import type { StepEntry, ThreadStepsOutput } from "@united-workforce/protocol";
-
-/** Shell out to `uwf step list` and return the parsed step entries (excludes start entry). */
-export function readThreadSteps(threadId: string): StepEntry[] {
-  const stdout = execFileSync("uwf", ["step", "list", threadId], {
-    encoding: "utf8",
-    stdio: ["ignore", "pipe", "pipe"],
-  }).trim();
-  const parsed = JSON.parse(stdout) as ThreadStepsOutput;
-  // steps[0] is the StartEntry; the rest are StepEntry records.
-  return parsed.steps.slice(1) as StepEntry[];
-}
@@ -1,53 +0,0 @@
-import { createLogger } from "@united-workforce/util";
-
-import { EVAL_JUDGE_TOKEN_STATS_SCHEMA } from "../../storage/index.js";
-import { readThreadSteps } from "./read-steps.js";
-import type { BuiltinJudgeOutput } from "./types.js";
-
-const log = createLogger({ sink: { kind: "stderr" } });
-
-const LOG_RESULT = "T7KQ3M9P";
-
-type PerStepStats = {
-  role: string;
-  inputTokens: number;
-  outputTokens: number;
-  turns: number;
-  duration: number;
-};
-
-/**
- * Informational judge: aggregate token usage across every step. Always scores
- * 1.0 — it never penalizes a run, it only reports usage. Steps with null usage
- * contribute zeros.
- */
-export async function runTokenStatsJudge(threadId: string): Promise<BuiltinJudgeOutput> {
-  const steps = readThreadSteps(threadId);
-
-  let totalInput = 0;
-  let totalOutput = 0;
-  let totalTurns = 0;
-  const perStep: PerStepStats[] = [];
-
-  for (const step of steps) {
-    const usage = step.usage;
-    const inputTokens = usage !== null ? usage.inputTokens : 0;
-    const outputTokens = usage !== null ? usage.outputTokens : 0;
-    const turns = usage !== null ? usage.turns : 0;
-    const duration = usage !== null ? usage.duration : 0;
-
-    totalInput += inputTokens;
-    totalOutput += outputTokens;
-    totalTurns += turns;
-
-    perStep.push({ role: step.role, inputTokens, outputTokens, turns, duration });
-  }
-
-  log(LOG_RESULT, `token-stats thread=${threadId} in=${totalInput} out=${totalOutput}`);
-
-  return {
-    score: 1.0,
-    data: { totalInput, totalOutput, totalTurns, perStep },
-    schema: EVAL_JUDGE_TOKEN_STATS_SCHEMA,
-  };
-}
--- a/Show More
+++ b/Show More