chore: release cli@0.3.0 util@0.1.3 agent-hermes@0.1.3 agent-claude-code@0.1.2 agent-builtin@0.1.2 agent-mock@0.1.2

Merge pull request 'fix: bootstrap — session restart hint + v0.2.1 migration note' (#125 ) from fix/123-session-restart-hint into main
fix: bootstrap — remind to restart session after skill install/update
2026-06-06 00:13:48 +00:00 · 2026-06-05 23:54:24 +00:00 · 2026-06-05 23:48:53 +00:00 · 2026-06-05 23:33:57 +00:00 · 2026-06-05 23:31:56 +00:00 · 2026-06-05 23:13:54 +00:00
174 changed files with 5758 additions and 1652 deletions
@@ -1,8 +0,0 @@
-# Changesets
-
-Hello and welcome! This folder has been automatically generated by `@changesets/cli`, a build tool that works
-with multi-package repos, or single-package repos to help you version and publish your code. You can
-find the full documentation for it [in our repository](https://github.com/changesets/changesets).
-
-We have a quick list of common questions to get you started engaging with this project in
-[our documentation](https://github.com/changesets/changesets/blob/main/docs/common-questions.md).
@@ -0,0 +1,9 @@
+---
+"@united-workforce/cli": patch
+---
+
+fix: expand bootstrap prompt with full onboarding and upgrade guide
+
+Bootstrap now covers two scenarios:
+- Fresh install: CLI + adapter installation, `uwf setup` configuration, skill installation, end-to-end verification
+- Upgrade: package update, skill regeneration, breaking change migrations (e.g. $START new/resume)
@@ -0,0 +1,8 @@
+---
+"@united-workforce/cli": patch
+---
+
+fix: bootstrap adds Step 0 environment pre-flight check
+
+- Pre-flight checks for node, pnpm/npm, global bin PATH, hermes CLI with FIX instructions (#112)
+- Install commands changed from npm to pnpm (with npm fallback)
@@ -0,0 +1,9 @@
+---
+"@united-workforce/cli": patch
+"@united-workforce/util": patch
+---
+
+fix: workflow-authoring flat schema example uses enum, bootstrap adds PATH guidance
+
+- workflow-authoring: flat schema example uses `enum: [done]` instead of bare `const` (#110.3)
+- bootstrap: adds `which hermes` check and PATH guidance for venv installs (#110.4)
@@ -0,0 +1,14 @@
+---
+"@united-workforce/cli": patch
+---
+
+fix: improve bootstrap docs — agent discovery, pnpm/npm parity, preset provider table (#118, #120)
+
+- Step 1: detect installed agents (hermes/claude) before choosing adapter
+- Step 1: clarify adapter versions are independent from CLI — install @latest
+- Step 1: show pnpm and npm side-by-side
+- Step 1: add "adapter must be installed before `uwf setup --agent`" note
+- Step 1: add ACP verification step (hermes acp --help)
+- Step 2: `--agent` takes adapter command name (e.g. `uwf-hermes`), not npm package
+- Step 2: preset providers listed as a table with names and default base URLs
+- Remove uwf-builtin from supported adapters (not ready yet)
@@ -0,0 +1,10 @@
+---
+"@united-workforce/cli": patch
+---
+
+fix: preset provider base-url auto-fill, bootstrap ACP docs, friendlier name mismatch error
+
+- `uwf setup --provider dashscope` now auto-fills `--base-url` from preset list (#106)
+- Bootstrap guide documents uwf-hermes ACP dependency (`pip install hermes-agent[acp]`) (#107)
+- Bootstrap verify step uses inline workflow instead of missing `examples/eval-simple.yaml` (#107)
+- Workflow filename mismatch error now suggests how to fix it (#108)
@@ -1,11 +0,0 @@
-{
-  "$schema": "https://unpkg.com/@changesets/config@3.1.4/schema.json",
-  "changelog": "@changesets/cli/changelog",
-  "commit": false,
-  "fixed": [["@united-workforce/*"]],
-  "linked": [],
-  "access": "public",
-  "baseBranch": "main",
-  "updateInternalDependencies": "patch",
-  "ignore": ["@united-workforce/dashboard"]
-}
@@ -1,30 +0,0 @@
-{
-  "mode": "exit",
-  "tag": "alpha",
-  "initialVersions": {
-    "@uncaged/cli": "0.4.5",
-    "@uncaged/workflow-agent-cursor": "0.4.5",
-    "@uncaged/agent-hermes": "0.4.5",
-    "@uncaged/workflow-agent-llm": "0.4.5",
-    "@uncaged/workflow-agent-react": "0.4.5",
-    "@uncaged/workflow-cas": "0.4.5",
-    "@uncaged/dashboard": "0.1.0",
-    "@uncaged/workflow-execute": "0.4.5",
-    "@uncaged/workflow-gateway": "0.4.5",
-    "@uncaged/protocol": "0.4.5",
-    "@uncaged/workflow-reactor": "0.4.5",
-    "@uncaged/workflow-register": "0.4.5",
-    "@uncaged/workflow-runtime": "0.4.5",
-    "@uncaged/workflow-template-develop": "0.4.5",
-    "@uncaged/workflow-template-solve-issue": "0.4.5",
-    "@uncaged/util": "0.4.5",
-    "@uncaged/util-agent": "0.4.5"
-  },
-  "changesets": [
-    "env-api-unify",
-    "fix-internal-deps",
-    "fix-publish-src",
-    "fix-workspace-deps",
-    "rfc-252-agent-fn"
-  ]
-}
@@ -0,0 +1,14 @@
+---
+"@united-workforce/cli": patch
+"@united-workforce/agent-hermes": patch
+"@united-workforce/agent-claude-code": patch
+"@united-workforce/agent-builtin": patch
+"@united-workforce/agent-mock": patch
+---
+
+fix: suppress ExperimentalWarning, PEP 668 pip guidance, setup help (#116)
+
+- All CLI bins use shebang `#!/usr/bin/env -S node --disable-warning=ExperimentalWarning`
+- Remove NODE_OPTIONS injection from spawn (shebang handles it)
+- Bootstrap pip install guidance covers venv/pipx/source options for PEP 668 systems
+- `uwf setup --help` mentions interactive wizard mode
@@ -0,0 +1,12 @@
+---
+"@united-workforce/cli": patch
+---
+
+fix: setup UX improvements (#114)
+
+- Setup validates adapter availability and prints install command if missing
+- Setup prints "Config saved to <path> ✓" on success
+- Spawn ENOENT gives actionable error ("not found in PATH" + which command)
+- SQLite ExperimentalWarning suppressed via NODE_OPTIONS in spawned processes
+- Bootstrap VERSION reads cli package version (was reading util version)
+- Bootstrap PATH guidance is shell-agnostic (no hardcoded .bashrc/.profile)
@@ -0,0 +1,9 @@
+---
+"@united-workforce/cli": minor
+"@united-workforce/util": patch
+---
+
+feat: replace $START `_` status with `new`/`resume` semantics
+
+BREAKING: All workflow YAML files must replace the `$START._` edge with separate `$START.new` and `$START.resume` edges.
+The `resume` edge prompt replaces the previously hardcoded resume message in the CLI.
@@ -0,0 +1,15 @@
+---
+"@united-workforce/cli": patch
+"@united-workforce/util": patch
+---
+
+fix: unify $status to const-only, drop enum support (#123)
+
+Breaking: `$status` in frontmatter now requires `const` everywhere.
+`enum` is no longer accepted and will be rejected by the validator.
+
+- Validator: `hasStatusConst()` / `getConstStatuses()` replace enum-based checks
+- Error message: "must define $status as const (or oneOf with const)"
+- workflow-authoring docs: all examples use `const`, enum explicitly noted as unsupported
+- bootstrap hello.yaml: `$status: { const: done }`
+- All test fixtures migrated from enum to const/oneOf
@@ -12,15 +12,17 @@ jobs:
    steps:
      - uses: actions/checkout@v4

-      - uses: oven-sh/setup-bun@v2
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22

-      - run: bun install
+      - run: corepack enable && pnpm install

      - name: Build
-        run: bun run build
+        run: pnpm run build

      - name: Lint
-        run: bun run check
+        run: pnpm run check

      - name: Test
-        run: bun run test:ci
+        run: pnpm run test:ci
@@ -0,0 +1,226 @@
+# Eval Framework Implementation Plan
+
+## Goal
+
+Build `uwf-eval` CLI + eval task infrastructure for evaluating uwf workflow quality with real agents.
+
+## Architecture
+
+```
+uwf-eval (runner)          task package (npm)          OCAS (storage)
+  │                          │                           │
+  ├─ unpack tarball ───────► fixture/ → tmp cwd          │
+  ├─ read task.yaml          │                           │
+  ├─ uwf thread start/exec   │                           │
+  ├─ run judges ───────────► dist/judges/*.js            │
+  ├─ collect scores          │                           │
+  └─ store results ─────────────────────────────────────► CAS nodes + variables
+```
+
+### Key Design Decisions
+
+- **uwf-eval is NOT part of uwf** — separate package, shells out to uwf CLI
+- **Task = npm package** — fixture + task.yaml + judge scripts, distributable as tarball
+- **Judge = Node script** — `node <entry> <cwd> <thread-id>`, outputs `{score, data}` JSON
+- **Every output is OCAS typed** — eval-run, judge results all have registered schemas
+- **Builtin judges** — frontmatter compliance, upstream consumption, hallucination, token stats
+- **Task-specific judges** — bundled in the task package, custom schema per judge
+
+## Deliverables
+
+### Phase 1: Foundation (`@united-workforce/eval`)
+
+New package in the uwf monorepo.
+
+```
+packages/eval/
+  src/
+    cli.ts                    # uwf-eval entry point
+    commands/
+      run.ts                  # uwf-eval run
+      report.ts               # uwf-eval report <hash>
+      diff.ts                 # uwf-eval diff <hash> <hash>
+      list.ts                 # uwf-eval list
+    runner/
+      prepare.ts              # unpack tarball/dir → tmp cwd
+      execute.ts              # shell out to uwf thread start/exec
+      collect.ts              # run judges, collect scores
+    judge/
+      types.ts                # JudgeInput, JudgeOutput types
+      builtin/
+        frontmatter.ts        # frontmatter compliance check
+        upstream.ts           # upstream info consumption (LLM-as-judge)
+        hallucination.ts      # hallucination detection (LLM-as-judge)
+        token-stats.ts        # token usage from $usage field (#68)
+    storage/
+      schemas.ts              # OCAS schema definitions
+      store.ts                # CAS read/write helpers
+      index.ts                # variable indexing (@uwf/eval/*)
+    task/
+      types.ts                # TaskManifest type (task.yaml)
+      loader.ts               # parse task.yaml, validate
+  package.json
+  tsconfig.json
+```
+
+#### OCAS Schemas to Register
+
+1. `@uwf/eval-run` — full eval execution record
+   ```
+   { task, config: {agent, model, engineVersion}, threadId,
+     judges: [{name, score, weight, dataHash}], overall, timestamp }
+   ```
+
+2. `@uwf/eval-judge-frontmatter` — frontmatter judge data
+   ```
+   { stepsTotal, stepsValid, invalidSteps: [{stepIndex, role, errors: string[]}] }
+   ```
+
+3. `@uwf/eval-judge-upstream` — upstream consumption judge data
+   ```
+   { perStep: [{role, consumed: string[], missed: string[], score}] }
+   ```
+
+4. `@uwf/eval-judge-hallucination` — hallucination judge data
+   ```
+   { perStep: [{role, hallucinations: string[], score}] }
+   ```
+
+5. `@uwf/eval-judge-token-stats` — token stats (not scored, informational)
+   ```
+   { totalInput, totalOutput, totalTurns, perStep: [{role, input, output, turns, duration}] }
+   ```
+
+#### CLI Design
+
+```bash
+# Run eval
+uwf-eval run <task-dir-or-tarball> [--agent hermes] [--model claude-sonnet-4] [--count 20]
+
+# View results
+uwf-eval report <run-hash>        # render via ocas render
+uwf-eval diff <hash1> <hash2>     # side-by-side comparison
+uwf-eval list                     # list past runs
+```
+
+### Phase 2: Task Package Scaffold
+
+A template for creating eval tasks, shipped alongside the first real task (`fix-off-by-one`).
+
+```
+eval-tasks/                        # shazhou/uwf-eval-tasks monorepo
+  packages/
+    _template/                     # copy-paste template
+      package.json
+      task.yaml
+      fixture/
+      src/judges/
+      tsconfig.json
+    fix-off-by-one/                # first real task
+      package.json                 # @uwf-eval/fix-off-by-one
+      task.yaml
+      fixture/
+        src/calc.ts                # buggy calculator
+        src/calc.test.ts           # test that exposes the bug
+        package.json
+      src/judges/
+        test-pass.ts               # runs pnpm test, checks exit code
+        code-quality.ts            # LLM judge: minimal change, correct fix
+      schemas/
+        test-pass.json             # OCAS schema for test-pass data
+        code-quality.json          # OCAS schema for code-quality data
+      tsconfig.json
+  pnpm-workspace.yaml
+  tsconfig.json
+  biome.json
+```
+
+#### task.yaml Format
+
+```yaml
+name: fix-off-by-one
+description: Fix an off-by-one error in a calculator's add function
+workflow: solve-issue              # registered workflow name, or relative path to .yaml
+prompt: "Fix the bug: add(1,2) returns 4 instead of 3"
+limits:
+  maxSteps: 15
+  timeoutMinutes: 30
+judges:
+  - name: frontmatter-compliance
+    weight: 0.15
+    builtin: true
+  - name: upstream-consumption
+    weight: 0.15
+    builtin: true
+  - name: hallucination
+    weight: 0.1
+    builtin: true
+  - name: token-stats
+    weight: 0                      # informational, not scored
+    builtin: true
+  - name: test-pass
+    weight: 0.3
+    entry: dist/judges/test-pass.js
+    schema: schemas/test-pass.json
+  - name: code-quality
+    weight: 0.3
+    entry: dist/judges/code-quality.js
+    schema: schemas/code-quality.json
+```
+
+#### Judge Script Contract
+
+```typescript
+// Input: process.argv = [node, script, cwd, threadId]
+// Output: stdout JSON
+// Exit 0 = success, non-zero = judge error (not low score)
+
+import type { JudgeOutput } from "@united-workforce/eval";
+
+const result: JudgeOutput<TestPassData> = {
+  score: 1.0,      // 0.0 - 1.0
+  data: {           // typed per judge schema
+    command: "pnpm test",
+    exitCode: 0,
+    output: "3 tests passed"
+  }
+};
+
+console.log(JSON.stringify(result));
+```
+
+### Phase 3: Prerequisite — $usage in Adapter Protocol (#68)
+
+Blocked by #68. Token stats judge needs `$usage` in step nodes.
+
+Phases 1 and 2 can proceed without it — the token-stats judge simply returns zeros until adapters report usage.
+
+## Implementation Order
+
+1. **Phase 1a**: `@united-workforce/eval` package scaffold + CLI skeleton + OCAS schemas
+2. **Phase 1b**: `run` command — prepare, execute, collect flow
+3. **Phase 1c**: Builtin judges — frontmatter (deterministic), upstream + hallucination (LLM-as-judge)
+4. **Phase 2a**: Create `shazhou/uwf-eval-tasks` monorepo with proman
+5. **Phase 2b**: First task `fix-off-by-one` with fixture repo + 2 custom judges
+6. **Phase 2c**: End-to-end test: `uwf-eval run packages/fix-off-by-one --agent hermes`
+7. **Phase 1d**: `report`, `diff`, `list` commands (read from CAS, render via ocas render)
+
+## Dependencies
+
+- `@ocas/core` + `@ocas/fs` — CAS storage
+- `@united-workforce/protocol` — step node types
+- `commander` — CLI framework (consistent with uwf)
+- LLM API access — for LLM-as-judge (upstream, hallucination, task-specific quality judges)
+
+## Open Questions
+
+1. **LLM-as-judge provider config** — reuse uwf's `~/.uwf/config.yaml` provider settings? Or separate config?
+2. **Workflow file location** — task.yaml references a workflow. Should the workflow YAML be inside the tarball, or reference a registered workflow by name?
+3. **Non-coding tasks** — the debate workflow has no fixture repo. task.yaml should either set `fixture: null` or the package should simply omit the `fixture/` dir; in both cases the runner creates an empty cwd.
+4. **Parallel judge execution** — judges are independent, can run in parallel. Worth the complexity?
+
+## Risks
+
+- LLM-as-judge consistency — the same input may get different scores. Possible mitigation: run each judge multiple times and average the scores, or simply accept the variance.
+- Token cost of judges — each LLM judge call costs tokens. A 10-step workflow with 2 LLM judges means 20 LLM calls just for judging. Acceptable?
+- Fixture repo drift — if the fixture evolves, old eval runs become non-comparable. Pin fixture version in task.yaml.
@@ -264,7 +264,8 @@ roles:

 graph:
  $START:
-    _: { role: "bootstrap", prompt: "Set up the Docker container and verify uwf is runnable." }
+    new: { role: "bootstrap", prompt: "Set up the Docker container and verify uwf is runnable." }
+    resume: { role: "bootstrap", prompt: "Review the previous run output and continue the walkthrough." }
  bootstrap:
    pass: { role: "config-and-registry", prompt: "Container {{{containerName}}} is ready. Validate config and workflow registration." }
    fail: { role: "$END", prompt: "Bootstrap failed: {{{error}}}. No container was created." }
@@ -227,7 +227,8 @@ roles:
          required: [$status, error]
 graph:
  $START:
-    _: { role: "planner", prompt: "Analyze the issue and produce an implementation plan." }
+    new: { role: "planner", prompt: "Analyze the issue and produce an implementation plan." }
+    resume: { role: "planner", prompt: "Review the previous run output and continue the work." }
  planner:
    insufficient_info: { role: "$SUSPEND", prompt: "信息不足，需要补充：{{{reason}}}" }
    ready: { role: "developer", prompt: "Implement the TDD test spec (CAS hash: {{{plan}}}) in repo {{{repoPath}}}. Repo remote: {{{repoRemote}}}." }
@@ -0,0 +1,25 @@
+# Changelog
+
+## 0.1.0 (2026-06-05)
+
+Initial release of `@united-workforce/*` — a stateless workflow engine for AI agent orchestration.
+
+### Packages
+
+- **@united-workforce/protocol** — shared types (WorkflowPayload, StepNode, etc.)
+- **@united-workforce/util** — Crockford Base32, ULID, structured logger, frontmatter parsing
+- **@united-workforce/util-agent** — agent factory, context builder, extract pipeline
+- **@united-workforce/cli** — `uwf` CLI (thread lifecycle, status-based moderator, workflow registry)
+- **@united-workforce/eval** — `uwf-eval` CLI (prepare → execute → collect eval pipeline)
+- **@united-workforce/agent-hermes** — `uwf-hermes` adapter (Hermes Agent)
+- **@united-workforce/agent-claude-code** — `uwf-claude-code` adapter (Claude Code CLI)
+- **@united-workforce/agent-builtin** — `uwf-builtin` adapter (built-in LLM agent)
+- **@united-workforce/agent-mock** — `uwf-mock` adapter (deterministic test agent)
+
+### Highlights
+
+- Status-based graph routing (no LLM moderator cost)
+- CAS-backed immutable thread chains (`@ocas/core`)
+- Real token usage tracking (Hermes + Claude Code)
+- Eval framework with built-in judges (frontmatter, token-stats, test-pass)
+- `$SUSPEND` / resume for human-in-the-loop workflows
@@ -222,41 +222,42 @@ Test files (`__tests__/**`) are exempt.

 | Tool | Purpose |
 |------|---------|
-| **bun** | Package manager + runtime |
+| **pnpm** | Package manager |
 | **TypeScript** | Type checking (strict mode) |
 | **Biome** | Lint + format (replaces ESLint + Prettier) |
-| **vitest** | Test runner (`cli` uses vitest; other packages use `bun test`) |
+| **vitest** | Test runner (all packages) |

 ### Development Workflow

 ```bash
 # ── Setup ──
-bun install                 # install all workspace dependencies
+pnpm install                # install all workspace dependencies

 # ── Daily development ──
-bun run build               # tsc --build (all packages, dependency order)
-bun run check               # tsc --build + biome check + lint-log-tags
-bun run format              # biome format --write
-bun test                    # run tests across all packages
+pnpm run build              # build all packages (dependency order)
+pnpm run check              # biome check + lint-log-tags
+pnpm run typecheck          # tsc --build
+pnpm run test               # run tests across all packages

 # ── Before committing ──
-bun run check               # must pass — typecheck + lint + log tag validation
-bun test                    # must pass — all package tests
+pnpm run check              # must pass — lint + log tag validation
+pnpm run typecheck          # must pass — type checking
+pnpm run test               # must pass — all package tests
 ```

 ### Publishing

-All public `@united-workforce/*` packages are published to **npmjs.org** with **fixed mode** (all packages share the same version number).
+All public `@united-workforce/*` packages are published to **npmjs.org** with **independent versioning**.

 ```bash
 # 1. Add a changeset describing the change
-bun changeset
+npx changeset

-# 2. Bump all package versions + generate CHANGELOGs
-bun version
+# 2. Bump versions + generate CHANGELOGs
+proman bump

-# 3. Build, test, and publish (runs scripts/publish-all.mjs)
-bun release
+# 3. Build, test, and publish
+proman publish

 # Or publish manually with a tag:
 node scripts/publish-all.mjs --tag alpha
@@ -265,7 +266,7 @@ node scripts/publish-all.mjs --dry-run    # preview without publishing

 - `workspace:^` dependencies resolve to `^x.y.z` on publish
 - Publish order defined in `scripts/publish-all.mjs` (dependency order)
- Changesets config: `.changeset/config.json` (fixed mode, public access)
+- Changesets config: `.changeset/config.json` (independent versioning, public access)

 ### End-to-end: Author → Register → Run

@@ -470,7 +470,7 @@ Use the `ocas` CLI for direct CAS operations (`~/.ocas/` store, shared with `uwf

 | Tool | Purpose |
 |------|---------|
-| **bun** | Package manager + runtime |
+| **pnpm** | Package manager |
 | **TypeScript** | Type checking (strict mode) |
 | **Biome** | Lint + format |
 | **vitest** | Test runner |
@@ -17,7 +17,7 @@ The root README should have these sections in order:
 4. **Packages** — table with ALL packages from packages/ directory, columns: Package, Description, Type (cli/lib/agent/app)
 5. **Quick Start** — install, build, register workflow, start thread, run step
 6. **CLI Reference** — brief command list, detailed usage in cli README
-7. **Development** — bun install / build / check / test
+7. **Development** — pnpm install / build / check / test

 ## Per-Package README Structure

@@ -26,7 +26,7 @@ Each package README should have:
 1. **Title** — package name
 2. **One-line description** — matching package.json
 3. **Overview** — what it does, where it sits in the architecture, dependencies
-4. **Installation** — bun add (for libs) or "included as binary" (for cli/agents)
+4. **Installation** — pnpm add (for libs) or "included as binary" (for cli/agents)
 5. **API** (lib packages) — all exports from src/index.ts with type signatures, grouped by category, minimal usage examples
 6. **CLI Usage** (cli/agent packages) — command reference with examples
 7. **Internal Structure** — brief src/ file organization
@@ -56,7 +56,7 @@ For each package read:
 - All relative links work
 - Package names match package.json
 - No references to removed/renamed packages
- bun run build still passes
+- pnpm run build still passes

 ## Guidelines

@@ -200,7 +200,7 @@ payload:

 - `roles` — 内联定义，每个 role 的 `meta` 是独立的 ocas_ref（指向 ocas 内置 JSON Schema 节点）
 - `graph` — `Record<Role | "$START", Record<Status, Target>>`，每个 Target = `{ role, prompt }`
- Status 来自上一个 role 输出的 `status` 字段，`$START` 用 `_` 作为初始 status
+- Status 来自上一个 role 输出的 `$status` 字段，`$START` 使用 `new`（首次启动）和 `resume`（恢复已完成的 thread）作为 status
 - Prompt 模板使用 Mustache 渲染，变量来自 lastOutput
 - 不含 agent binding — agent 配置在 `~/.uwf/config.yaml` 中管理

@@ -208,7 +208,7 @@ Moderator 的求值逻辑：

 ```typescript
 evaluate(graph, lastRole, lastOutput) → { role, prompt }
-// 1. status = lastRole === "$START" ? "_" : lastOutput.status
+// 1. status = lastOutput.$status (e.g. "new" for $START first run, "resume" for completed thread resume)
 // 2. target = graph[lastRole][status]
 // 3. prompt = mustache.render(target.prompt, lastOutput)
 ```
@@ -422,8 +422,8 @@ type StepNodePayload = StepRecord & {
 Moderator 使用 `evaluate(graph, lastRole, lastOutput)` 进行同步 status-based routing：

 ```typescript
-// graph[lastRole][lastOutput.status] → Target { role, prompt }
-// $START 角色使用 "_" 作为初始 status
+// graph[lastRole][lastOutput.$status] → Target { role, prompt }
+// $START 使用 "new"（首次启动）和 "resume"（恢复已完成 thread）作为 status
 // prompt 通过 Mustache 模板渲染，变量来自 lastOutput
 ```

@@ -23,7 +23,7 @@ roles:
      type: object
      properties:
        $status:
-          enum: ["_"]
+          enum: ["done"]
        thesis:
          type: string
        keyPoints:
@@ -35,6 +35,7 @@ roles:
      required: [$status, thesis, keyPoints]
 graph:
  $START:
-    _: { role: "analyst", prompt: "Analyze the topic in the task and produce a structured summary with key points." }
+    new: { role: "analyst", prompt: "Analyze the topic in the task and produce a structured summary with key points." }
+    resume: { role: "analyst", prompt: "Review the previous analysis output and continue with additional context." }
  analyst:
-    _: { role: "$END", prompt: "Analysis complete. Finish the workflow." }
+    done: { role: "$END", prompt: "Analysis complete. Finish the workflow." }
@@ -53,7 +53,8 @@ roles:
      required: [$status, argument]
 graph:
  $START:
-    _: { role: "against", prompt: "Present your opening argument against the proposition." }
+    new: { role: "against", prompt: "Present your opening argument against the proposition." }
+    resume: { role: "against", prompt: "Review the previous debate output and continue the argument against the proposition." }
  against:
    conceded: { role: "$END", prompt: "The against side conceded. Debate over." }
    continue: { role: "for", prompt: "Counter the opposing argument: {{{argument}}}" }
@@ -0,0 +1,31 @@
+name: eval-simple
+description: "Single-role eval workflow: fixer takes prompt, fixes code, done."
+roles:
+  fixer:
+    description: "Fixes the code based on the prompt"
+    goal: |
+      You are a code fixer. Read the prompt, understand the bug, fix it, and verify by running the tests.
+    capabilities:
+      - code-editing
+      - test-running
+    procedure: |
+      1. Read the prompt to understand what needs to be fixed
+      2. Fix the bug in the source code
+      3. Run the tests mentioned in the prompt to verify
+      4. Output $status=done when tests pass
+    output: "Describe what you fixed and confirm tests pass. Set $status to done."
+    frontmatter:
+      type: object
+      properties:
+        $status:
+          type: string
+          const: done  # $status must be declared with const; the validator rejects enum
+        summary:
+          type: string
+      required: [$status, summary]
+graph:
+  $START:
+    new: { role: "fixer", prompt: "Fix the code issue described in the task prompt." }
+    resume: { role: "fixer", prompt: "Review the previous run output and continue fixing the code issue." }
+  fixer:
+    done: { role: "$END", prompt: "Fix complete." }
@@ -215,7 +215,8 @@ roles:
          required: [$status, error]
 graph:
  $START:
-    _: { role: "planner", prompt: "Analyze the issue and produce an implementation plan." }
+    new: { role: "planner", prompt: "Analyze the issue and produce an implementation plan." }
+    resume: { role: "planner", prompt: "Review the previous run output and continue the work." }
  planner:
    insufficient_info: { role: "$SUSPEND", prompt: "信息不足，需要补充：{{{reason}}}" }
    ready: { role: "developer", prompt: "Implement the TDD test spec (CAS hash: {{{plan}}}) in repo {{{repoPath}}}." }
@@ -1,8 +1,8 @@
 import { mkdtemp, rm } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
-import { afterEach, beforeEach, describe, expect, test } from "vitest";
 import { createMemoryStore } from "@ocas/core";
+import { afterEach, beforeEach, describe, expect, test } from "vitest";
 import { storeBuiltinDetail } from "../src/detail.js";
 import { appendSessionTurn, initSessionDir } from "../src/session.js";
 import type { BuiltinTurnPayload } from "../src/types.js";
@@ -1,8 +1,8 @@
-import { describe, it, expect, beforeAll, afterAll } from "vitest";
-import { readFileTool } from "../src/tools/read-file.js";
-import { writeFile, mkdir, rm } from "node:fs/promises";
-import { join } from "node:path";
+import { mkdir, rm, writeFile } from "node:fs/promises";
 import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterAll, beforeAll, describe, expect, it } from "vitest";
+import { readFileTool } from "../src/tools/read-file.js";

 const testDir = join(tmpdir(), `read-file-test-${Date.now()}`);
 const ctx = { cwd: testDir, storageRoot: testDir };
@@ -1,6 +1,6 @@
-import { describe, it, expect } from "vitest";
-import { runCommandTool } from "../src/tools/run-command.js";
 import { tmpdir } from "node:os";
+import { describe, expect, it } from "vitest";
+import { runCommandTool } from "../src/tools/run-command.js";

 const ctx = { cwd: tmpdir(), storageRoot: tmpdir() };

@@ -3,13 +3,13 @@ import { mkdtemp, rm } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
 import { afterEach, beforeEach, describe, expect, test } from "vitest";
-import type { BuiltinTurnPayload } from "../src/types.js";
 import {
  appendSessionTurn,
  initSessionDir,
  readSessionTurns,
  removeSession,
 } from "../src/session.js";
+import type { BuiltinTurnPayload } from "../src/types.js";

 describe("session", () => {
  let storageRoot: string;
@@ -1,8 +1,8 @@
-import { describe, it, expect, afterAll } from "vitest";
-import { writeFileTool } from "../src/tools/write-file.js";
 import { readFile, rm } from "node:fs/promises";
-import { join } from "node:path";
 import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterAll, describe, expect, it } from "vitest";
+import { writeFileTool } from "../src/tools/write-file.js";

 const testDir = join(tmpdir(), `write-file-test-${Date.now()}`);
 const ctx = { cwd: testDir, storageRoot: testDir };
@@ -1,6 +1,6 @@
 {
  "name": "@united-workforce/agent-builtin",
-  "version": "0.5.0",
+  "version": "0.1.2",
  "files": [
    "src",
    "dist",
@@ -8,7 +8,7 @@
  ],
  "type": "module",
  "bin": {
-    "uwf-builtin": "./src/cli.ts"
+    "uwf-builtin": "./dist/cli.js"
  },
  "exports": {
    ".": {
@@ -17,7 +17,6 @@
    }
  },
  "scripts": {
-    "prepublishOnly": "echo 'Use pnpm run release from repo root' && exit 1",
    "test": "vitest run __tests__/",
    "test:ci": "vitest run __tests__/"
  },
@@ -82,7 +82,13 @@ async function runBuiltinWithMessages(

  if (loopResult.turnCount === 0) {
    log("5RWTK9NB", "no turns produced, returning empty output");
-    return { output: "", detailHash: "", sessionId: session.sessionId, assembledPrompt: "" };
+    return {
+      output: "",
+      detailHash: "",
+      sessionId: session.sessionId,
+      assembledPrompt: "",
+      usage: null,
+    };
  }

  // Read jsonl → persist turns to CAS → store detail
@@ -99,6 +105,7 @@ async function runBuiltinWithMessages(
    detailHash,
    sessionId: session.sessionId,
    assembledPrompt: "",
+    usage: null,
  };
 }

@@ -1,4 +1,11 @@
-#!/usr/bin/env node
+#!/usr/bin/env -S node --disable-warning=ExperimentalWarning
+
+// eslint-disable-next-line -- dynamic import for version
+const pkg = await import("../package.json", { with: { type: "json" } });
+if (process.argv.includes("--version") || process.argv.includes("-V")) {
+  process.stdout.write(`${pkg.default.version}\n`);
+  process.exit(0);
+}

 import { createBuiltinAgent } from "./agent.js";

@@ -1,6 +1,6 @@
 {
  "name": "@united-workforce/agent-claude-code",
-  "version": "0.1.0",
+  "version": "0.1.2",
  "files": [
    "src",
    "dist",
@@ -8,7 +8,7 @@
  ],
  "type": "module",
  "bin": {
-    "uwf-claude-code": "./src/cli.ts"
+    "uwf-claude-code": "./dist/cli.js"
  },
  "exports": {
    ".": {
@@ -17,12 +17,12 @@
    }
  },
  "scripts": {
-    "prepublishOnly": "echo 'Use pnpm run release from repo root' && exit 1",
    "test": "vitest run __tests__/",
    "test:ci": "vitest run __tests__/"
  },
  "dependencies": {
    "@ocas/core": "^0.3.0",
+    "@united-workforce/protocol": "workspace:^",
    "@united-workforce/util": "workspace:^",
    "@united-workforce/util-agent": "workspace:^"
  },
@@ -1,5 +1,6 @@
 import { spawn } from "node:child_process";
 import type { Store } from "@ocas/core";
+import type { Usage } from "@united-workforce/protocol";
 import { createLogger } from "@united-workforce/util";
 import {
  type AgentContext,
@@ -145,7 +146,14 @@ async function processClaudeOutput(
      );
    }

-    return { output, detailHash, sessionId, assembledPrompt };
+    const usage: Usage = {
+      turns: parsed.numTurns,
+      inputTokens: parsed.usage.inputTokens,
+      outputTokens: parsed.usage.outputTokens,
+      duration: Math.round(parsed.durationMs / 1000),
+    };
+
+    return { output, detailHash, sessionId, assembledPrompt, usage };
  }

  // Truly unparseable output - provide enhanced error message
@@ -1,4 +1,11 @@
-#!/usr/bin/env node
+#!/usr/bin/env -S node --disable-warning=ExperimentalWarning
+
+// eslint-disable-next-line -- dynamic import for version
+const pkg = await import("../package.json", { with: { type: "json" } });
+if (process.argv.includes("--version") || process.argv.includes("-V")) {
+  process.stdout.write(`${pkg.default.version}\n`);
+  process.exit(0);
+}

 import { createClaudeCodeAgent } from "./claude-code.js";

@@ -2,5 +2,5 @@
  "extends": "../../tsconfig.json",
  "compilerOptions": { "rootDir": "src", "outDir": "dist" },
  "include": ["src"],
-  "references": [{ "path": "../util-agent" }]
+  "references": [{ "path": "../protocol" }, { "path": "../util-agent" }]
 }
@@ -0,0 +1,18 @@
+# @united-workforce/agent-hermes
+
+## 0.1.1
+
+### Patch Changes
+
+- 8085d1d: fix: read token usage from ACP PromptResponse instead of DB
+
+  Token counts (inputTokens, outputTokens) now come from the ACP
+  `PromptResponse.usage` field, which is populated synchronously from
+  `run_conversation()` return data — no WAL race condition.
+
+  Turns (assistant message count) still come from the DB via
+  `snapshotTurns()` before/after delta.
+
+  Previously both tokens and turns were read from the Hermes state DB
+  after the ACP prompt returned, but due to WAL write lag the DB often
+  had incomplete token data at read time (e.g. 235 vs actual 26,080).
@@ -1,55 +0,0 @@
-import { afterEach, beforeEach, describe, expect, it } from "vitest";
-import { HermesAcpClient } from "../../src/acp-client.js";
-
-const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
-
-describe("HermesAcpClient", () => {
-  let client: HermesAcpClient;
-
-  beforeEach(() => {
-    client = new HermesAcpClient();
-  });
-
-  afterEach(async () => {
-    await client.close();
-  });
-
-  it(
-    "connect() returns a UUID sessionId",
-    async () => {
-      const sessionId = await client.connect(process.cwd());
-      expect(typeof sessionId).toBe("string");
-      expect(sessionId).toMatch(UUID_RE);
-    },
-    { timeout: 2 * 60 * 1000 },
-  );
-
-  it(
-    "prompt() returns a non-empty text response",
-    async () => {
-      await client.connect(process.cwd());
-      const result = await client.prompt("Reply with exactly the word: PONG");
-      expect(typeof result.text).toBe("string");
-      expect(result.text.length).toBeGreaterThan(0);
-      expect(typeof result.sessionId).toBe("string");
-      expect(result.sessionId).toMatch(UUID_RE);
-    },
-    { timeout: 2 * 60 * 1000 },
-  );
-
-  it(
-    "prompt() can be called twice on the same session (resume)",
-    async () => {
-      await client.connect(process.cwd());
-
-      const first = await client.prompt("Say the word ALPHA and nothing else.");
-      expect(first.text.length).toBeGreaterThan(0);
-
-      const second = await client.prompt("Now say the word BETA and nothing else.");
-      expect(second.text.length).toBeGreaterThan(0);
-
-      expect(first.sessionId).toBe(second.sessionId);
-    },
-    { timeout: 2 * 60 * 1000 },
-  );
-});
@@ -1,56 +0,0 @@
-import { afterEach, describe, expect, it } from "vitest";
-import { HermesAcpClient } from "../../src/acp-client.js";
-
-/**
- * E2E test for cross-process session resume.
- *
- * Simulates the workflow re-entry scenario:
- * 1. Client A: connect → prompt → close (developer first run)
- * 2. Client B: resume(sessionId) → prompt (developer re-entry after reviewer reject)
- *
- * This is what happens when uwf thread step spawns uwf-hermes twice for the same role.
- */
-describe("HermesAcpClient cross-process resume", () => {
-  const clients: HermesAcpClient[] = [];
-
-  afterEach(async () => {
-    for (const c of clients) {
-      await c.close();
-    }
-    clients.length = 0;
-  });
-
-  // TODO(#435): flaky — depends on live LLM; mock or move to integration suite
-  it.skip(
-    "resume() after close — second prompt returns non-empty text",
-    async () => {
-      // --- Client A: first run ---
-      const clientA = new HermesAcpClient();
-      clients.push(clientA);
-
-      await clientA.connect(process.cwd());
-      const first = await clientA.prompt(
-        "Remember the secret code: WATERMELON. Reply with exactly: ACKNOWLEDGED",
-      );
-      expect(first.text.length).toBeGreaterThan(0);
-      const sessionId = first.sessionId;
-
-      // Close client A (simulates uwf-hermes process exit)
-      await clientA.close();
-
-      // --- Client B: resume (simulates re-entry) ---
-      const clientB = new HermesAcpClient();
-      clients.push(clientB);
-
-      await clientB.resume(sessionId, process.cwd());
-      const second = await clientB.prompt(
-        "What was the secret code I told you earlier? Reply with just the code word.",
-      );
-
-      // The critical assertion: resumed session produces non-empty output
-      expect(second.text.length).toBeGreaterThan(0);
-      expect(second.sessionId).toBe(sessionId);
-    },
-    { timeout: 3 * 60 * 1000 },
-  );
-});
@@ -15,7 +15,8 @@ describe("Issue #551 — bin entry & engines", () => {
    const pkg = JSON.parse(readFileSync(join(PKG_ROOT, "package.json"), "utf-8"));
    const binPath = pkg.bin["uwf-hermes"];
    const content = readFileSync(join(PKG_ROOT, binPath), "utf-8");
-    expect(content.startsWith("#!/usr/bin/env node")).toBe(true);
+    expect(content.startsWith("#!/usr/bin/env")).toBe(true);
+    expect(content).toContain("node");
  });

  test("README.md explains uwf-hermes is an adapter", () => {
@@ -140,7 +140,9 @@ function createTestDb(dbPath: string): TestDb {
  db.exec(`CREATE TABLE sessions (
    id TEXT PRIMARY KEY,
    model TEXT NOT NULL,
-    started_at INTEGER NOT NULL
+    started_at INTEGER NOT NULL,
+    input_tokens INTEGER DEFAULT 0,
+    output_tokens INTEGER DEFAULT 0
  )`);
  db.exec(`CREATE TABLE messages (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -0,0 +1,122 @@
+import { describe, expect, test } from "vitest";
+import type { AcpUsage } from "../src/acp-client.js";
+import { buildUsage, snapshotTurns } from "../src/hermes.js";
+import type { HermesSessionJson } from "../src/types.js";
+
+function makeSession(overrides: Partial<HermesSessionJson> = {}): HermesSessionJson {
+  return {
+    session_id: "test-session",
+    model: "test-model",
+    session_start: "2026-01-01T00:00:00Z",
+    messages: [],
+    inputTokens: 0,
+    outputTokens: 0,
+    ...overrides,
+  };
+}
+
+describe("snapshotTurns", () => {
+  test("returns zero for null session", () => {
+    const result = snapshotTurns(null);
+    expect(result).toEqual({ turns: 0 });
+  });
+
+  test("returns zero for empty session", () => {
+    const result = snapshotTurns(makeSession());
+    expect(result).toEqual({ turns: 0 });
+  });
+
+  test("counts assistant messages as turns", () => {
+    const result = snapshotTurns(
+      makeSession({
+        messages: [
+          { role: "user", content: "hello", reasoning: null, tool_calls: null },
+          { role: "assistant", content: "hi", reasoning: null, tool_calls: null },
+          { role: "user", content: "do X", reasoning: null, tool_calls: null },
+          { role: "tool", content: "result", reasoning: null, tool_calls: null },
+          { role: "assistant", content: "done", reasoning: null, tool_calls: null },
+        ],
+        inputTokens: 1000,
+        outputTokens: 500,
+      }),
+    );
+    expect(result).toEqual({ turns: 2 });
+  });
+
+  test("ignores non-assistant messages for turn count", () => {
+    const result = snapshotTurns(
+      makeSession({
+        messages: [
+          { role: "user", content: "hello", reasoning: null, tool_calls: null },
+          { role: "tool", content: "result", reasoning: null, tool_calls: null },
+        ],
+      }),
+    );
+    expect(result.turns).toBe(0);
+  });
+});
+
+describe("buildUsage", () => {
+  const acpUsage: AcpUsage = { inputTokens: 5000, outputTokens: 2000, totalTokens: 7000 };
+
+  test("first visit: tokens from ACP, turns from DB delta", () => {
+    const beforeTurns = { turns: 0 };
+    const afterTurns = { turns: 3 };
+    const result = buildUsage(acpUsage, beforeTurns, afterTurns, 12.5);
+    expect(result).toEqual({
+      turns: 3,
+      inputTokens: 5000,
+      outputTokens: 2000,
+      duration: 13,
+    });
+  });
+
+  test("re-entry: turn delta computed correctly, tokens from ACP", () => {
+    const beforeTurns = { turns: 2 };
+    const afterTurns = { turns: 4 };
+    const acpDelta: AcpUsage = { inputTokens: 8000, outputTokens: 3500, totalTokens: 11500 };
+    const result = buildUsage(acpDelta, beforeTurns, afterTurns, 7.3);
+    expect(result).toEqual({
+      turns: 2,
+      inputTokens: 8000,
+      outputTokens: 3500,
+      duration: 7,
+    });
+  });
+
+  test("floors negative turn deltas at 0, then defaults to 1", () => {
+    const beforeTurns = { turns: 5 };
+    const afterTurns = { turns: 3 };
+    const result = buildUsage(acpUsage, beforeTurns, afterTurns, 1.0);
+    // turns would be negative (-2), floored to 0, then || 1 gives 1
+    expect(result.turns).toBe(1);
+  });
+
+  test("zero turns delta defaults to 1 (at least one turn happened)", () => {
+    const beforeTurns = { turns: 3 };
+    const afterTurns = { turns: 3 };
+    const result = buildUsage(acpUsage, beforeTurns, afterTurns, 5.0);
+    // turns delta is 0, || 1 gives 1
+    expect(result.turns).toBe(1);
+  });
+
+  test("null ACP usage yields zero tokens", () => {
+    const beforeTurns = { turns: 0 };
+    const afterTurns = { turns: 2 };
+    const result = buildUsage(null, beforeTurns, afterTurns, 10.0);
+    expect(result).toEqual({
+      turns: 2,
+      inputTokens: 0,
+      outputTokens: 0,
+      duration: 10,
+    });
+  });
+
+  test("duration is rounded", () => {
+    const beforeTurns = { turns: 0 };
+    const afterTurns = { turns: 1 };
+    expect(buildUsage(acpUsage, beforeTurns, afterTurns, 3.7).duration).toBe(4);
+    expect(buildUsage(acpUsage, beforeTurns, afterTurns, 3.2).duration).toBe(3);
+    expect(buildUsage(acpUsage, beforeTurns, afterTurns, 0.0).duration).toBe(0);
+  });
+});
@@ -1,6 +1,6 @@
 {
  "name": "@united-workforce/agent-hermes",
-  "version": "0.5.0",
+  "version": "0.1.3",
  "files": [
    "src",
    "dist",
@@ -8,7 +8,7 @@
  ],
  "type": "module",
  "bin": {
-    "uwf-hermes": "./src/cli.ts"
+    "uwf-hermes": "./dist/cli.js"
  },
  "exports": {
    ".": {
@@ -17,7 +17,6 @@
    }
  },
  "scripts": {
-    "prepublishOnly": "echo 'Use pnpm run release from repo root' && exit 1",
    "test": "vitest run __tests__/",
    "test:ci": "vitest run __tests__/"
  },
@@ -1,6 +1,16 @@
 import type { ChildProcess } from "node:child_process";
 import { spawn } from "node:child_process";
+import { readFileSync } from "node:fs";
+import { dirname, join } from "node:path";
 import { createInterface } from "node:readline";
+import { fileURLToPath } from "node:url";
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const OWN_VERSION = (
+  JSON.parse(readFileSync(join(__dirname, "..", "package.json"), "utf-8")) as {
+    version: string;
+  }
+).version;

 const HERMES_COMMAND = "hermes";
 const PROTOCOL_VERSION = 1;
@@ -17,9 +27,17 @@ type PendingRequest = {
  reject: (reason: Error) => void;
 };

+/** Token usage returned by ACP PromptResponse. */
+export type AcpUsage = {
+  inputTokens: number;
+  outputTokens: number;
+  totalTokens: number;
+};
+
 export type AcpPromptResult = {
  text: string;
  sessionId: string;
+  usage: AcpUsage | null;
 };

 export class HermesAcpClient {
@@ -72,6 +90,11 @@ export class HermesAcpClient {
    return sessionId;
  }

+  /** Return the current session ID, or null if not connected. */
+  getSessionId(): string | null {
+    return this.sessionId;
+  }
+
  /** Send prompt and collect final assistant text from ACP stream chunks. */
  async prompt(text: string): Promise<AcpPromptResult> {
    if (this.sessionId === null) {
@@ -91,9 +114,25 @@ export class HermesAcpClient {
      );
    }

+    // Token usage lives at PromptResponse.result.usage (camelCase); absent, JSON-null or malformed usage maps to null.
+    const result = (response as { result?: Record<string, unknown> }).result;
+    const rawUsage = result?.usage as Record<string, unknown> | null | undefined;
+    const usage: AcpUsage | null =
+      rawUsage != null &&
+      typeof rawUsage.inputTokens === "number" &&
+      typeof rawUsage.outputTokens === "number" &&
+      typeof rawUsage.totalTokens === "number"
+        ? {
+            inputTokens: rawUsage.inputTokens,
+            outputTokens: rawUsage.outputTokens,
+            totalTokens: rawUsage.totalTokens,
+          }
+        : null;
+
    return {
      text: this.messageChunks.join(""),
      sessionId: this.sessionId,
+      usage,
    };
  }

@@ -270,7 +309,7 @@ export class HermesAcpClient {
  private async initialize(): Promise<void> {
    const initResponse = await this.sendRequest("initialize", {
      protocolVersion: PROTOCOL_VERSION,
-      clientInfo: { name: "uwf", version: "0.1.0" },
+      clientInfo: { name: "uwf-hermes", version: OWN_VERSION },
      capabilities: {},
    });

@@ -1,4 +1,11 @@
-#!/usr/bin/env node
+#!/usr/bin/env -S node --disable-warning=ExperimentalWarning
+
+// eslint-disable-next-line -- dynamic import for version
+const pkg = await import("../package.json", { with: { type: "json" } });
+if (process.argv.includes("--version") || process.argv.includes("-V")) {
+  process.stdout.write(`${pkg.default.version}\n`);
+  process.exit(0);
+}

 import { createHermesAgent } from "./hermes.js";
 import { isResumeDisabled } from "./session-cache.js";
@@ -1,4 +1,5 @@
 import type { Store } from "@ocas/core";
+import type { Usage } from "@united-workforce/protocol";
 import { createLogger } from "@united-workforce/util";
 import {
  type AgentContext,
@@ -7,13 +8,50 @@ import {
  buildRolePrompt,
  createAgent,
 } from "@united-workforce/util-agent";
-
+import type { AcpUsage } from "./acp-client.js";
 import { HermesAcpClient } from "./acp-client.js";
 import { getCachedSessionId, setCachedSessionId } from "./session-cache.js";
 import { loadHermesSession, storeHermesSessionDetail } from "./session-detail.js";
+import type { HermesSessionJson } from "./types.js";

 const log = createLogger({ sink: { kind: "stderr" } });

+/** Assistant-turn count of a session at one point in time; captured before and after a prompt to compute a delta. */
+type TurnsSnapshot = {
+  turns: number;
+};
+
+const ZERO_TURNS: TurnsSnapshot = { turns: 0 };
+
+/** Count the assistant-role messages in a session; a null session yields the shared zero snapshot. */
+export function snapshotTurns(session: HermesSessionJson | null): TurnsSnapshot {
+  if (session !== null) {
+    const assistantMessages = session.messages.filter(({ role }) => role === "assistant");
+    return { turns: assistantMessages.length };
+  }
+  // No session to inspect: hand back the shared zero constant.
+  return ZERO_TURNS;
+}
+
+/**
+ * Merge ACP token counts with the DB turn delta into a Usage record.
+ * Missing ACP usage reports 0 tokens; the turn delta never reports less than 1.
+ * Duration is rounded to whole seconds.
+ */
+export function buildUsage(
+  acpUsage: AcpUsage | null,
+  beforeTurns: TurnsSnapshot,
+  afterTurns: TurnsSnapshot,
+  durationSec: number,
+): Usage {
+  // Clamp a negative delta to 0; `|| 1` below then lifts a zero delta to one turn.
+  const turnDelta = Math.max(0, afterTurns.turns - beforeTurns.turns);
+  const inputTokens = acpUsage?.inputTokens ?? 0;
+  const outputTokens = acpUsage?.outputTokens ?? 0;
+  const duration = Math.round(durationSec);
+  return { turns: turnDelta || 1, inputTokens, outputTokens, duration };
+}
+
 /** Assemble system prompt, task, and prior step outputs for Hermes. */
 export function buildHermesPrompt(ctx: AgentContext): string {
  const parts: string[] = [];
@@ -108,25 +146,45 @@ export function createHermesAgent(resumeDisabled: boolean): () => Promise<void>
    void client.close();
  });

-  async function runPrompt(ctx: AgentContext, useContinuation: boolean): Promise<AgentRunResult> {
+  async function runPrompt(
+    ctx: AgentContext,
+    useContinuation: boolean,
+    beforeTurns: TurnsSnapshot,
+  ): Promise<AgentRunResult> {
    const effectiveCtx = useContinuation ? ctx : { ...ctx, isFirstVisit: true };
    const fullPrompt = buildHermesPrompt(effectiveCtx);
-    const { text, sessionId } = await client.prompt(fullPrompt);
+    const startMs = Date.now();
+    const { text, sessionId, usage: acpUsage } = await client.prompt(fullPrompt);
+    const durationSec = (Date.now() - startMs) / 1000;
    const { detailHash } = await storePromptResult(ctx.store, sessionId);

    if (!resumeDisabled) {
      await setCachedSessionId(ctx.threadId, ctx.role, sessionId, ctx.storageRoot);
    }

-    return { output: text, detailHash, sessionId, assembledPrompt: fullPrompt };
+    // Turns from DB (may lag slightly due to WAL, but acceptable)
+    const afterSession = await loadHermesSession(sessionId);
+    const afterTurns = snapshotTurns(afterSession);
+    const usage = buildUsage(acpUsage, beforeTurns, afterTurns, durationSec);
+
+    return { output: text, detailHash, sessionId, assembledPrompt: fullPrompt, usage };
  }

  async function runHermes(ctx: AgentContext): Promise<AgentRunResult> {
    const cwd = process.cwd();
    const attempt = await prepareSession(client, ctx, cwd, resumeDisabled);

+    // Snapshot before prompt: for resumed sessions, captures cumulative state
+    // so we can compute the turn delta. For new sessions, this is ZERO_TURNS.
+    const currentSessionId = client.getSessionId();
+    const beforeSession =
+      attempt.resumed && currentSessionId !== null
+        ? await loadHermesSession(currentSessionId)
+        : null;
+    const beforeTurns = snapshotTurns(beforeSession);
+
    try {
-      return await runPrompt(ctx, attempt.useContinuation);
+      return await runPrompt(ctx, attempt.useContinuation, beforeTurns);
    } catch (error) {
      if (!attempt.resumed) {
        throw error;
@@ -136,7 +194,8 @@ export function createHermesAgent(resumeDisabled: boolean): () => Promise<void>
      log("8FQW2R6N", `continuation prompt failed, retrying with initial prompt: ${message}`);
      await client.close();
      await client.connect(cwd);
-      return runPrompt(ctx, false);
+      // Fresh session after retry — reset snapshot to zero
+      return runPrompt(ctx, false, ZERO_TURNS);
    }
  }

@@ -147,9 +206,22 @@ export function createHermesAgent(resumeDisabled: boolean): () => Promise<void>
  ): Promise<AgentRunResult> {
    // Client is already connected from runHermes — same ACP session,
    // so the agent sees the full conversation history (crucial for retries).
-    const { text, sessionId } = await client.prompt(message);
+    // Snapshot turns before the continuation prompt for delta computation.
+    const currentSessionId = client.getSessionId();
+    const beforeSession =
+      currentSessionId !== null ? await loadHermesSession(currentSessionId) : null;
+    const beforeTurns = snapshotTurns(beforeSession);
+
+    const startMs = Date.now();
+    const { text, sessionId, usage: acpUsage } = await client.prompt(message);
+    const durationSec = (Date.now() - startMs) / 1000;
    const { detailHash } = await storePromptResult(store, sessionId);
-    return { output: text, detailHash, sessionId, assembledPrompt: "" };
+
+    const afterSession = await loadHermesSession(sessionId);
+    const afterTurns = snapshotTurns(afterSession);
+    const usage = buildUsage(acpUsage, beforeTurns, afterTurns, durationSec);
+
+    return { output: text, detailHash, sessionId, assembledPrompt: "", usage };
  }

  const agentMain = createAgent({
@@ -1,2 +1,8 @@
+export type { AcpUsage } from "./acp-client.js";
 export { HermesAcpClient } from "./acp-client.js";
-export { buildHermesPrompt, createHermesAgent } from "./hermes.js";
+export {
+  buildHermesPrompt,
+  buildUsage,
+  createHermesAgent,
+  snapshotTurns,
+} from "./hermes.js";
@@ -106,7 +106,7 @@ function parseSessionJson(raw: unknown): HermesSessionJson | null {
      messages.push(msg);
    }
  }
-  return { session_id, model, session_start, messages };
+  return { session_id, model, session_start, messages, inputTokens: 0, outputTokens: 0 };
 }

 export function getHermesDbPath(): string {
@@ -117,6 +117,8 @@ type DbSessionRow = {
  id: string;
  model: string;
  started_at: number;
+  input_tokens: number;
+  output_tokens: number;
 };

 type DbMessageRow = {
@@ -156,7 +158,9 @@ export function loadHermesSessionFromDb(
  try {
    db = new DatabaseSync(resolvedPath, { readOnly: true });
    const session = db
-      .prepare("SELECT id, model, started_at FROM sessions WHERE id = ?")
+      .prepare(
+        "SELECT id, model, started_at, input_tokens, output_tokens FROM sessions WHERE id = ?",
+      )
      .get(sessionId) as DbSessionRow | null;
    if (session === null) {
      return null;
@@ -181,6 +185,8 @@ export function loadHermesSessionFromDb(
      model: session.model,
      session_start: new Date(session.started_at * 1000).toISOString(),
      messages,
+      inputTokens: session.input_tokens ?? 0,
+      outputTokens: session.output_tokens ?? 0,
    };
  } catch {
    return null;
@@ -40,4 +40,6 @@ export type HermesSessionJson = {
  model: string;
  session_start: string;
  messages: HermesSessionMessage[];
+  inputTokens: number;
+  outputTokens: number;
 };
@@ -1,6 +1,6 @@
 {
  "name": "@united-workforce/agent-mock",
-  "version": "0.5.0",
+  "version": "0.1.2",
  "files": [
    "src",
    "dist",
@@ -17,7 +17,6 @@
    }
  },
  "scripts": {
-    "prepublishOnly": "echo 'Use pnpm run release from repo root' && exit 1",
    "test": "vitest run __tests__/",
    "test:ci": "vitest run __tests__/"
  },
@@ -1,4 +1,11 @@
-#!/usr/bin/env node
+#!/usr/bin/env -S node --disable-warning=ExperimentalWarning
+
+// eslint-disable-next-line -- dynamic import for version
+const pkg = await import("../package.json", { with: { type: "json" } });
+if (process.argv.includes("--version") || process.argv.includes("-V")) {
+  process.stdout.write(`${pkg.default.version}\n`);
+  process.exit(0);
+}

 import { createMockAgent } from "./mock-agent.js";

@@ -103,6 +103,7 @@ export function createMockAgent(mockDataPath: string): () => Promise<void> {
      detailHash,
      sessionId,
      assembledPrompt: "",
+      usage: { turns: 1, inputTokens: 0, outputTokens: 0, duration: 0 },
    };
    lastResult = result;
    return result;
@@ -0,0 +1,9 @@
+# @united-workforce/cli
+
+## 0.1.1
+
+### Patch Changes
+
+- 850a3b2: fix: resolve --agent override via config alias before raw command
+
+  `resolveAgentConfig()` now checks `config.agents[alias]` first before falling back to `parseAgentOverride()`. Eval CLI default `--agent` changed from `"hermes"` to `"uwf-hermes"`.
@@ -1,6 +1,6 @@
 {
  "name": "@united-workforce/cli",
-  "version": "0.5.0",
+  "version": "0.3.0",
  "files": [
    "src",
    "dist",
@@ -22,7 +22,6 @@
    "yaml": "^2.8.4"
  },
  "scripts": {
-    "prepublishOnly": "echo 'Use pnpm run release from repo root' && exit 1",
    "test": "vitest run src/",
    "test:ci": "vitest run src/"
  },
@@ -58,7 +58,10 @@ describe("C1: adapter JSON round-trip integration", () => {
        },
      },
      graph: {
-        $START: { _: { role: "worker", prompt: "Do the work", location: null } },
+        $START: {
+          new: { role: "worker", prompt: "Do the work", location: null },
+          resume: { role: "worker", prompt: "Resume the work", location: null },
+        },
        worker: { done: { role: "$END", prompt: "completed", location: null } },
      },
    });
@@ -6,13 +6,7 @@ import type { CasRef, ThreadId } from "@united-workforce/protocol";
 import { describe, expect, test } from "vitest";
 import { createMarker, deleteMarker } from "../background/index.js";
 import { cmdThreadList, cmdThreadShow, cmdThreadStart } from "../commands/thread.js";
-import {
-  addHistoryEntry,
-  createUwfStore,
-  deleteThread,
-  loadAllThreads,
-  setThread,
-} from "../store.js";
+import { completeThread, createUwfStore, loadActiveThreads, setThread } from "../store.js";

 const OUTPUT_SCHEMA = {
  type: "object" as const,
@@ -34,9 +28,13 @@ roles:
      $status: "ready"
    frontmatter:
      type: object
+      oneOf:
+        - properties:
+            $status: { const: "ready" }
+          required: ["$status"]
+        - properties:
+            $status: { const: "not-ready" }
          required: ["$status"]
-      properties:
-        $status: { type: string, enum: ["ready", "not-ready"] }
  roleB:
    description: Second role
    goal: Do B
@@ -48,13 +46,17 @@ roles:
      type: object
      required: ["$status"]
      properties:
-        $status: { type: string }
+        $status: { const: "done" }
 graph:
  $START:
-    _:
+    new:
      role: roleA
      prompt: "Do A"
      location: null
+    resume:
+      role: roleA
+      prompt: "Resume A"
+      location: null
  roleA:
    ready:
      role: roleB
@@ -65,7 +67,7 @@ graph:
      prompt: "Try again"
      location: null
  roleB:
-    _:
+    done:
      role: $END
      prompt: "Done"
      location: null
@@ -84,9 +86,13 @@ roles:
      $status: "pass"
    frontmatter:
      type: object
+      oneOf:
+        - properties:
+            $status: { const: "pass" }
+          required: ["$status"]
+        - properties:
+            $status: { const: "fail" }
          required: ["$status"]
-      properties:
-        $status: { type: string, enum: ["pass", "fail"] }
  roleB:
    description: Pass role
    goal: Do B
@@ -98,7 +104,7 @@ roles:
      type: object
      required: ["$status"]
      properties:
-        $status: { type: string }
+        $status: { const: "done" }
  roleC:
    description: Fail role
    goal: Do C
@@ -110,13 +116,17 @@ roles:
      type: object
      required: ["$status"]
      properties:
-        $status: { type: string }
+        $status: { const: "done" }
 graph:
  $START:
-    _:
+    new:
      role: roleA
      prompt: "Do A"
      location: null
+    resume:
+      role: roleA
+      prompt: "Resume A"
+      location: null
  roleA:
    pass:
      role: roleB
@@ -127,12 +137,12 @@ graph:
      prompt: "Do C (fail)"
      location: null
  roleB:
-    _:
+    done:
      role: $END
      prompt: "Done"
      location: null
  roleC:
-    _:
+    done:
      role: $END
      prompt: "Done"
      location: null
@@ -153,15 +163,19 @@ roles:
      type: object
      required: ["$status"]
      properties:
-        $status: { type: string }
+        $status: { const: "done" }
 graph:
  $START:
-    _:
+    new:
      role: worker
      prompt: "Work"
      location: null
+    resume:
+      role: worker
+      prompt: "Resume work"
+      location: null
  worker:
-    _:
+    done:
      role: $END
      prompt: "Done"
      location: null
@@ -175,7 +189,7 @@ async function insertStepNode(
  outputPayload: Record<string, unknown>,
 ): Promise<void> {
  const uwf = await createUwfStore(storageRoot);
-  const index = loadAllThreads(uwf.varStore);
+  const index = loadActiveThreads(uwf.varStore);
  const headEntry = index[threadId];
  if (headEntry === undefined) throw new Error(`thread ${threadId} not in index`);
  const head = headEntry.head;
@@ -206,7 +220,13 @@ async function insertStepNode(
    assembledPrompt: null,
  })) as CasRef;

-  setThread(uwf.varStore, threadId, { head: stepHash, suspendedRole: null, suspendMessage: null });
+  setThread(uwf.varStore, threadId, {
+    head: stepHash,
+    status: "idle",
+    suspendedRole: null,
+    suspendMessage: null,
+    completedAt: null,
+  });
 }

 describe("currentRole field", () => {
@@ -282,19 +302,12 @@ describe("currentRole field", () => {
    try {
      const wf = join(tmpDir, "test-current-role.yaml");
      await writeFile(wf, SIMPLE_WORKFLOW_YAML, "utf8");
-      const { thread, workflow } = await cmdThreadStart(storageRoot, wf, "test", tmpDir);
+      const { thread } = await cmdThreadStart(storageRoot, wf, "test", tmpDir);
      const tid = thread as ThreadId;

      const uwfForIndex = await createUwfStore(storageRoot);
-      const head = loadAllThreads(uwfForIndex.varStore)[tid]!.head;
-      deleteThread(uwfForIndex.varStore, tid);
-      addHistoryEntry(uwfForIndex.varStore, {
-        thread: tid,
-        workflow,
-        head,
-        completedAt: Date.now(),
-        reason: "completed",
-      });
+      loadActiveThreads(uwfForIndex.varStore)[tid]!.head;
+      completeThread(uwfForIndex.varStore, tid, "completed");

      const result = await cmdThreadShow(storageRoot, tid);
      expect(result.status).toBe("completed");
@@ -310,19 +323,12 @@ describe("currentRole field", () => {
    try {
      const wf = join(tmpDir, "test-current-role.yaml");
      await writeFile(wf, SIMPLE_WORKFLOW_YAML, "utf8");
-      const { thread, workflow } = await cmdThreadStart(storageRoot, wf, "test", tmpDir);
+      const { thread } = await cmdThreadStart(storageRoot, wf, "test", tmpDir);
      const tid = thread as ThreadId;

      const uwfForIndex = await createUwfStore(storageRoot);
-      const head = loadAllThreads(uwfForIndex.varStore)[tid]!.head;
-      deleteThread(uwfForIndex.varStore, tid);
-      addHistoryEntry(uwfForIndex.varStore, {
-        thread: tid,
-        workflow,
-        head,
-        completedAt: Date.now(),
-        reason: "cancelled",
-      });
+      loadActiveThreads(uwfForIndex.varStore)[tid]!.head;
+      completeThread(uwfForIndex.varStore, tid, "cancelled");

      const result = await cmdThreadShow(storageRoot, tid);
      expect(result.status).toBe("cancelled");
@@ -375,15 +381,8 @@ describe("currentRole field", () => {
      const comp = await cmdThreadStart(storageRoot, wf, "completed", tmpDir);
      const compId = comp.thread as ThreadId;
      const uwfForIndex = await createUwfStore(storageRoot);
-      const compHead = loadAllThreads(uwfForIndex.varStore)[compId]!.head;
-      deleteThread(uwfForIndex.varStore, compId);
-      addHistoryEntry(uwfForIndex.varStore, {
-        thread: compId,
-        workflow: comp.workflow,
-        head: compHead,
-        completedAt: Date.now(),
-        reason: "completed",
-      });
+      const _compHead = loadActiveThreads(uwfForIndex.varStore)[compId]!.head;
+      completeThread(uwfForIndex.varStore, compId, "completed");

      const list = await cmdThreadList(storageRoot, null, null, null, 0, 100);

@@ -447,8 +446,8 @@ describe("currentRole field", () => {
      await writeFile(wf, SINGLE_ROLE_WORKFLOW_YAML, "utf8");

      const { thread } = await cmdThreadStart(storageRoot, wf, "test", tmpDir);
-      // worker → _ maps to $END
-      await insertStepNode(storageRoot, thread as ThreadId, "worker", {});
+      // worker → done maps to $END
+      await insertStepNode(storageRoot, thread as ThreadId, "worker", { $status: "done" });

      const result = await cmdThreadShow(storageRoot, thread as ThreadId);
      expect(result.currentRole).toBe(null);
@@ -10,7 +10,7 @@ import { afterEach, beforeAll, beforeEach, describe, expect, test } from "vitest
 import { stringify } from "yaml";
 import { cmdThreadStart } from "../commands/thread.js";
 import { cmdWorkflowAdd } from "../commands/workflow.js";
-import { createUwfStore, findHistoryEntry, getThread } from "../store.js";
+import { createUwfStore, getThread } from "../store.js";

 // ── paths ──────────────────────────────────────────────────────────────────

@@ -106,9 +106,13 @@ async function addWorkflow(workflowFixture: string, workflowName: string): Promi

 type ExecResult = { stdout: string; stderr: string; exitCode: number };

-function runExec(threadId: string): ExecResult {
+function runExec(threadId: string, count: number | null = null): ExecResult {
+  const args = [CLI_PATH, "thread", "exec", threadId];
+  if (count !== null) {
+    args.push("--count", String(count));
+  }
  try {
-    const stdout = execFileSync(process.execPath, [CLI_PATH, "thread", "exec", threadId], {
+    const stdout = execFileSync(process.execPath, args, {
      encoding: "utf8",
      stdio: ["ignore", "pipe", "pipe"],
      env: { ...process.env, UWF_HOME: uwfHome, OCAS_HOME: casDir },
@@ -126,11 +130,38 @@ function runExec(threadId: string): ExecResult {
  }
 }

+/**
+ * Runs `uwf thread resume <threadId> -p <prompt>` against the built CLI.
+ * A failing invocation is reported through `exitCode`/`stderr` rather than thrown.
+ */
+function runResume(threadId: string, prompt: string): ExecResult {
+  const cliArgs = [CLI_PATH, "thread", "resume", threadId, "-p", prompt];
+  try {
+    const captured = execFileSync(process.execPath, cliArgs, {
+      encoding: "utf8",
+      stdio: ["ignore", "pipe", "pipe"],
+      env: { ...process.env, UWF_HOME: uwfHome, OCAS_HOME: casDir },
+      cwd: tmpDir,
+      timeout: 30000,
+    });
+    return { stdout: captured, stderr: "", exitCode: 0 };
+  } catch (e: unknown) {
+    // The thrown error is read for the child's captured streams and exit status.
+    const failure = e as NodeJS.ErrnoException & {
+      stdout?: string;
+      stderr?: string;
+      status?: number;
+    };
+    return {
+      stdout: failure.stdout ?? "",
+      stderr: failure.stderr ?? "",
+      exitCode: failure.status ?? 1,
+    };
+  }
+}
+
 type StepOutputJson = {
  thread: string;
  head: string;
  status: string;
  currentRole: string | null;
+  suspendedRole: string | null; // role that suspended the thread (e.g. "planner"); null once the thread completes
+  suspendMessage: string | null; // rendered suspend prompt, e.g. "Need more info: missing requirements"
  done: boolean;
 };

@@ -198,19 +229,25 @@ describe("E2E mock-agent: full uwf pipeline", () => {
    expect(getStatus(store, s1.output)).toBe("ready");
    expect(getStatus(store, s2.output)).toBe("done");

+    // Mock agent reports usage stats in step nodes.
+    expect(s1.usage).toEqual({ turns: 1, inputTokens: 0, outputTokens: 0, duration: 0 });
+    expect(s2.usage).toEqual({ turns: 1, inputTokens: 0, outputTokens: 0, duration: 0 });
+
    // The start node points at the registered workflow.
    const startNode = store.cas.get(startHash as CasRef);
    expect((startNode!.payload as StartNodePayload).workflow).toBe(workflowHash);

-    // Thread is completed: removed from active index, present in history.
+    // Thread is completed: status changed to "completed", head updated.
    const uwf = await createUwfStore(uwfHome);
-    expect(getThread(uwf.varStore, threadId)).toBeNull();
-    const hist = findHistoryEntry(uwf.varStore, threadId);
-    expect(hist).not.toBeNull();
-    expect(hist!.head).toBe(step2.head);
+    const finalEntry = getThread(uwf.varStore, threadId);
+    expect(finalEntry).not.toBeNull();
+    expect(finalEntry!.status).toBe("completed");
+    expect(finalEntry!.head).toBe(step2.head);
  });

-  test("2. branching workflow loops developer→reviewer→developer→reviewer→$END", async () => {
+  test("2. branching workflow loops developer→reviewer→developer→reviewer→$END", {
+    timeout: 30_000,
+  }, async () => {
    await writeMockConfig("e2e-loop.mock.yaml");
    const workflowHash = await addWorkflow("e2e-loop.workflow.yaml", "test-loop");

@@ -263,11 +300,14 @@ describe("E2E mock-agent: full uwf pipeline", () => {
    expect(getStatus(store, n4.output)).toBe("approved");

    const uwf = await createUwfStore(uwfHome);
-    expect(getThread(uwf.varStore, threadId)).toBeNull();
-    expect(findHistoryEntry(uwf.varStore, threadId)).not.toBeNull();
+    const finalEntry = getThread(uwf.varStore, threadId);
+    expect(finalEntry).not.toBeNull();
+    expect(finalEntry!.status).toBe("completed");
  });

-  test("3. role mismatch in mock data makes the agent exit with an error", async () => {
+  test("3. role mismatch in mock data makes the agent exit with an error", {
+    timeout: 30_000,
+  }, async () => {
    // Reuses the linear workflow but with a mock whose step[1].role is wrong.
    await writeMockConfig("e2e-mismatch.mock.yaml");
    const workflowHash = await addWorkflow("e2e-linear.workflow.yaml", "test-linear");
@@ -287,7 +327,172 @@ describe("E2E mock-agent: full uwf pipeline", () => {

    // The thread remains active (no step node was written for the failed step).
    const uwf = await createUwfStore(uwfHome);
-    expect(getThread(uwf.varStore, threadId)).not.toBeNull();
-    expect(getThread(uwf.varStore, threadId)!.head).toBe(step1.head);
+    const entry = getThread(uwf.varStore, threadId);
+    expect(entry).not.toBeNull();
+    expect(entry!.status).not.toBe("completed");
+    expect(entry!.head).toBe(step1.head);
+  });
+
+  test("4. planner $SUSPEND then resume re-runs planner and reaches $END", {
+    timeout: 30_000,
+  }, async () => {
+    await writeMockConfig("e2e-suspend.mock.yaml");
+    const workflowHash = await addWorkflow("e2e-suspend.workflow.yaml", "test-suspend");
+
+    const start = await cmdThreadStart(uwfHome, workflowHash, "Analyze the task", uwfHome, tmpDir);
+    const threadId = start.thread;
+
+    // Step 1 → planner emits insufficient_info, which the workflow routes to $SUSPEND.
+    const step1 = execStep(threadId);
+    expect(step1.status).toBe("suspended");
+    expect(step1.done).toBe(false);
+    expect(step1.currentRole).toBeNull(); // a suspended thread reports no current role
+    expect(step1.suspendedRole).toBe("planner");
+    expect(step1.suspendMessage).toBe("Need more info: missing requirements");
+
+    // The persisted thread index entry carries the same suspension metadata as the step output.
+    const suspendedEntry = getThread((await createUwfStore(uwfHome)).varStore, threadId);
+    expect(suspendedEntry).not.toBeNull();
+    expect(suspendedEntry!.status).toBe("suspended");
+    expect(suspendedEntry!.suspendedRole).toBe("planner");
+    expect(suspendedEntry!.suspendMessage).toBe("Need more info: missing requirements");
+
+    // Resume re-runs the planner role; the second scripted planner output is `ready` → $END.
+    const resume = runResume(threadId, "Here are the requirements");
+    expect(resume.exitCode).toBe(0);
+    const resumeOut = JSON.parse(resume.stdout.trim()) as StepOutputJson;
+    expect(resumeOut.status).toBe("completed");
+    expect(resumeOut.done).toBe(true);
+    expect(resumeOut.currentRole).toBeNull();
+    expect(resumeOut.suspendedRole).toBeNull(); // suspension metadata is cleared after the resumed step
+
+    // CAS chain: the resumed planner step links back (via prev) to the suspended planner step.
+    const store = await openStore(casDir);
+    const s1 = getStepNode(store, step1.head);
+    const s2 = getStepNode(store, resumeOut.head);
+    expect(s1.role).toBe("planner");
+    expect(s2.role).toBe("planner");
+    expect(s2.prev).toBe(step1.head);
+    expect(getStatus(store, s1.output)).toBe("insufficient_info");
+    expect(getStatus(store, s2.output)).toBe("ready");
+
+    const finalEntry = getThread((await createUwfStore(uwfHome)).varStore, threadId);
+    expect(finalEntry).not.toBeNull();
+    expect(finalEntry!.status).toBe("completed");
+    expect(finalEntry!.head).toBe(resumeOut.head);
+  });
+
+  test("5. --count 3 runs the whole linear pipeline in one invocation", {
+    timeout: 30_000,
+  }, async () => {
+    await writeMockConfig("e2e-count.mock.yaml");
+    const workflowHash = await addWorkflow("e2e-count.workflow.yaml", "test-count");
+
+    const start = await cmdThreadStart(uwfHome, workflowHash, "Ship the feature", uwfHome, tmpDir);
+    const threadId = start.thread;
+
+    // Single invocation with --count 3 → moderator drives analyst → developer → reviewer → $END.
+    const { stdout, stderr, exitCode } = runExec(threadId, 3);
+    expect(exitCode, `stderr: ${stderr}`).toBe(0); // stderr goes into the failure message for easier debugging
+
+    // Multi-step exec emits a JSON array (one entry per executed step).
+    const results = JSON.parse(stdout.trim()) as StepOutputJson[];
+    expect(Array.isArray(results)).toBe(true);
+    expect(results).toHaveLength(3);
+
+    expect(results[0].status).toBe("idle");
+    expect(results[0].currentRole).toBe("developer"); // entry describes the thread after the step: analyst ran, developer is next
+    expect(results[1].status).toBe("idle");
+    expect(results[1].currentRole).toBe("reviewer");
+    expect(results[2].status).toBe("completed");
+    expect(results[2].done).toBe(true);
+
+    // Verify the CAS chain holds 3 step nodes in the correct order.
+    const store = await openStore(casDir);
+    const n1 = getStepNode(store, results[0].head);
+    const n2 = getStepNode(store, results[1].head);
+    const n3 = getStepNode(store, results[2].head);
+    expect([n1.role, n2.role, n3.role]).toEqual(["analyst", "developer", "reviewer"]);
+    expect(n1.prev).toBeNull(); // the first step has no predecessor
+    expect(n2.prev).toBe(results[0].head);
+    expect(n3.prev).toBe(results[1].head);
+    expect(new Set([n1.start, n2.start, n3.start]).size).toBe(1); // all three steps share one start reference
+
+    const finalEntry = getThread((await createUwfStore(uwfHome)).varStore, threadId);
+    expect(finalEntry).not.toBeNull();
+    expect(finalEntry!.status).toBe("completed");
+    expect(finalEntry!.head).toBe(results[2].head);
+  });
+
+  test("6. mustache edge prompt renders planner variables into the worker step", {
+    timeout: 30_000,
+  }, async () => {
+    await writeMockConfig("e2e-mustache.mock.yaml");
+    const workflowHash = await addWorkflow("e2e-mustache.workflow.yaml", "test-mustache");
+
+    const start = await cmdThreadStart(uwfHome, workflowHash, "Plan the task", uwfHome, tmpDir);
+    const threadId = start.thread;
+
+    // Step 1 → planner emits $status ready plus the branch and repoPath template variables.
+    const step1 = execStep(threadId);
+    expect(step1.status).toBe("idle");
+    expect(step1.currentRole).toBe("worker");
+
+    // Step 2 → worker; the moderator renders the templated edge prompt before spawning it.
+    const step2 = execStep(threadId);
+    expect(step2.done).toBe(true);
+    expect(step2.status).toBe("completed");
+
+    const store = await openStore(casDir);
+    const plannerStep = getStepNode(store, step1.head);
+    expect(getStatus(store, plannerStep.output)).toBe("ready");
+
+    // The worker step's edgePrompt is the planner → worker template with both variables substituted.
+    const workerStep = getStepNode(store, step2.head);
+    expect(workerStep.role).toBe("worker");
+    expect(workerStep.edgePrompt).toContain("fix/42-auth"); // the "/" must come through unescaped
+    expect(workerStep.edgePrompt).toContain("/tmp/my-repo");
+    expect(workerStep.edgePrompt).toBe("Work on branch fix/42-auth in /tmp/my-repo");
+  });
+
+  // "衔尾蛇" in the title is Chinese for "ouroboros": a finished thread loops from $END back to $START.
+  test("7. completed thread can be resumed (衔尾蛇: end → start)", {
+    timeout: 30_000,
+  }, async () => {
+    // Reuse the suspend workflow (planner with ready → $END), but mock data
+    // goes straight to ready on first run, then ready again after resume.
+    await writeMockConfig("e2e-completed-resume.mock.yaml");
+    const workflowHash = await addWorkflow("e2e-suspend.workflow.yaml", "test-suspend");
+
+    const start = await cmdThreadStart(uwfHome, workflowHash, "Do the work", uwfHome, tmpDir);
+    const threadId = start.thread;
+
+    // Step 1: planner outputs ready → $END → thread completed.
+    const step1 = execStep(threadId);
+    expect(step1.done).toBe(true);
+    expect(step1.status).toBe("completed");
+
+    const uwf1 = await createUwfStore(uwfHome);
+    const entry1 = getThread(uwf1.varStore, threadId);
+    expect(entry1).not.toBeNull();
+    expect(entry1!.status).toBe("completed");
+
+    // Resume the completed thread — should re-evaluate $START → planner.
+    const resumeResult = runResume(threadId, "Additional context for round 2");
+    expect(resumeResult.exitCode).toBe(0);
+    // Parse with the shared output type (as test 4 does) instead of leaving the payload as `any`.
+    const resumeOutput = JSON.parse(resumeResult.stdout.trim()) as StepOutputJson;
+
+    // After resume step, planner ran again (step index 1 in mock) → ready → $END.
+    const uwf2 = await createUwfStore(uwfHome);
+    const entry2 = getThread(uwf2.varStore, threadId);
+    expect(entry2).not.toBeNull();
+    expect(entry2!.status).toBe("completed");
+    // Head should have advanced (not the same as step1).
+    expect(entry2!.head).not.toBe(step1.head);
+
+    // CAS chain: step2.prev === step1 head (chain is preserved across resume).
+    const store = await openStore(casDir);
+    const step2Node = getStepNode(store, resumeOutput.head);
+    expect(step2Node.role).toBe("planner");
+    expect(step2Node.prev).toBe(step1.head);
+  });
 });
@@ -0,0 +1,15 @@
+steps:
+  # Step 0: planner → ready → $END (thread completes)
+  - role: planner
+    output: |
+      ---
+      $status: ready
+      ---
+      Initial plan complete.
+  # Step 1: after resume, planner runs again from $START → ready → $END again
+  - role: planner
+    output: |
+      ---
+      $status: ready
+      ---
+      Revised plan after resume.
@@ -0,0 +1,19 @@
+steps:
+  - role: analyst
+    output: |
+      ---
+      $status: analyzed
+      ---
+      Analysis complete.
+  - role: developer
+    output: |
+      ---
+      $status: implemented
+      ---
+      Implementation complete.
+  - role: reviewer
+    output: |
+      ---
+      $status: approved
+      ---
+      Approved.
@@ -0,0 +1,46 @@
+name: test-count
+description: 3-step linear pipeline (analyst -> developer -> reviewer -> $END)
+roles:
+  analyst:
+    description: Analyzes the task
+    goal: Analyze the task
+    capabilities: []
+    procedure: Analyze it
+    output: Output the analysis and set $status to analyzed
+    frontmatter:
+      oneOf:
+        - properties:
+            $status: { const: analyzed }
+          required: [$status]
+  developer:
+    description: Implements the change
+    goal: Implement the change
+    capabilities: []
+    procedure: Write code
+    output: Output the implementation and set $status to implemented
+    frontmatter:
+      oneOf:
+        - properties:
+            $status: { const: implemented }
+          required: [$status]
+  reviewer:
+    description: Reviews the change
+    goal: Review the change
+    capabilities: []
+    procedure: Review code
+    output: Approve and set $status to approved
+    frontmatter:
+      oneOf:
+        - properties:
+            $status: { const: approved }
+          required: [$status]
+graph:
+  $START:
+    new: { role: analyst, prompt: 'Analyze the task' }
+    resume: { role: analyst, prompt: 'Review the previous run output and continue the work.' }
+  analyst:
+    analyzed: { role: developer, prompt: 'Implement the change' }
+  developer:
+    implemented: { role: reviewer, prompt: 'Review the change' }
+  reviewer:
+    approved: { role: '$END', prompt: 'Done' }
@@ -25,7 +25,8 @@ roles:
          required: [$status]
 graph:
  $START:
-    _: { role: planner, prompt: 'Plan the task' }
+    new: { role: planner, prompt: 'Plan the task' }
+    resume: { role: planner, prompt: 'Review the previous run output and continue the work.' }
  planner:
    ready: { role: worker, prompt: 'Do the work' }
  worker:
@@ -28,7 +28,8 @@ roles:
          required: [$status]
 graph:
  $START:
-    _: { role: developer, prompt: 'Implement the change' }
+    new: { role: developer, prompt: 'Implement the change' }
+    resume: { role: developer, prompt: 'Review the previous run output and continue the work.' }
  developer:
    review_needed: { role: reviewer, prompt: 'Review the change' }
  reviewer:
@@ -0,0 +1,15 @@
+steps:
+  - role: planner
+    output: |
+      ---
+      $status: ready
+      branch: fix/42-auth
+      repoPath: /tmp/my-repo
+      ---
+      Planned the work.
+  - role: worker
+    output: |
+      ---
+      $status: done
+      ---
+      Work complete.
@@ -0,0 +1,35 @@
+name: test-mustache
+description: Planner emits template variables consumed by the worker edge prompt
+roles:
+  planner:
+    description: Plans work and emits branch + repo path
+    goal: Plan the task
+    capabilities: []
+    procedure: Decide the branch and repo path
+    output: Set $status to ready and emit branch and repoPath
+    frontmatter:
+      oneOf:
+        - properties:
+            $status: { const: ready }
+            branch: { type: string }
+            repoPath: { type: string }
+          required: [$status, branch, repoPath] # both template variables are mandatory in planner output
+  worker:
+    description: Works on the planned branch
+    goal: Do the work
+    capabilities: []
+    procedure: Do it
+    output: Output the result and set $status to done
+    frontmatter:
+      oneOf:
+        - properties:
+            $status: { const: done }
+          required: [$status]
+graph:
+  $START:
+    new: { role: planner, prompt: 'Plan the task' }
+    resume: { role: planner, prompt: 'Review the previous run output and continue the work.' }
+  planner:
+    ready: { role: worker, prompt: 'Work on branch {{{branch}}} in {{{repoPath}}}' } # triple braces insert the values unescaped
+  worker:
+    done: { role: '$END', prompt: 'Complete' }
@@ -0,0 +1,14 @@
+steps:
+  - role: planner
+    output: |
+      ---
+      $status: insufficient_info
+      reason: missing requirements
+      ---
+      I need more information before I can plan this.
+  - role: planner
+    output: |
+      ---
+      $status: ready
+      ---
+      I now have what I need. Ready to proceed.
@@ -0,0 +1,25 @@
+name: test-suspend
+description: Planner can suspend for more info or finish when ready
+roles:
+  planner:
+    description: Plans work and may request more info
+    goal: Analyze the task
+    capabilities: []
+    procedure: Analyze the task and decide if more info is needed
+    output: Set $status to insufficient_info (with reason) or ready
+    frontmatter:
+      oneOf:
+        - properties:
+            $status: { const: insufficient_info }
+            reason: { type: string }
+          required: [$status, reason] # reason feeds the $SUSPEND prompt template below
+        - properties:
+            $status: { const: ready }
+          required: [$status]
+graph:
+  $START:
+    new: { role: planner, prompt: 'Analyze the task' } # presumably the edge taken when a thread is first started
+    resume: { role: planner, prompt: 'Review the previous run output and continue the work.' } # presumably the edge taken on resume
+  planner:
+    insufficient_info: { role: '$SUSPEND', prompt: 'Need more info: {{{reason}}}' } # rendered prompt is what the e2e test reads as suspendMessage
+    ready: { role: '$END', prompt: 'Done' }
@@ -5,13 +5,18 @@ import { evaluate } from "../moderator/evaluate.js";

 const solveIssueGraph: WorkflowPayload["graph"] = {
  $START: {
-    _: { role: "planner", prompt: "Start planning from the issue in the task.", location: null },
+    new: { role: "planner", prompt: "Start planning from the issue in the task.", location: null },
+    resume: {
+      role: "planner",
+      prompt: "Review the previous run output and continue the work.",
+      location: null,
+    },
  },
  planner: {
-    _: { role: "developer", prompt: "Implement the plan: {{plan}}", location: null },
+    planned: { role: "developer", prompt: "Implement the plan: {{plan}}", location: null },
  },
  developer: {
-    _: { role: "reviewer", prompt: "Review the changes: {{summary}}", location: null },
+    implemented: { role: "reviewer", prompt: "Review the changes: {{summary}}", location: null },
  },
  reviewer: {
    approved: { role: "$END", prompt: "Done.", location: null },
@@ -20,8 +25,8 @@ const solveIssueGraph: WorkflowPayload["graph"] = {
 };

 describe("evaluate", () => {
-  test("$START → first role (unit status _)", () => {
-    const result = evaluate(solveIssueGraph, "$START", { $status: "_" });
+  test("$START → first role (status new)", () => {
+    const result = evaluate(solveIssueGraph, "$START", { $status: "new" });
    expect(result).toEqual({
      ok: true,
      value: {
@@ -32,6 +37,18 @@ describe("evaluate", () => {
    });
  });

+  // Status "resume" at $START selects the resume edge, which carries its own prompt.
+  test("$START → first role (status resume)", () => {
+    const expectedTarget = {
+      role: "planner",
+      prompt: "Review the previous run output and continue the work.",
+      location: null,
+    };
+    const outcome = evaluate(solveIssueGraph, "$START", { $status: "resume" });
+    expect(outcome).toEqual({ ok: true, value: expectedTarget });
+  });
+
  test("status-based routing (reviewer rejected → developer)", () => {
    const result = evaluate(solveIssueGraph, "reviewer", {
      $status: "rejected",
@@ -95,7 +112,7 @@ describe("evaluate", () => {
  });

  test("missing role in graph → error", () => {
-    const result = evaluate(solveIssueGraph, "unknown-role", { $status: "_" });
+    const result = evaluate(solveIssueGraph, "unknown-role", { $status: "new" });
    expect(result.ok).toBe(false);
    if (!result.ok) {
      expect(result.error.message).toBe('no transitions defined for role "unknown-role"');
@@ -112,7 +129,7 @@ describe("evaluate", () => {

  test("mustache template rendering with simple fields", () => {
    const result = evaluate(solveIssueGraph, "planner", {
-      $status: "_",
+      $status: "planned",
      plan: "Add auth middleware",
    });
    expect(result).toEqual({
@@ -139,11 +156,11 @@ describe("evaluate", () => {
  test("triple mustache also works for unescaped output", () => {
    const graph: Record<string, Record<string, Target>> = {
      reviewer: {
-        _: { role: "developer", prompt: "Fix: {{{comments}}}", location: null },
+        rejected: { role: "developer", prompt: "Fix: {{{comments}}}", location: null },
      },
    };
    const result = evaluate(graph, "reviewer", {
-      $status: "_",
+      $status: "rejected",
      comments: "<script>alert(1)</script>",
    });
    expect(result).toEqual({
@@ -152,24 +169,22 @@ describe("evaluate", () => {
    });
  });

-  test("missing $status defaults to _ (unit routing)", () => {
+  test("missing $status → error (no unit fallback)", () => {
    const result = evaluate(solveIssueGraph, "planner", {
      plan: "Add auth middleware",
    });
-    expect(result).toEqual({
-      ok: true,
-      value: {
-        role: "developer",
-        prompt: "Implement the plan: Add auth middleware",
-        location: null,
-      },
-    });
+    expect(result.ok).toBe(false);
+    if (!result.ok) {
+      expect(result.error.message).toBe(
+        'agent output for role "planner" is missing required "$status" string',
+      );
+    }
  });

  test("mustache template with nested object paths", () => {
    const graph: Record<string, Record<string, Target>> = {
      reviewer: {
-        _: {
+        rejected: {
          role: "developer",
          prompt: "Address: {{review.comments}}",
          location: null,
@@ -177,7 +192,7 @@ describe("evaluate", () => {
      },
    };
    const result = evaluate(graph, "reviewer", {
-      $status: "_",
+      $status: "rejected",
      review: { comments: "refactor the handler" },
    });
    expect(result).toEqual({
@@ -6,101 +6,107 @@ import { describe, expect, test } from "vitest";
 const __dirname = dirname(fileURLToPath(import.meta.url));

 import {
-  cmdPromptAdapter,
-  cmdPromptAuthor,
-  cmdPromptDeveloper,
+  cmdPromptAdapterDeveloping,
+  cmdPromptBootstrap,
  cmdPromptList,
-  cmdPromptSetup,
  cmdPromptUsage,
-  cmdPromptUser,
+  cmdPromptWorkflowAuthoring,
 } from "../commands/prompt.js";

 describe("prompt commands", () => {
-  test("prompt list returns all prompt names", () => {
+  test("prompt list returns prompt names (no bootstrap)", () => {
    const result = cmdPromptList();
    expect(result).toBeInstanceOf(Array);
-    expect(result).toContain("user");
-    expect(result).toContain("author");
-    expect(result).toContain("developer");
-    expect(result).toContain("adapter");
+    expect(result).toContain("usage");
+    expect(result).toContain("workflow-authoring");
+    expect(result).toContain("adapter-developing");
+    expect(result).not.toContain("bootstrap");
    for (const name of result) {
      expect(name).toMatch(/^\S+$/);
    }
  });

-  test("prompt user returns non-empty markdown string", () => {
-    const result = cmdPromptUser();
+  test("prompt usage returns only the usage reference with frontmatter", () => {
+    const result = cmdPromptUsage();
    expect(typeof result).toBe("string");
    expect(result).toContain("uwf");
    expect(result).toContain("thread");
    expect(result).toContain("workflow");
    expect(result).toContain("Quick Start");
+    expect(result).toContain("---");
+    expect(result).toContain("name:");
+    expect(result).toContain("version:");
+    // Should NOT contain other references
+    expect(result).not.toContain("Workflow Authoring Reference");
+    expect(result).not.toContain("Adapter Developing Reference");
    expect(result.length).toBeGreaterThan(500);
  });

-  test("prompt author returns non-empty markdown string", () => {
-    const result = cmdPromptAuthor();
+  test("prompt workflow-authoring returns non-empty markdown string with frontmatter", () => {
+    const result = cmdPromptWorkflowAuthoring();
    expect(typeof result).toBe("string");
    expect(result).toContain("frontmatter");
    expect(result).toContain("graph");
    expect(result).toContain("$START");
    expect(result).toContain("$END");
    expect(result).toContain("$status");
+    expect(result).toContain("---");
+    expect(result).toContain("name:");
+    expect(result).toContain("version:");
    expect(result.length).toBeGreaterThan(500);
  });

-  test("prompt developer returns non-empty markdown string", () => {
-    const result = cmdPromptDeveloper();
-    expect(typeof result).toBe("string");
-    expect(result).toContain("Monorepo");
-    expect(result).toContain("CAS");
-    expect(result).toContain("Biome");
-    expect(result.length).toBeGreaterThan(500);
-  });
-
-  test("prompt adapter returns non-empty markdown string", () => {
-    const result = cmdPromptAdapter();
+  test("prompt adapter-developing returns non-empty markdown string with frontmatter", () => {
+    const result = cmdPromptAdapterDeveloping();
    expect(typeof result).toBe("string");
    expect(result).toContain("createAgent");
    expect(result).toContain("AgentContext");
    expect(result).toContain("frontmatter");
+    expect(result).toContain("---");
+    expect(result).toContain("name:");
+    expect(result).toContain("version:");
    expect(result.length).toBeGreaterThan(500);
  });

-  test("prompt usage combines all references", () => {
-    const result = cmdPromptUsage();
+  test("prompt bootstrap returns framework-agnostic setup instructions", () => {
+    const result = cmdPromptBootstrap();
    expect(typeof result).toBe("string");
-    expect(result).toContain("User Reference");
-    expect(result).toContain("Author Reference");
-    expect(result).toContain("Developer Reference");
-    expect(result).toContain("Adapter Reference");
-    expect(result).toContain("---");
-    expect(result.length).toBeGreaterThan(2000);
-  });
-
-  test("prompt setup returns setup instructions", () => {
-    const result = cmdPromptSetup();
-    expect(typeof result).toBe("string");
-    expect(result).toContain("uwf Skill Setup");
+    // Skills installation
    expect(result).toContain("uwf prompt usage");
-    expect(result).toContain("uwf prompt setup");
-    expect(result).toContain("SKILL.md");
-    expect(result).toContain("version");
+    expect(result).toContain("uwf prompt workflow-authoring");
+    expect(result).toContain("uwf prompt adapter-developing");
+    expect(result).toContain("uwf-usage");
+    expect(result).toContain("uwf-workflow-authoring");
+    expect(result).toContain("uwf-adapter-developing");
+    // Fresh install scenario
+    expect(result).toContain("Fresh Install");
+    expect(result).toContain("uwf setup");
+    expect(result).toContain("--provider");
+    expect(result).toContain("--api-key");
+    expect(result).toContain("agent adapter");
+    // Upgrade scenario
+    expect(result).toContain("Upgrade");
+    expect(result).toContain("Migrate");
+    // Should NOT contain Hermes-specific paths
+    expect(result).not.toContain("~/.hermes/skills/");
+    expect(result).not.toContain("> ~/.hermes/");
+    expect(result.length).toBeGreaterThan(100);
  });

-  test("prompt help subcommand is suppressed", () => {
-    const output = execFileSync("npx", ["tsx", "src/cli.ts", "prompt", "--help"], {
-      cwd: join(__dirname, "..", ".."),
+  test("prompt help subcommand is suppressed", { timeout: 30_000 }, () => {
+    const cliPath = join(__dirname, "..", "..", "dist", "cli.js");
+    const output = execFileSync("node", [cliPath, "prompt", "--help"], {
      encoding: "utf-8",
-      env: { ...process.env, PATH: `/opt/homebrew/bin:${process.env.PATH}` },
+      env: { ...process.env },
    });
    expect(output).not.toMatch(/help\s+\[command\]/i);
    expect(output).toContain("usage");
-    expect(output).toContain("setup");
-    expect(output).toContain("user");
-    expect(output).toContain("author");
-    expect(output).toContain("developer");
-    expect(output).toContain("adapter");
+    expect(output).toContain("bootstrap");
+    expect(output).toContain("workflow-authoring");
+    expect(output).toContain("adapter-developing");
    expect(output).toContain("list");
+    // Removed subcommands should not appear as command names
+    expect(output).not.toMatch(/^\s+setup\s/m);
+    expect(output).not.toContain("usage-reference");
  });
 });
@@ -4,7 +4,7 @@ import { join } from "node:path";
 import { type CasRef, createThreadIndexEntry, type ThreadId } from "@united-workforce/protocol";
 import { afterEach, beforeEach, describe, expect, test } from "vitest";
 import { resolveHeadHash } from "../commands/shared.js";
-import { addHistoryEntry, createUwfStore, setThread } from "../store.js";
+import { completeThread, createUwfStore, setThread } from "../store.js";

 let tmpDir: string;

@@ -31,19 +31,13 @@ describe("resolveHeadHash", () => {
    expect(result).toBe(headHash);
  });

-  test("falls back to history variable when thread not in active index", async () => {
+  test("finds completed thread", async () => {
    const threadId = "01JTEST0000000000000000002" as ThreadId;
-    const workflowHash = "workflow_hash_789" as CasRef;

    const uwf = await createUwfStore(tmpDir);
    const headHash = (await uwf.store.cas.put(uwf.schemas.text, "completed-head")) as CasRef;
-    addHistoryEntry(uwf.varStore, {
-      thread: threadId,
-      workflow: workflowHash,
-      head: headHash,
-      completedAt: Date.now(),
-      reason: null,
-    });
+    setThread(uwf.varStore, threadId, createThreadIndexEntry(headHash));
+    completeThread(uwf.varStore, threadId, "completed");

    const result = await resolveHeadHash(tmpDir, threadId);

@@ -54,58 +48,36 @@ describe("resolveHeadHash", () => {
  // calls fail() which does process.exit(1), terminating the test runner.
  // The error behavior is tested in integration tests below via CLI invocation.

-  test("prioritizes active thread over history when thread exists in both", async () => {
+  test("prioritizes active thread", async () => {
    const threadId = "01JTEST0000000000000000004" as ThreadId;
-    const workflowHash = "workflow_hash_xyz" as CasRef;

    const uwf = await createUwfStore(tmpDir);
    const activeHead = (await uwf.store.cas.put(uwf.schemas.text, "active-v2")) as CasRef;
-    const historicalHash = (await uwf.store.cas.put(uwf.schemas.text, "historical-v1")) as CasRef;
    setThread(uwf.varStore, threadId, createThreadIndexEntry(activeHead));
-    addHistoryEntry(uwf.varStore, {
-      thread: threadId,
-      workflow: workflowHash,
-      head: historicalHash,
-      completedAt: Date.now(),
-      reason: null,
-    });

    const result = await resolveHeadHash(tmpDir, threadId);

-    // Should return the active head, not the historical one
+    // Should return the active head
    expect(result).toBe(activeHead);
  });

-  test("finds thread from multiple history entries", async () => {
+  test("finds thread from multiple completed threads", async () => {
    const threadId1 = "01JTEST0000000000000000005" as ThreadId;
    const threadId2 = "01JTEST0000000000000000006" as ThreadId;
    const threadId3 = "01JTEST0000000000000000007" as ThreadId;
-    const workflowHash = "workflow_hash_abc" as CasRef;
    const uwf = await createUwfStore(tmpDir);
    const hash1 = (await uwf.store.cas.put(uwf.schemas.text, "hash-thread1")) as CasRef;
    const hash2 = (await uwf.store.cas.put(uwf.schemas.text, "hash-thread2")) as CasRef;
    const hash3 = (await uwf.store.cas.put(uwf.schemas.text, "hash-thread3")) as CasRef;
-    addHistoryEntry(uwf.varStore, {
-      thread: threadId1,
-      workflow: workflowHash,
-      head: hash1,
-      completedAt: Date.now() - 2000,
-      reason: null,
-    });
-    addHistoryEntry(uwf.varStore, {
-      thread: threadId2,
-      workflow: workflowHash,
-      head: hash2,
-      completedAt: Date.now() - 1000,
-      reason: null,
-    });
-    addHistoryEntry(uwf.varStore, {
-      thread: threadId3,
-      workflow: workflowHash,
-      head: hash3,
-      completedAt: Date.now(),
-      reason: null,
-    });
+
+    setThread(uwf.varStore, threadId1, createThreadIndexEntry(hash1));
+    completeThread(uwf.varStore, threadId1, "completed");
+
+    setThread(uwf.varStore, threadId2, createThreadIndexEntry(hash2));
+    completeThread(uwf.varStore, threadId2, "completed");
+
+    setThread(uwf.varStore, threadId3, createThreadIndexEntry(hash3));
+    completeThread(uwf.varStore, threadId3, "completed");

    const result = await resolveHeadHash(tmpDir, threadId2);

@@ -118,6 +118,7 @@ async function createTestStep(
    completedAtMs: Date.now() + 1000,
    assembledPrompt: null,
    cwd: "/tmp",
+    usage: null,
  };
  return store.cas.put(schemas.stepNode, stepPayload);
 }
@@ -96,6 +96,7 @@ describe("protocol types", () => {
      completedAtMs: 2000,
      assembledPrompt: null,
      cwd: "/test/path",
+      usage: null,
    };
    expect(record.startedAtMs).toBe(1000);
    expect(record.completedAtMs).toBe(2000);
@@ -110,6 +111,7 @@ describe("protocol types", () => {
      agent: "uwf-test",
      timestamp: 123,
      durationMs: 5000,
+      usage: null,
    };
    expect(entry.durationMs).toBe(5000);
  });
@@ -251,8 +253,11 @@ describe("thread read timing", () => {
        },
      },
      graph: {
-        $START: { _: { role: "worker", prompt: "go", location: null } },
-        worker: { _: { role: "$END", prompt: "", location: null } },
+        $START: {
+          new: { role: "worker", prompt: "go", location: null },
+          resume: { role: "worker", prompt: "resume", location: null },
+        },
+        worker: { done: { role: "$END", prompt: "", location: null } },
      },
    });

@@ -317,8 +322,11 @@ describe("thread read timing", () => {
        },
      },
      graph: {
-        $START: { _: { role: "worker", prompt: "go", location: null } },
-        worker: { _: { role: "$END", prompt: "", location: null } },
+        $START: {
+          new: { role: "worker", prompt: "go", location: null },
+          resume: { role: "worker", prompt: "resume", location: null },
+        },
+        worker: { done: { role: "$END", prompt: "", location: null } },
      },
    });

@@ -226,19 +226,15 @@ describe("Global CAS directory", () => {
    const uwf = await createUwfStore(storageRoot);
    const threadId = "thread-123" as ThreadId;
    const headHash = await uwf.store.cas.put(uwf.schemas.text, "history-head");
-    const { addHistoryEntry, findHistoryEntry } = await import("../store.js");
-    addHistoryEntry(uwf.varStore, {
-      thread: threadId,
-      workflow: "workflow-456",
-      head: headHash,
-      completedAt: Date.now(),
-      reason: "completed",
-    });
+    const { completeThread, setThread, getThread } = await import("../store.js");
+    const { createThreadIndexEntry } = await import("@united-workforce/protocol");

-    const entry = findHistoryEntry(uwf.varStore, threadId);
-    expect(entry?.thread).toBe(threadId);
-    expect(entry?.workflow).toBe("workflow-456");
+    setThread(uwf.varStore, threadId, createThreadIndexEntry(headHash));
+    completeThread(uwf.varStore, threadId, "completed");
+
+    const entry = getThread(uwf.varStore, threadId);
    expect(entry?.head).toBe(headHash);
+    expect(entry?.status).toBe("completed");

    const { access } = await import("node:fs/promises");
    await access(join(globalCasDir, "vars"));
@@ -274,15 +270,12 @@ describe("Global CAS directory", () => {
    );

    const uwf = await createUwfStore(storageRoot);
-    const { findHistoryEntry } = await import("../store.js");
-    const entry = findHistoryEntry(uwf.varStore, threadId);
-    expect(entry).toEqual({
-      thread: threadId,
-      workflow: workflowHash,
-      head: headHash,
-      completedAt,
-      reason: "cancelled",
-    });
+    const { getThread } = await import("../store.js");
+    const entry = getThread(uwf.varStore, threadId);
+    expect(entry).not.toBeNull();
+    expect(entry?.head).toBe(headHash);
+    expect(entry?.status).toBe("cancelled");
+    expect(entry?.completedAt).toBe(completedAt);

    await expect(access(historyPath)).rejects.toThrow();
    const migratedContent = await readFile(`${historyPath}.migrated`, "utf8");
@@ -0,0 +1,235 @@
+import { mkdir, mkdtemp } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import type { CasRef, ThreadId } from "@united-workforce/protocol";
+import { describe, expect, test } from "vitest";
+import {
+  completeThread,
+  createUwfStore,
+  getThread,
+  loadActiveThreads,
+  loadHistoryThreads,
+  setThread,
+} from "../store.js";
+
+/**
+ * Test helper: creates `<storageRoot>/cas`, points `process.env.OCAS_HOME` at it,
+ * and returns the store opened by `createUwfStore(storageRoot)`.
+ *
+ * Note: the environment variable is assigned process-wide and is not restored here.
+ */
+async function makeUwfStore(storageRoot: string) {
+  const casDir = join(storageRoot, "cas");
+  await mkdir(casDir, { recursive: true });
+  process.env.OCAS_HOME = casDir;
+  return createUwfStore(storageRoot);
+}
+
+/**
+ * Test helper: stores `label` in the CAS under the text schema and returns the
+ * resulting reference, which the tests below use as a thread `head`.
+ */
+async function seedThreadHead(
+  uwf: Awaited<ReturnType<typeof createUwfStore>>,
+  label: string,
+): Promise<CasRef> {
+  return (await uwf.store.cas.put(uwf.schemas.text, label)) as CasRef;
+}
+
+describe("unified thread storage", () => {
+  test("loadActiveThreads excludes completed threads", async () => {
+    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-active-test-"));
+    const uwf = await makeUwfStore(tmpDir);
+
+    const threadId1 = "01JTEST000000000000ACTIVE1" as ThreadId;
+    const threadId2 = "01JTEST000000000000ACTIVE2" as ThreadId;
+    const head1 = await seedThreadHead(uwf, "active-head");
+    const head2 = await seedThreadHead(uwf, "completed-head");
+
+    setThread(uwf.varStore, threadId1, {
+      head: head1,
+      status: "idle",
+      suspendedRole: null,
+      suspendMessage: null,
+      completedAt: null,
+    });
+
+    setThread(uwf.varStore, threadId2, {
+      head: head2,
+      status: "completed",
+      suspendedRole: null,
+      suspendMessage: null,
+      completedAt: Date.now(),
+    });
+
+    const active = loadActiveThreads(uwf.varStore);
+    expect(Object.keys(active)).toHaveLength(1);
+    expect(active[threadId1]).toBeDefined();
+    expect(active[threadId2]).toBeUndefined();
+  });
+
+  test("loadActiveThreads excludes cancelled threads", async () => {
+    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-active-test-"));
+    const uwf = await makeUwfStore(tmpDir);
+
+    const threadId1 = "01JTEST000000000000ACTIVE3" as ThreadId;
+    const threadId2 = "01JTEST000000000000ACTIVE4" as ThreadId;
+    const head1 = await seedThreadHead(uwf, "active-head");
+    const head2 = await seedThreadHead(uwf, "cancelled-head");
+
+    setThread(uwf.varStore, threadId1, {
+      head: head1,
+      status: "idle",
+      suspendedRole: null,
+      suspendMessage: null,
+      completedAt: null,
+    });
+
+    setThread(uwf.varStore, threadId2, {
+      head: head2,
+      status: "cancelled",
+      suspendedRole: null,
+      suspendMessage: null,
+      completedAt: Date.now(),
+    });
+
+    const active = loadActiveThreads(uwf.varStore);
+    expect(Object.keys(active)).toHaveLength(1);
+    expect(active[threadId1]).toBeDefined();
+    expect(active[threadId2]).toBeUndefined();
+  });
+
+  test("loadHistoryThreads only returns completed and cancelled", async () => {
+    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-history-test-"));
+    const uwf = await makeUwfStore(tmpDir);
+
+    const threadId1 = "01JTEST000000000000HISTOR1" as ThreadId;
+    const threadId2 = "01JTEST000000000000HISTOR2" as ThreadId;
+    const threadId3 = "01JTEST000000000000HISTOR3" as ThreadId;
+    const head1 = await seedThreadHead(uwf, "active-head");
+    const head2 = await seedThreadHead(uwf, "completed-head");
+    const head3 = await seedThreadHead(uwf, "cancelled-head");
+
+    setThread(uwf.varStore, threadId1, {
+      head: head1,
+      status: "idle",
+      suspendedRole: null,
+      suspendMessage: null,
+      completedAt: null,
+    });
+
+    setThread(uwf.varStore, threadId2, {
+      head: head2,
+      status: "completed",
+      suspendedRole: null,
+      suspendMessage: null,
+      completedAt: Date.now(),
+    });
+
+    setThread(uwf.varStore, threadId3, {
+      head: head3,
+      status: "cancelled",
+      suspendedRole: null,
+      suspendMessage: null,
+      completedAt: Date.now(),
+    });
+
+    const history = loadHistoryThreads(uwf.varStore);
+    expect(Object.keys(history)).toHaveLength(2);
+    expect(history[threadId1]).toBeUndefined();
+    expect(history[threadId2]).toBeDefined();
+    expect(history[threadId3]).toBeDefined();
+  });
+
+  test("completeThread marks thread as completed", async () => {
+    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-complete-test-"));
+    const uwf = await makeUwfStore(tmpDir);
+    const threadId = "01JTEST000000000000COMPLE1" as ThreadId;
+    const head = await seedThreadHead(uwf, "active-head");
+
+    setThread(uwf.varStore, threadId, {
+      head,
+      status: "idle",
+      suspendedRole: null,
+      suspendMessage: null,
+      completedAt: null,
+    });
+
+    completeThread(uwf.varStore, threadId, "completed");
+
+    const entry = getThread(uwf.varStore, threadId);
+    expect(entry).not.toBeNull();
+    expect(entry?.status).toBe("completed");
+    expect(entry?.completedAt).toBeDefined();
+    expect(entry?.completedAt).toBeGreaterThan(0);
+  });
+
+  test("completeThread marks thread as cancelled", async () => {
+    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-complete-test-"));
+    const uwf = await makeUwfStore(tmpDir);
+    const threadId = "01JTEST000000000000COMPLE2" as ThreadId;
+    const head = await seedThreadHead(uwf, "active-head");
+
+    setThread(uwf.varStore, threadId, {
+      head,
+      status: "idle",
+      suspendedRole: null,
+      suspendMessage: null,
+      completedAt: null,
+    });
+
+    completeThread(uwf.varStore, threadId, "cancelled");
+
+    const entry = getThread(uwf.varStore, threadId);
+    expect(entry).not.toBeNull();
+    expect(entry?.status).toBe("cancelled");
+    expect(entry?.completedAt).toBeDefined();
+    expect(entry?.completedAt).toBeGreaterThan(0);
+  });
+
+  test("completeThread clears suspend metadata", async () => {
+    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-complete-test-"));
+    const uwf = await makeUwfStore(tmpDir);
+    const threadId = "01JTEST000000000000COMPLE3" as ThreadId;
+    const head = await seedThreadHead(uwf, "suspended-head");
+
+    setThread(uwf.varStore, threadId, {
+      head,
+      status: "suspended",
+      suspendedRole: "test-role",
+      suspendMessage: "test message",
+      completedAt: null,
+    });
+
+    completeThread(uwf.varStore, threadId, "completed");
+
+    const entry = getThread(uwf.varStore, threadId);
+    expect(entry).not.toBeNull();
+    expect(entry?.status).toBe("completed");
+    expect(entry?.suspendedRole).toBeNull();
+    expect(entry?.suspendMessage).toBeNull();
+  });
+
+  test("completeThread handles non-existent thread gracefully", async () => {
+    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-complete-test-"));
+    const uwf = await makeUwfStore(tmpDir);
+    const threadId = "01JTEST000000000000NOEXIST" as ThreadId;
+
+    // Should not throw
+    completeThread(uwf.varStore, threadId, "completed");
+
+    const entry = getThread(uwf.varStore, threadId);
+    expect(entry).toBeNull();
+  });
+
+  test("status and completedAt tags are persisted and loaded", async () => {
+    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-tags-test-"));
+    const uwf = await makeUwfStore(tmpDir);
+    const threadId = "01JTEST000000000000TAGTEST" as ThreadId;
+    const head = await seedThreadHead(uwf, "test-head");
+    const now = Date.now();
+
+    setThread(uwf.varStore, threadId, {
+      head,
+      status: "completed",
+      suspendedRole: null,
+      suspendMessage: null,
+      completedAt: now,
+    });
+
+    const entry = getThread(uwf.varStore, threadId);
+    expect(entry).not.toBeNull();
+    expect(entry?.status).toBe("completed");
+    expect(entry?.completedAt).toBe(now);
+  });
+});
@@ -3,7 +3,13 @@ import { tmpdir } from "node:os";
 import { join } from "node:path";
 import type { CasRef, ThreadId } from "@united-workforce/protocol";
 import { describe, expect, test } from "vitest";
-import { addHistoryEntry, createUwfStore, loadAllHistory } from "../store.js";
+import {
+  completeThread,
+  createUwfStore,
+  getThread,
+  loadHistoryThreads,
+  setThread,
+} from "../store.js";

 async function makeUwfStore(storageRoot: string) {
  const casDir = join(storageRoot, "cas");
@@ -20,88 +26,113 @@ async function seedHistoryHead(
 }

 describe("thread cancel status", () => {
-  test("cancelled history entry has reason 'cancelled'", async () => {
+  test("cancelled thread has status 'cancelled'", async () => {
    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-cancel-test-"));
    const threadId = "01JTEST000000000000CANCEL1" as ThreadId;
    const uwf = await makeUwfStore(tmpDir);
    const head = await seedHistoryHead(uwf, "cancelled-head");

-    addHistoryEntry(uwf.varStore, {
-      thread: threadId,
-      workflow: "test-workflow",
+    setThread(uwf.varStore, threadId, {
      head,
-      completedAt: Date.now(),
-      reason: "cancelled",
+      status: "idle",
+      suspendedRole: null,
+      suspendMessage: null,
+      completedAt: null,
    });

-    const history = loadAllHistory(uwf.varStore);
-    expect(history).toHaveLength(1);
-    expect(history[0]?.reason).toBe("cancelled");
+    completeThread(uwf.varStore, threadId, "cancelled");
+
+    const entry = getThread(uwf.varStore, threadId);
+    expect(entry).not.toBeNull();
+    expect(entry?.status).toBe("cancelled");
  });

-  test("completed history entry has reason 'completed'", async () => {
+  test("completed thread has status 'completed'", async () => {
    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-cancel-test-"));
    const threadId = "01JTEST000000000000CANCEL2" as ThreadId;
    const uwf = await makeUwfStore(tmpDir);
    const head = await seedHistoryHead(uwf, "completed-head");

-    addHistoryEntry(uwf.varStore, {
-      thread: threadId,
-      workflow: "test-workflow",
+    setThread(uwf.varStore, threadId, {
      head,
-      completedAt: Date.now(),
-      reason: "completed",
+      status: "idle",
+      suspendedRole: null,
+      suspendMessage: null,
+      completedAt: null,
    });

-    const history = loadAllHistory(uwf.varStore);
-    expect(history).toHaveLength(1);
-    expect(history[0]?.reason).toBe("completed");
+    completeThread(uwf.varStore, threadId, "completed");
+
+    const entry = getThread(uwf.varStore, threadId);
+    expect(entry).not.toBeNull();
+    expect(entry?.status).toBe("completed");
  });

-  test("history entry with null reason is stored as completed", async () => {
-    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-cancel-test-"));
-    const threadId = "01JTEST000000000000CANCEL3" as ThreadId;
-    const uwf = await makeUwfStore(tmpDir);
-    const head = await seedHistoryHead(uwf, "legacy-head");
-
-    addHistoryEntry(uwf.varStore, {
-      thread: threadId,
-      workflow: "test-workflow",
-      head,
-      completedAt: Date.now(),
-      reason: null,
-    });
-
-    const history = loadAllHistory(uwf.varStore);
-    expect(history).toHaveLength(1);
-    expect(history[0]?.reason).toBe("completed");
-  });
-
-  test("mixed completed and cancelled entries preserve distinct reasons", async () => {
+  test("loadHistoryThreads returns completed and cancelled", async () => {
    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-cancel-test-"));
    const uwf = await makeUwfStore(tmpDir);
    const head1 = await seedHistoryHead(uwf, "head1");
    const head2 = await seedHistoryHead(uwf, "head2");

-    addHistoryEntry(uwf.varStore, {
-      thread: "01JTEST000000000000CANCEL4" as ThreadId,
-      workflow: "test-workflow",
+    const threadId1 = "01JTEST000000000000CANCEL4" as ThreadId;
+    setThread(uwf.varStore, threadId1, {
      head: head1,
-      completedAt: Date.now(),
-      reason: "completed",
+      status: "idle",
+      suspendedRole: null,
+      suspendMessage: null,
+      completedAt: null,
    });
+    completeThread(uwf.varStore, threadId1, "completed");

-    addHistoryEntry(uwf.varStore, {
-      thread: "01JTEST000000000000CANCEL5" as ThreadId,
-      workflow: "test-workflow",
+    const threadId2 = "01JTEST000000000000CANCEL5" as ThreadId;
+    setThread(uwf.varStore, threadId2, {
      head: head2,
-      completedAt: Date.now(),
-      reason: "cancelled",
+      status: "idle",
+      suspendedRole: null,
+      suspendMessage: null,
+      completedAt: null,
+    });
+    completeThread(uwf.varStore, threadId2, "cancelled");
+
+    const history = loadHistoryThreads(uwf.varStore);
+    expect(Object.keys(history)).toHaveLength(2);
+    const statuses = Object.values(history)
+      .map((entry) => entry.status)
+      .sort();
+    expect(statuses).toEqual(["cancelled", "completed"]);
  });

-    const history = loadAllHistory(uwf.varStore);
-    expect(history).toHaveLength(2);
-    const reasons = history.map((entry) => entry.reason).sort();
-    expect(reasons).toEqual(["cancelled", "completed"]);
+  test("mixed completed and cancelled entries preserve distinct statuses", async () => {
+    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-cancel-test-"));
+    const uwf = await makeUwfStore(tmpDir);
+    const head1 = await seedHistoryHead(uwf, "head1");
+    const head2 = await seedHistoryHead(uwf, "head2");
+
+    const threadId1 = "01JTEST000000000000CANCEL6" as ThreadId;
+    setThread(uwf.varStore, threadId1, {
+      head: head1,
+      status: "idle",
+      suspendedRole: null,
+      suspendMessage: null,
+      completedAt: null,
+    });
+    completeThread(uwf.varStore, threadId1, "completed");
+
+    const threadId2 = "01JTEST000000000000CANCEL7" as ThreadId;
+    setThread(uwf.varStore, threadId2, {
+      head: head2,
+      status: "idle",
+      suspendedRole: null,
+      suspendMessage: null,
+      completedAt: null,
+    });
+    completeThread(uwf.varStore, threadId2, "cancelled");
+
+    const history = loadHistoryThreads(uwf.varStore);
+    expect(Object.keys(history)).toHaveLength(2);
+    const statuses = Object.values(history)
+      .map((entry) => entry.status)
+      .sort();
+    expect(statuses).toEqual(["cancelled", "completed"]);
  });
 });
@@ -10,9 +10,8 @@ import { cmdThreadList } from "../commands/thread.js";
 import { parseTimeInput } from "../commands/thread-time-parser.js";
 import type { UwfStore } from "../store.js";
 import {
-  addHistoryEntry,
+  completeThread as completeThreadInStore,
  createUwfStore,
-  deleteThread,
  loadAllThreads,
  setThread,
 } from "../store.js";
@@ -73,18 +72,11 @@ async function markThreadRunning(storageRoot: string, threadId: ThreadId, workfl
+// Test helper: opens the store at `storageRoot` and marks `threadId` as "completed".
+// The two hash parameters are no longer read (hence the `_` prefix); presumably they
+// are retained so existing call sites keep compiling — confirm before removing them.
 async function completeThread(
  storageRoot: string,
  threadId: ThreadId,
-  workflowHash: CasRef,
-  headHash: CasRef,
+  _workflowHash: CasRef,
+  _headHash: CasRef,
 ) {
  const uwfIdx = await createUwfStore(storageRoot);
-  deleteThread(uwfIdx.varStore, threadId);
-  addHistoryEntry(uwfIdx.varStore, {
-    thread: threadId,
-    workflow: workflowHash,
-    head: headHash,
-    completedAt: Date.now(),
-    reason: null,
-  });
+  completeThreadInStore(uwfIdx.varStore, threadId, "completed");
 }

 // ── test setup ────────────────────────────────────────────────────────────────
@@ -500,8 +492,10 @@ describe("edge cases", () => {
    )) as CasRef;
    index["INVALID_ULID_FORMAT_HERE" as ThreadId] = {
      head: placeholderHead,
+      status: "idle",
      suspendedRole: null,
      suspendMessage: null,
+      completedAt: null,
    };
    for (const [tid, ent] of Object.entries(index)) {
      setThread(uwfIdx.varStore, tid as ThreadId, ent);
@@ -54,15 +54,19 @@ roles:
      type: object
      required: ["$status"]
      properties:
-        $status: { type: string }
+        $status: { const: "ready" }
 graph:
  $START:
-    _:
+    new:
      role: planner
      prompt: "Plan the work"
      location: null
+    resume:
+      role: planner
+      prompt: "Resume the work"
+      location: null
  planner:
-    _:
+    ready:
      role: $END
      prompt: "Done"
      location: null
@@ -110,15 +114,19 @@ roles:
      type: object
      required: ["$status"]
      properties:
-        $status: { type: string }
+        $status: { const: "ready" }
 graph:
  $START:
-    _:
+    new:
      role: planner
      prompt: "Plan"
      location: null
+    resume:
+      role: planner
+      prompt: "Resume"
+      location: null
  planner:
-    _:
+    ready:
      role: $END
      prompt: "Done"
      location: null
@@ -153,15 +161,19 @@ roles:
      type: object
      required: ["$status"]
      properties:
-        $status: { type: string }
+        $status: { const: "ready" }
 graph:
  $START:
-    _:
+    new:
      role: planner
      prompt: "Plan"
      location: null
+    resume:
+      role: planner
+      prompt: "Resume"
+      location: null
  planner:
-    _:
+    ready:
      role: $END
      prompt: "Done"
      location: null
@@ -70,7 +70,10 @@ async function setupSuspendedThread(mode: MockAgentMode): Promise<{
      },
    },
    graph: {
-      $START: { _: { role: "worker", prompt: "Start work", location: null } },
+      $START: {
+        new: { role: "worker", prompt: "Start work", location: null },
+        resume: { role: "worker", prompt: "Resume the work", location: null },
+      },
      worker: {
        needs_input: {
          role: "$SUSPEND",
@@ -79,7 +82,7 @@ async function setupSuspendedThread(mode: MockAgentMode): Promise<{
        },
        ok: { role: "reviewer", prompt: "Review the work", location: null },
      },
-      reviewer: { _: { role: "$END", prompt: "Done", location: null } },
+      reviewer: { done: { role: "$END", prompt: "Done", location: null } },
    },
  });

@@ -118,8 +121,10 @@ async function setupSuspendedThread(mode: MockAgentMode): Promise<{
  await seedThreads(tmpDir, {
    [THREAD_ID]: {
      head: stepHash,
+      status: "suspended",
      suspendedRole: "worker",
      suspendMessage: SUSPEND_MESSAGE,
+      completedAt: null,
    },
  });

@@ -231,8 +236,11 @@ describe("uwf thread resume", () => {
        },
      },
      graph: {
-        $START: { _: { role: "worker", prompt: "Start", location: null } },
-        worker: { _: { role: "$END", prompt: "Done", location: null } },
+        $START: {
+          new: { role: "worker", prompt: "Start", location: null },
+          resume: { role: "worker", prompt: "Resume", location: null },
+        },
+        worker: { done: { role: "$END", prompt: "Done", location: null } },
      },
    });

@@ -247,7 +255,7 @@ describe("uwf thread resume", () => {

    const result = runUwf(["thread", "resume", THREAD_ID], casDir);
    expect(result.status).not.toBe(0);
-    expect(result.stderr).toContain("thread is not suspended");
+    expect(result.stderr).toContain("thread cannot be resumed");
  });

  test("resume suspended thread executes step and becomes idle", async () => {
@@ -347,8 +355,10 @@ describe("uwf thread resume", () => {
      const uwfAfterFirst = await createUwfStore(tmpDir);
      expect(getThread(uwfAfterFirst.varStore, THREAD_ID)).toEqual({
        head: firstResume.head,
+        status: "suspended",
        suspendedRole: "worker",
        suspendMessage: SUSPEND_MESSAGE,
+        completedAt: null,
      });

      const { mockAgentPath: okMockAgentPath } = await setupOkMockAgent(
@@ -444,3 +454,272 @@ echo '${adapterJson}'

  return { mockAgentPath };
 }
+
+describe("uwf thread resume - completed threads", () => {
+  test("resume completed thread starts from $START role", async () => {
+    // Scenario: a thread whose history is worker -> reviewer and whose index entry is
+    // "completed" is resumed with an extra prompt. The assertions expect the
+    // `$START.resume` edge prompt to reach the agent and the thread to be "idle" again.
+    const casDir = join(tmpDir, "cas");
+    await mkdir(casDir, { recursive: true });
+    const store = await openStore(casDir);
+    const schemas = await registerUwfSchemas(store);
+    const outputSchemaHash = await putSchema(store, OUTPUT_SCHEMA);
+
+    const workflowHash = await store.cas.put(schemas.workflow, {
+      name: "test-completed-resume",
+      description: "completed thread resume test",
+      roles: {
+        worker: {
+          description: "Worker role",
+          goal: "Work",
+          capabilities: [],
+          procedure: "work",
+          output: "result",
+          frontmatter: outputSchemaHash,
+        },
+        reviewer: {
+          description: "Reviewer role",
+          goal: "Review",
+          capabilities: [],
+          procedure: "review",
+          output: "result",
+          frontmatter: outputSchemaHash,
+        },
+      },
+      graph: {
+        $START: {
+          new: { role: "worker", prompt: "Start work", location: null },
+          resume: { role: "worker", prompt: "Resume the work", location: null },
+        },
+        worker: { done: { role: "reviewer", prompt: "Review the work", location: null } },
+        reviewer: { done: { role: "$END", prompt: "Done", location: null } },
+      },
+    });
+
+    const startHash = await store.cas.put(schemas.startNode, {
+      workflow: workflowHash,
+      prompt: "Initial task",
+      cwd: tmpDir,
+    });
+
+    process.env.OCAS_HOME = casDir;
+
+    const workerOutputHash = await store.cas.put(outputSchemaHash, { $status: "done" });
+    const reviewerOutputHash = await store.cas.put(outputSchemaHash, { $status: "done" });
+    const detailHash = await store.cas.put(schemas.text, "mock detail");
+
+    const workerStepHash = await store.cas.put(schemas.stepNode, {
+      start: startHash,
+      prev: null,
+      role: "worker",
+      output: workerOutputHash,
+      detail: detailHash,
+      agent: "uwf-mock",
+      edgePrompt: "Start work",
+      startedAtMs: 1716600000000,
+      completedAtMs: 1716600001000,
+      cwd: tmpDir,
+      assembledPrompt: null,
+    });
+
+    const reviewerStepHash = await store.cas.put(schemas.stepNode, {
+      start: startHash,
+      prev: workerStepHash,
+      role: "reviewer",
+      output: reviewerOutputHash,
+      detail: detailHash,
+      agent: "uwf-mock",
+      edgePrompt: "Review the work",
+      startedAtMs: 1716600001000,
+      completedAtMs: 1716600002000,
+      cwd: tmpDir,
+      assembledPrompt: null,
+    });
+
+    await seedThreads(tmpDir, {
+      [THREAD_ID]: {
+        head: reviewerStepHash,
+        status: "completed",
+        suspendedRole: null,
+        suspendMessage: null,
+        completedAt: 1716600002000,
+      },
+    });
+
+    // Verify the seeded entry round-trips before exercising the CLI
+    const { createUwfStore, getThread } = await import("../store.js");
+    const verifyUwf = await createUwfStore(tmpDir);
+    const verifyEntry = getThread(verifyUwf.varStore, THREAD_ID);
+    expect(verifyEntry?.status).toBe("completed");
+    expect(verifyEntry?.completedAt).toBe(1716600002000);
+
+    const promptCapturePath = join(tmpDir, "captured-prompt-completed.txt");
+    const mockAgentPath = join(tmpDir, "mock-agent-completed.sh");
+
+    const newWorkerStepHash = await store.cas.put(schemas.stepNode, {
+      start: startHash,
+      prev: reviewerStepHash,
+      role: "worker",
+      output: workerOutputHash,
+      detail: detailHash,
+      agent: "uwf-mock",
+      edgePrompt: "Start work",
+      startedAtMs: 1716600003000,
+      completedAtMs: 1716600004000,
+      cwd: tmpDir,
+      assembledPrompt: null,
+    });
+
+    const adapterJson = JSON.stringify({
+      stepHash: newWorkerStepHash,
+      detailHash,
+      role: "worker",
+      frontmatter: { $status: "done" },
+      body: "",
+      startedAtMs: 1716600003000,
+      completedAtMs: 1716600004000,
+    });
+
+    // Mock agent: writes the value of --prompt to a file, then prints the adapter JSON
+    await writeFile(
+      mockAgentPath,
+      `#!/bin/sh
+prompt=""
+while [ $# -gt 0 ]; do
+  if [ "$1" = "--prompt" ]; then
+    prompt="$2"
+    shift 2
+  else
+    shift
+  fi
+done
+printf '%s' "$prompt" > '${promptCapturePath}'
+echo '${adapterJson}'
+`,
+      { mode: 0o755 },
+    );
+
+    const configPath = join(tmpDir, "config.yaml");
+    await writeFile(
+      configPath,
+      `defaultAgent: uwf-hermes\ndefaultModel: test-model\nagentOverrides: null\nagents: {}\nproviders: {}\nmodels: {}\n`,
+    );
+
+    const result = runUwf(
+      ["thread", "resume", THREAD_ID, "-p", "Additional context", "--agent", mockAgentPath],
+      casDir,
+    );
+
+    if (result.status !== 0) {
+      console.error("Command failed:", result.stderr);
+    }
+
+    expect(result.status).toBe(0);
+
+    const cliOutput = JSON.parse(result.stdout.trim());
+    expect(cliOutput.status).toBe("idle");
+    expect(cliOutput.currentRole).toBe("reviewer");
+    expect(cliOutput.done).toBe(false);
+
+    const capturedPrompt = await readFile(promptCapturePath, "utf8");
+    expect(capturedPrompt).toContain("Resume the work");
+    expect(capturedPrompt).toContain("Additional context");
+
+    // Re-open the store to read the thread entry after the resume command ran
+    const uwf2 = await createUwfStore(tmpDir);
+    const entry2 = getThread(uwf2.varStore, THREAD_ID);
+    expect(entry2?.status).toBe("idle");
+    expect(entry2?.completedAt).toBeNull();
+  });
+
+  test("resume cancelled thread returns error", async () => {
+    const casDir = join(tmpDir, "cas");
+    await mkdir(casDir, { recursive: true });
+    const store = await openStore(casDir);
+    const schemas = await registerUwfSchemas(store);
+
+    const workflowHash = await store.cas.put(schemas.workflow, {
+      name: "cancelled-workflow",
+      description: "cancelled thread",
+      roles: {
+        worker: {
+          description: "Worker",
+          goal: "Work",
+          capabilities: [],
+          procedure: "work",
+          output: "result",
+          frontmatter: await putSchema(store, OUTPUT_SCHEMA),
+        },
+      },
+      graph: {
+        $START: {
+          new: { role: "worker", prompt: "Start", location: null },
+          resume: { role: "worker", prompt: "Resume", location: null },
+        },
+        worker: { done: { role: "$END", prompt: "Done", location: null } },
+      },
+    });
+
+    const startHash = await store.cas.put(schemas.startNode, {
+      workflow: workflowHash,
+      prompt: "task",
+      cwd: tmpDir,
+    });
+
+    process.env.OCAS_HOME = casDir;
+    await seedThreads(tmpDir, {
+      [THREAD_ID]: {
+        head: startHash,
+        status: "cancelled",
+        suspendedRole: null,
+        suspendMessage: null,
+        completedAt: null,
+      },
+    });
+
+    const result = runUwf(["thread", "resume", THREAD_ID], casDir);
+    expect(result.status).not.toBe(0);
+    expect(result.stderr).toContain("thread cannot be resumed");
+    expect(result.stderr).toContain("cancelled");
+  });
+
+  test("resume idle thread returns error", async () => {
+    const casDir = join(tmpDir, "cas");
+    await mkdir(casDir, { recursive: true });
+    const store = await openStore(casDir);
+    const schemas = await registerUwfSchemas(store);
+
+    const workflowHash = await store.cas.put(schemas.workflow, {
+      name: "idle-workflow",
+      description: "idle thread",
+      roles: {
+        worker: {
+          description: "Worker",
+          goal: "Work",
+          capabilities: [],
+          procedure: "work",
+          output: "result",
+          frontmatter: await putSchema(store, OUTPUT_SCHEMA),
+        },
+      },
+      graph: {
+        $START: {
+          new: { role: "worker", prompt: "Start", location: null },
+          resume: { role: "worker", prompt: "Resume", location: null },
+        },
+        worker: { done: { role: "$END", prompt: "Done", location: null } },
+      },
+    });
+
+    const startHash = await store.cas.put(schemas.startNode, {
+      workflow: workflowHash,
+      prompt: "task",
+      cwd: tmpDir,
+    });
+
+    process.env.OCAS_HOME = casDir;
+    await seedThreads(tmpDir, { [THREAD_ID]: startHash });
+
+    const result = runUwf(["thread", "resume", THREAD_ID], casDir);
+    expect(result.status).not.toBe(0);
+    expect(result.stderr).toContain("thread cannot be resumed");
+    expect(result.stderr).toContain("idle");
+  });
+});
@@ -6,13 +6,7 @@ import type { CasRef, ThreadId } from "@united-workforce/protocol";
 import { describe, expect, test } from "vitest";
 import { createMarker, deleteMarker } from "../background/index.js";
 import { cmdThreadShow, cmdThreadStart } from "../commands/thread.js";
-import {
-  addHistoryEntry,
-  createUwfStore,
-  deleteThread,
-  loadAllThreads,
-  setThread,
-} from "../store.js";
+import { completeThread, createUwfStore, loadAllThreads, setThread } from "../store.js";

 const OUTPUT_SCHEMA = {
  type: "object" as const,
@@ -37,15 +31,19 @@ roles:
      type: object
      required: ["$status"]
      properties:
-        $status: { type: string }
+        $status: { const: "ready" }
 graph:
  $START:
-    _:
+    new:
      role: planner
      prompt: "Plan the work"
      location: null
+    resume:
+      role: planner
+      prompt: "Resume the work"
+      location: null
  planner:
-    _:
+    ready:
      role: $END
      prompt: "Done"
      location: null
@@ -72,10 +70,14 @@ roles:
            question: { type: string }
 graph:
  $START:
-    _:
+    new:
      role: worker
      prompt: "Start work"
      location: null
+    resume:
+      role: worker
+      prompt: "Resume work"
+      location: null
  worker:
    needs_input:
      role: $SUSPEND
@@ -118,7 +120,13 @@ async function insertStepNode(
    assembledPrompt: null,
  })) as CasRef;

-  setThread(uwf.varStore, threadId, { head: stepHash, suspendedRole: null, suspendMessage: null });
+  setThread(uwf.varStore, threadId, {
+    head: stepHash,
+    status: "idle",
+    suspendedRole: null,
+    suspendMessage: null,
+    completedAt: null,
+  });
 }

 describe("thread show status field", () => {
@@ -200,7 +208,7 @@ describe("thread show status field", () => {
    // Create a thread
    const startResult = await cmdThreadStart(storageRoot, workflowPath, "test prompt", tmpDir);
    const threadId = startResult.thread as ThreadId;
-    const workflow = startResult.workflow;
+    const _workflow = startResult.workflow;

    // Get the head hash before moving to history
    const uwfForIndex = await createUwfStore(storageRoot);
@@ -208,15 +216,7 @@ describe("thread show status field", () => {
    const head = index[threadId]!.head;
    if (!head) throw new Error("Thread not found in index");

-    deleteThread(uwfForIndex.varStore, threadId);
-
-    addHistoryEntry(uwfForIndex.varStore, {
-      thread: threadId,
-      workflow,
-      head,
-      completedAt: Date.now(),
-      reason: "completed",
-    });
+    completeThread(uwfForIndex.varStore, threadId, "completed");

    const result = await cmdThreadShow(storageRoot, threadId);

@@ -237,7 +237,7 @@ describe("thread show status field", () => {
    // Create a thread
    const startResult = await cmdThreadStart(storageRoot, workflowPath, "test prompt", tmpDir);
    const threadId = startResult.thread as ThreadId;
-    const workflow = startResult.workflow;
+    const _workflow = startResult.workflow;

    // Get the head hash before moving to history
    const uwfForIndex = await createUwfStore(storageRoot);
@@ -245,15 +245,7 @@ describe("thread show status field", () => {
    const head = index[threadId]!.head;
    if (!head) throw new Error("Thread not found in index");

-    deleteThread(uwfForIndex.varStore, threadId);
-
-    addHistoryEntry(uwfForIndex.varStore, {
-      thread: threadId,
-      workflow,
-      head,
-      completedAt: Date.now(),
-      reason: "cancelled",
-    });
+    completeThread(uwfForIndex.varStore, threadId, "cancelled");

    const result = await cmdThreadShow(storageRoot, threadId);

@@ -274,7 +266,7 @@ describe("thread show status field", () => {
    // Create a thread
    const startResult = await cmdThreadStart(storageRoot, workflowPath, "test prompt", tmpDir);
    const threadId = startResult.thread as ThreadId;
-    const workflow = startResult.workflow;
+    const _workflow = startResult.workflow;

    // Get the head hash before moving to history
    const uwfForIndex = await createUwfStore(storageRoot);
@@ -282,15 +274,7 @@ describe("thread show status field", () => {
    const head = index[threadId]!.head;
    if (!head) throw new Error("Thread not found in index");

-    deleteThread(uwfForIndex.varStore, threadId);
-
-    addHistoryEntry(uwfForIndex.varStore, {
-      thread: threadId,
-      workflow,
-      head,
-      completedAt: Date.now(),
-      reason: null,
-    });
+    completeThread(uwfForIndex.varStore, threadId, "completed");

    const result = await cmdThreadShow(storageRoot, threadId);

@@ -54,15 +54,19 @@ roles:
      type: object
      required: ["$status"]
      properties:
-        $status: { type: string }
+        $status: { const: "ready" }
 graph:
  $START:
-    _:
+    new:
      role: planner
      prompt: "Plan the work"
      location: null
+    resume:
+      role: planner
+      prompt: "Resume the work"
+      location: null
  planner:
-    _:
+    ready:
      role: $END
      prompt: "Done"
      location: null
@@ -2,19 +2,28 @@ import { execFileSync } from "node:child_process";
 import { dirname, join } from "node:path";
 import { fileURLToPath } from "node:url";
 import { describe, expect, test } from "vitest";
+import { validateCount } from "../commands/thread.js";

-const CLI_PATH = join(dirname(fileURLToPath(import.meta.url)), "..", "cli.js");
+const CLI_PATH = join(dirname(fileURLToPath(import.meta.url)), "..", "..", "dist", "cli.js");

-function runCli(args: string[]): { stdout: string; stderr: string; exitCode: number } {
+function runCli(args: string[]): {
+  stdout: string;
+  stderr: string;
+  exitCode: number;
+} {
  try {
-    const stdout = execFileSync("npx", ["tsx", CLI_PATH, ...args], {
+    const stdout = execFileSync("node", [CLI_PATH, ...args], {
      encoding: "utf8",
      env: { ...process.env, UWF_HOME: "/tmp/uwf-test-nonexistent" },
      stdio: ["ignore", "pipe", "pipe"],
    });
    return { stdout, stderr: "", exitCode: 0 };
  } catch (e: unknown) {
-    const err = e as NodeJS.ErrnoException & { stdout?: string; stderr?: string; status?: number };
+    const err = e as NodeJS.ErrnoException & {
+      stdout?: string;
+      stderr?: string;
+      status?: number;
+    };
    return {
      stdout: err.stdout ?? "",
      stderr: err.stderr ?? "",
@@ -23,50 +32,39 @@ function runCli(args: string[]): { stdout: string; stderr: string; exitCode: num
  }
 }

-describe("thread exec --count CLI parsing", () => {
+describe("thread exec --count CLI parsing", { timeout: 30_000 }, () => {
  test("--help shows -c/--count option", () => {
    const result = runCli(["thread", "exec", "--help"]);
-    expect(result.stdout).toContain("--count");
-    expect(result.stdout).toContain("-c");
+    const combined = result.stdout + result.stderr;
+    expect(combined).toContain("--count");
+    expect(combined).toContain("-c");
  });

  test("description says 'one or more steps'", () => {
    const result = runCli(["thread", "exec", "--help"]);
-    expect(result.stdout).toContain("one or more steps");
+    const combined = result.stdout + result.stderr;
+    expect(combined).toContain("one or more steps");
  });
 });

-describe("cmdThreadExec count logic", () => {
-  test("count=0 fails with validation error", () => {
-    const result = runCli(["thread", "exec", "FAKE_THREAD_ID", "-c", "0"]);
-    expect(result.exitCode).not.toBe(0);
-    expect(result.stderr).toContain("positive integer");
+describe("validateCount", () => {
+  test("count=0 throws validation error", () => {
+    expect(() => validateCount(0)).toThrow("positive integer");
  });

-  test("negative count fails with validation error", () => {
-    const result = runCli(["thread", "exec", "FAKE_THREAD_ID", "-c", "-1"]);
-    expect(result.exitCode).not.toBe(0);
-    expect(result.stderr).toContain("positive integer");
+  test("negative count throws validation error", () => {
+    expect(() => validateCount(-1)).toThrow("positive integer");
  });

-  test("non-integer count fails with validation error", () => {
-    const result = runCli(["thread", "exec", "FAKE_THREAD_ID", "-c", "1.5"]);
-    expect(result.exitCode).not.toBe(0);
-    expect(result.stderr).toContain("positive integer");
+  test("non-integer count throws validation error", () => {
+    expect(() => validateCount(1.5)).toThrow("positive integer");
  });

-  test("count=1 is the default (no -c flag)", () => {
-    // Without -c, it should attempt to run 1 step (failing on missing thread, not on count validation)
-    const result = runCli(["thread", "exec", "FAKE_THREAD_ID"]);
-    expect(result.exitCode).not.toBe(0);
-    // Should NOT contain "positive integer" error — should fail on thread lookup instead
-    expect(result.stderr).not.toContain("positive integer");
+  test("count=1 passes validation", () => {
+    expect(() => validateCount(1)).not.toThrow();
  });

-  test("count=3 passes validation (fails on thread lookup)", () => {
-    const result = runCli(["thread", "exec", "FAKE_THREAD_ID", "-c", "3"]);
-    expect(result.exitCode).not.toBe(0);
-    // Should NOT contain "positive integer" error — should fail on thread/storage lookup
-    expect(result.stderr).not.toContain("positive integer");
+  test("count=3 passes validation", () => {
+    expect(() => validateCount(3)).not.toThrow();
  });
 });
@@ -58,7 +58,10 @@ describe("suspend step CAS chain and threads.yaml metadata", () => {
          },
        },
        graph: {
-          $START: { _: { role: "worker", prompt: "Start work", location: null } },
+          $START: {
+            new: { role: "worker", prompt: "Start work", location: null },
+            resume: { role: "worker", prompt: "Resume work", location: null },
+          },
          worker: {
            needs_input: {
              role: "$SUSPEND",
@@ -160,8 +163,10 @@ describe("suspend step CAS chain and threads.yaml metadata", () => {
      const threadEntry = getThread(uwf.varStore, threadId);
      expect(threadEntry).toEqual({
        head: stepHash,
+        status: "suspended",
        suspendedRole: "worker",
        suspendMessage: "Please clarify: Which API?",
+        completedAt: null,
      });

      const showResult = await cmdThreadShow(tmpDir, threadId);
@@ -55,7 +55,10 @@ describe("suspended thread display", () => {
          },
        },
        graph: {
-          $START: { _: { role: "worker", prompt: "Start work", location: null } },
+          $START: {
+            new: { role: "worker", prompt: "Start work", location: null },
+            resume: { role: "worker", prompt: "Resume work", location: null },
+          },
          worker: {
            needs_input: {
              role: "$SUSPEND",
@@ -162,7 +165,10 @@ describe("suspended thread display", () => {
          },
        },
        graph: {
-          $START: { _: { role: "worker", prompt: "Start work", location: null } },
+          $START: {
+            new: { role: "worker", prompt: "Start work", location: null },
+            resume: { role: "worker", prompt: "Resume work", location: null },
+          },
          worker: {
            needs_input: {
              role: "$SUSPEND",
@@ -248,7 +254,10 @@ describe("suspended thread display", () => {
          },
        },
        graph: {
-          $START: { _: { role: "worker", prompt: "Start work", location: null } },
+          $START: {
+            new: { role: "worker", prompt: "Start work", location: null },
+            resume: { role: "worker", prompt: "Resume work", location: null },
+          },
        },
      });

@@ -11,7 +11,7 @@ import {
  THREAD_READ_DEFAULT_QUOTA,
 } from "../commands/thread.js";
 import type { UwfStore } from "../store.js";
-import { addHistoryEntry, createUwfStore } from "../store.js";
+import { completeThread, createUwfStore, setThread } from "../store.js";
 import { seedThreads } from "./thread-test-helpers.js";

 // ── schemas used in tests ────────────────────────────────────────────────────
@@ -745,13 +745,14 @@ describe("cmdStepList with completed threads", () => {
    const threadId = "01JTEST0000000000000000A2" as ThreadId;
    // Thread is NOT in active index (simulating completed thread)
-    // But it IS in history variable store
+    // But it IS in the unified store with completed status
-    addHistoryEntry(uwf.varStore, {
-      thread: threadId,
-      workflow: workflowHash,
+    setThread(uwf.varStore, threadId, {
      head: step2Hash,
-      completedAt: Date.now(),
-      reason: null,
+      status: "idle",
+      suspendedRole: null,
+      suspendMessage: null,
+      completedAt: null,
    });
+    completeThread(uwf.varStore, threadId, "completed");

    const result = await cmdStepList(tmpDir, threadId);

@@ -872,14 +873,15 @@ describe("cmdStepShow with completed threads", () => {

    const threadId = "01JTEST0000000000000000B2" as ThreadId;
    // Thread is NOT in active index
-    // But it IS in history variable store
-    addHistoryEntry(uwf.varStore, {
-      thread: threadId,
-      workflow: workflowHash,
+    // But it IS in the unified store with completed status
+    setThread(uwf.varStore, threadId, {
      head: stepHash,
-      completedAt: Date.now(),
-      reason: null,
+      status: "idle",
+      suspendedRole: null,
+      suspendMessage: null,
+      completedAt: null,
    });
+    completeThread(uwf.varStore, threadId, "completed");

    const result = await cmdStepShow(tmpDir, stepHash);

@@ -934,15 +936,15 @@ describe("cmdThreadRead with completed threads", () => {
    });

    const threadId = "01JTEST0000000000000000C1" as ThreadId;
-    // Thread is NOT in active index
-    // But it IS in history variable store
-    addHistoryEntry(uwf.varStore, {
-      thread: threadId,
-      workflow: workflowHash,
+    // Thread is in store with completed status
+    setThread(uwf.varStore, threadId, {
      head: stepHash,
-      completedAt: Date.now(),
-      reason: null,
+      status: "idle",
+      suspendedRole: null,
+      suspendMessage: null,
+      completedAt: null,
    });
+    completeThread(uwf.varStore, threadId, "completed");

    const markdown = await cmdThreadRead(tmpDir, threadId, THREAD_READ_DEFAULT_QUOTA, null, false);

@@ -998,13 +1000,14 @@ describe("cmdThreadRead with completed threads", () => {
    });

    const threadId = "01JTEST0000000000000000C2" as ThreadId;
-    addHistoryEntry(uwf.varStore, {
-      thread: threadId,
-      workflow: workflowHash,
+    setThread(uwf.varStore, threadId, {
      head: step3Hash,
-      completedAt: Date.now(),
-      reason: null,
+      status: "idle",
+      suspendedRole: null,
+      suspendMessage: null,
+      completedAt: null,
    });
+    completeThread(uwf.varStore, threadId, "completed");

    const markdown = await cmdThreadRead(
      tmpDir,
@@ -17,7 +17,7 @@ function makeWorkflow(overrides?: Partial<WorkflowPayload>): WorkflowPayload {
        frontmatter: {
          type: "object",
          properties: {
-            $status: { enum: ["_"] },
+            $status: { const: "done" },
            plan: { type: "string" },
          },
          required: ["$status", "plan"],
@@ -51,8 +51,11 @@ function makeWorkflow(overrides?: Partial<WorkflowPayload>): WorkflowPayload {
      },
    },
    graph: {
-      $START: { _: { role: "writer", prompt: "Begin writing", location: null } },
-      writer: { _: { role: "reviewer", prompt: "Review this: {{{plan}}}", location: null } },
+      $START: {
+        new: { role: "writer", prompt: "Begin writing", location: null },
+        resume: { role: "writer", prompt: "Review previous output and continue", location: null },
+      },
+      writer: { done: { role: "reviewer", prompt: "Review this: {{{plan}}}", location: null } },
      reviewer: {
        approved: { role: "$END", prompt: "Done: {{{summary}}}", location: null },
        rejected: { role: "writer", prompt: "Fix: {{{reason}}}", location: null },
@@ -82,7 +85,7 @@ describe("Suite 1: Role Reference Integrity", () => {
      output: "None",
      frontmatter: {
        type: "object",
-        properties: { $status: { enum: ["_"] } },
+        properties: { $status: { const: "done" } },
        required: ["$status"],
      } as unknown as string,
    };
@@ -135,27 +138,38 @@ describe("Suite 2: Graph Structure", () => {
    expect(errors.some((e) => e.includes("$START must be defined in graph"))).toBe(true);
  });

-  test("2.2 $START has multiple status keys", () => {
+  test("2.2 $START missing resume edge", () => {
    const wf = makeWorkflow();
    wf.graph.$START = {
-      _: { role: "writer", prompt: "Begin", location: null },
-      other: { role: "reviewer", prompt: "Also", location: null },
+      new: { role: "writer", prompt: "Begin", location: null },
    };
    const errors = validateWorkflow(wf);
    expect(
-      errors.some((e) => e.includes('$START must have exactly one edge with status "_"')),
+      errors.some((e) => e.includes('$START must have edges with statuses "new" and "resume"')),
    ).toBe(true);
  });

-  test("2.3 $START edge uses non-_ status", () => {
+  test("2.3 $START missing new edge", () => {
    const wf = makeWorkflow();
-    wf.graph.$START = { ready: { role: "writer", prompt: "Begin", location: null } };
+    wf.graph.$START = {
+      resume: { role: "writer", prompt: "Resume", location: null },
+    };
    const errors = validateWorkflow(wf);
    expect(
-      errors.some((e) => e.includes('$START must have exactly one edge with status "_"')),
+      errors.some((e) => e.includes('$START must have edges with statuses "new" and "resume"')),
    ).toBe(true);
  });

+  test("2.3b $START with new and resume passes", () => {
+    const wf = makeWorkflow();
+    wf.graph.$START = {
+      new: { role: "writer", prompt: "Begin", location: null },
+      resume: { role: "writer", prompt: "Resume", location: null },
+    };
+    const errors = validateWorkflow(wf);
+    expect(errors.some((e) => e.includes("$START must have edges"))).toBe(false);
+  });
+
  test("2.4 $END has outgoing edges", () => {
    const wf = makeWorkflow();
    wf.graph.$END = { _: { role: "writer", prompt: "Loop", location: null } };
@@ -173,11 +187,11 @@ describe("Suite 2: Graph Structure", () => {
      output: "Isolated",
      frontmatter: {
        type: "object",
-        properties: { $status: { enum: ["_"] } },
+        properties: { $status: { const: "done" } },
        required: ["$status"],
      } as unknown as string,
    };
-    wf.graph.isolated = { _: { role: "$END", prompt: "done", location: null } };
+    wf.graph.isolated = { done: { role: "$END", prompt: "done", location: null } };
    const errors = validateWorkflow(wf);
    expect(errors.some((e) => e.includes('role "isolated" is not reachable from $START'))).toBe(
      true,
@@ -186,34 +200,37 @@ describe("Suite 2: Graph Structure", () => {

  test("2.6 edge target references invalid role", () => {
    const wf = makeWorkflow();
-    wf.graph.writer = { _: { role: "ghost", prompt: "Go to ghost", location: null } };
+    wf.graph.writer = { done: { role: "ghost", prompt: "Go to ghost", location: null } };
    const errors = validateWorkflow(wf);
    expect(errors.some((e) => e.includes('unknown target role "ghost"'))).toBe(true);
  });
 });

 describe("Suite 3: Status-Edge Consistency", () => {
-  test("3.1 single-exit role with multiple graph keys", () => {
+  test("3.1 user role using _ graph key is treated as an unknown status", () => {
+    // "_" is no longer special-cased — it's just a status key that does not
+    // match the role's declared $status const, so it surfaces as extra/missing keys.
    const wf = makeWorkflow();
-    wf.graph.writer = {
-      _: { role: "reviewer", prompt: "Review", location: null },
-      extra: { role: "$END", prompt: "Done", location: null },
-    };
+    wf.graph.writer = { _: { role: "reviewer", prompt: "Review", location: null } };
    const errors = validateWorkflow(wf);
-    expect(
-      errors.some((e) =>
-        e.includes('role "writer" is single-exit but has status keys other than "_"'),
-      ),
-    ).toBe(true);
+    expect(errors.some((e) => e.includes('role "writer" graph has extra status keys: _'))).toBe(
+      true,
+    );
+    expect(errors.some((e) => e.includes('role "writer" graph is missing status keys: done'))).toBe(
+      true,
+    );
  });

-  test("3.2 single-exit role missing _ key", () => {
+  test("3.2 user role graph key not matching $status const", () => {
     const wf = makeWorkflow();
-    wf.graph.writer = { done: { role: "reviewer", prompt: "Review", location: null } };
+    wf.graph.writer = { wrong: { role: "reviewer", prompt: "Review", location: null } }; // "wrong" replaces the expected "done" key
     const errors = validateWorkflow(wf);
-    expect(
-      errors.some((e) => e.includes('role "writer" is single-exit but graph has no "_" key')),
-    ).toBe(true);
+    expect(errors.some((e) => e.includes('role "writer" graph has extra status keys: wrong'))).toBe(
+      true,
+    );
+    expect(errors.some((e) => e.includes('role "writer" graph is missing status keys: done'))).toBe(
+      true,
+    );
   });

  test("3.3 multi-exit role with extra statuses", () => {
@@ -240,18 +257,23 @@ describe("Suite 3: Status-Edge Consistency", () => {
    ).toBe(true);
  });

-  test("3.5 multi-exit role with _ key", () => {
+  test("3.5 multi-exit role with _ key is treated as an unknown status", () => {
    const wf = makeWorkflow();
    wf.graph.reviewer = { _: { role: "$END", prompt: "Done", location: null } };
    const errors = validateWorkflow(wf);
-    expect(errors.some((e) => e.includes('role "reviewer" is multi-exit but graph uses "_"'))).toBe(
+    expect(errors.some((e) => e.includes('role "reviewer" graph has extra status keys: _'))).toBe(
      true,
    );
+    expect(
+      errors.some((e) =>
+        e.includes('role "reviewer" graph is missing status keys: approved, rejected'),
+      ),
+    ).toBe(true);
  });
 });

-describe("Suite 3b: Enum-Based Multi-Exit", () => {
-  test("3b.1 enum multi-exit passes with matching graph keys", () => {
+describe("Suite 3b: Enum-Based $status is Rejected", () => {
+  test("3b.1 enum multi-exit is rejected (must use oneOf + const)", () => {
    const wf = makeWorkflow();
    wf.roles.reviewer = {
      ...wf.roles.reviewer,
@@ -269,99 +291,102 @@ describe("Suite 3b: Enum-Based Multi-Exit", () => {
      rejected: { role: "writer", prompt: "Fix: {{{comments}}}", location: null },
    };
    const errors = validateWorkflow(wf);
-    expect(errors).toEqual([]);
+    expect(errors.some((e) => e.includes("must define") && e.includes("const"))).toBe(true);
  });

-  test("3b.2 enum multi-exit with extra graph key", () => {
-    const wf = makeWorkflow();
-    wf.roles.reviewer = {
-      ...wf.roles.reviewer,
-      frontmatter: {
-        type: "object",
-        properties: {
-          $status: { enum: ["approved", "rejected"] },
-          comments: { type: "string" },
-        },
-        required: ["$status", "comments"],
-      } as unknown as string,
-    };
-    wf.graph.reviewer = {
-      approved: { role: "$END", prompt: "Done", location: null },
-      rejected: { role: "writer", prompt: "Fix", location: null },
-      timeout: { role: "$END", prompt: "Timed out", location: null },
-    };
-    const errors = validateWorkflow(wf);
-    expect(errors.some((e) => e.includes("extra status keys: timeout"))).toBe(true);
-  });
-
-  test("3b.3 enum multi-exit with missing graph key", () => {
-    const wf = makeWorkflow();
-    wf.roles.reviewer = {
-      ...wf.roles.reviewer,
-      frontmatter: {
-        type: "object",
-        properties: {
-          $status: { enum: ["approved", "rejected"] },
-          comments: { type: "string" },
-        },
-        required: ["$status", "comments"],
-      } as unknown as string,
-    };
-    wf.graph.reviewer = {
-      approved: { role: "$END", prompt: "Done", location: null },
-    };
-    const errors = validateWorkflow(wf);
-    expect(errors.some((e) => e.includes("missing status keys: rejected"))).toBe(true);
-  });
-
-  test("3b.4 enum with single value (not multi-exit) treated as single-exit", () => {
+  test("3b.2 enum single-exit is rejected (must use const)", () => {
    const wf = makeWorkflow();
    wf.roles.writer = {
      ...wf.roles.writer,
      frontmatter: {
        type: "object",
        properties: {
-          $status: { enum: ["_"] },
+          $status: { enum: ["ready"] },
+          plan: { type: "string" },
+        },
+        required: ["$status", "plan"],
+      } as unknown as string,
+    };
+    wf.graph.writer = { ready: { role: "reviewer", prompt: "Review: {{{plan}}}", location: null } };
+    const errors = validateWorkflow(wf);
+    expect(errors.some((e) => e.includes("must define") && e.includes("const"))).toBe(true);
+  });
+});
+
+describe("Suite 3c: Const-Based Flat Schema", () => {
+  test("3c.1 flat schema with const $status passes validation", () => {
+    const wf = makeWorkflow();
+    wf.roles.writer = {
+      ...wf.roles.writer,
+      frontmatter: {
+        type: "object",
+        properties: {
+          $status: { const: "done" },
          plan: { type: "string" },
        },
        required: ["$status", "plan"],
      } as unknown as string,
    };
-    wf.graph.writer = { _: { role: "reviewer", prompt: "Review: {{{plan}}}", location: null } };
    const errors = validateWorkflow(wf);
    expect(errors).toEqual([]);
  });

-  test("3b.5 enum multi-exit mustache var not in frontmatter", () => {
+  test("3c.2 flat schema with const $status detects extra graph key", () => {
    const wf = makeWorkflow();
-    wf.roles.reviewer = {
-      ...wf.roles.reviewer,
+    wf.roles.writer = {
+      ...wf.roles.writer,
      frontmatter: {
        type: "object",
        properties: {
-          $status: { enum: ["approved", "rejected"] },
-          comments: { type: "string" },
+          $status: { const: "done" },
+          plan: { type: "string" },
        },
-        required: ["$status", "comments"],
+        required: ["$status", "plan"],
      } as unknown as string,
    };
-    wf.graph.reviewer = {
-      approved: { role: "$END", prompt: "Done: {{{nonexistent}}}", location: null },
-      rejected: { role: "writer", prompt: "Fix: {{{comments}}}", location: null },
+    wf.graph.writer = {
+      done: { role: "reviewer", prompt: "Review.", location: null },
+      extra: { role: "$END", prompt: "Nope.", location: null },
    };
    const errors = validateWorkflow(wf);
-    expect(errors.some((e) => e.includes("nonexistent") && e.includes("not found"))).toBe(true);
+    expect(errors.some((e) => e.includes("extra status keys") && e.includes("extra"))).toBe(true);
+  });
+
+  test("3c.3 flat schema with const $status validates mustache vars", () => {
+    const wf = makeWorkflow();
+    wf.roles.writer = {
+      ...wf.roles.writer,
+      frontmatter: {
+        type: "object",
+        properties: {
+          $status: { const: "done" },
+          plan: { type: "string" },
+        },
+        required: ["$status", "plan"],
+      } as unknown as string,
+    };
+    wf.graph.writer = {
+      done: { role: "reviewer", prompt: "Review: {{{nonexistent}}}", location: null },
+    };
+    const errors = validateWorkflow(wf);
+    expect(
+      errors.some(
+        (e) => e.includes('prompt variable "nonexistent"') && e.includes('role "writer"'),
+      ),
+    ).toBe(true);
  });
 });

 describe("Suite 4: Mustache Template Variable Existence", () => {
-  test("4.1 prompt references nonexistent variable (single-exit)", () => {
+  test("4.1 prompt references nonexistent variable (const status)", () => {
     const wf = makeWorkflow();
-    wf.graph.writer = { _: { role: "reviewer", prompt: "Review: {{{branch}}}", location: null } };
+    wf.graph.writer = { // "branch" is the variable the validator is expected to flag for role "writer"
+      done: { role: "reviewer", prompt: "Review: {{{branch}}}", location: null },
+    };
     const errors = validateWorkflow(wf);
     expect(
-      errors.some((e) =>
-        e.includes('prompt variable "branch" not found in role "writer" frontmatter'),
+      errors.some(
+        (e) => e.includes('prompt variable "branch"') && e.includes('role "writer" frontmatter'),
       ),
     ).toBe(true);
   });
@@ -388,7 +413,7 @@ describe("Suite 4: Mustache Template Variable Existence", () => {

  test("4.4 $status variable is always valid", () => {
    const wf = makeWorkflow();
-    wf.graph.writer = { _: { role: "reviewer", prompt: "Status: {{$status}}", location: null } };
+    wf.graph.writer = { done: { role: "reviewer", prompt: "Status: {{$status}}", location: null } };
    const errors = validateWorkflow(wf);
    expect(errors).toEqual([]);
  });
@@ -456,14 +481,14 @@ describe("Suite 6: Multiple Errors Collection", () => {
      output: "None",
      frontmatter: {
        type: "object",
-        properties: { $status: { enum: ["_"] } },
+        properties: { $status: { const: "done" } },
        required: ["$status"],
      } as unknown as string,
    };
    // unknown graph reference
-    wf.graph.nonexistent = { _: { role: "$END", prompt: "done", location: null } };
+    wf.graph.nonexistent = { done: { role: "$END", prompt: "done", location: null } };
    // bad mustache var
-    wf.graph.writer = { _: { role: "reviewer", prompt: "{{{badvar}}}", location: null } };
+    wf.graph.writer = { done: { role: "reviewer", prompt: "{{{badvar}}}", location: null } };
    const errors = validateWorkflow(wf);
    expect(errors.length).toBeGreaterThanOrEqual(3);
  });
@@ -31,15 +31,18 @@ function makeMinimalPayload(name: string, description: string): WorkflowPayload
        frontmatter: {
          type: "object",
          properties: {
-            $status: { type: "string" },
+            $status: { const: "done" },
          },
          required: ["$status"],
        } as unknown as CasRef,
      },
    },
    graph: {
-      $START: { _: { role: "worker", prompt: "start working", location: null } },
-      worker: { _: { role: "$END", prompt: "done", location: null } },
+      $START: {
+        new: { role: "worker", prompt: "start working", location: null },
+        resume: { role: "worker", prompt: "resume working", location: null },
+      },
+      worker: { done: { role: "$END", prompt: "done", location: null } },
    },
  };
 }
@@ -1,20 +1,17 @@
-#!/usr/bin/env node
+#!/usr/bin/env -S node --disable-warning=ExperimentalWarning

 import type { CasRef, ThreadId, ThreadStatus } from "@united-workforce/protocol";
 import { Command } from "commander";
 import { cmdConfigGet, cmdConfigList, cmdConfigSet } from "./commands/config.js";
 import { cmdLogClean, cmdLogList, cmdLogShow } from "./commands/log.js";
 import {
-  cmdPromptAdapter,
-  cmdPromptAuthor,
+  cmdPromptAdapterDeveloping,
  cmdPromptBootstrap,
-  cmdPromptDeveloper,
  cmdPromptList,
-  cmdPromptSetup,
  cmdPromptUsage,
-  cmdPromptUser,
+  cmdPromptWorkflowAuthoring,
 } from "./commands/prompt.js";
-import { cmdSetup, cmdSetupInteractive } from "./commands/setup.js";
+import { cmdSetup, cmdSetupInteractive, resolvePresetBaseUrl } from "./commands/setup.js";
 import { cmdStepFork, cmdStepList, cmdStepRead, cmdStepShow } from "./commands/step.js";
 import {
  cmdThreadCancel,
@@ -510,53 +507,32 @@ prompt.addHelpCommand(false);

 prompt
  .command("usage")
-  .description("Print the complete skill content (all references combined)")
+  .description("Print the usage reference (CLI guide + typical workflows)")
  .action(() => {
    console.log(cmdPromptUsage());
  });

-prompt
-  .command("setup")
-  .description("Print setup instructions for installing the uwf skill")
-  .action(() => {
-    console.log(cmdPromptSetup());
-  });
-
-prompt
-  .command("adapter")
-  .description("Print the adapter reference (building agent adapters)")
-  .action(() => {
-    console.log(cmdPromptAdapter());
-  });
-
-prompt
-  .command("author")
-  .description("Print the author reference (workflow YAML design guide)")
-  .action(() => {
-    console.log(cmdPromptAuthor());
-  });
-
-prompt
-  .command("developer")
-  .description("Print the developer reference (coding conventions + architecture)")
-  .action(() => {
-    console.log(cmdPromptDeveloper());
-  });
-
-prompt
-  .command("user")
-  .description("Print the user reference (CLI guide + typical workflows)")
-  .action(() => {
-    console.log(cmdPromptUser());
-  });
-
 prompt
  .command("bootstrap")
-  .description("Print the bootstrap skill YAML for Hermes agents")
+  .description("Print setup instructions for installing uwf skills")
  .action(() => {
    console.log(cmdPromptBootstrap());
  });

+prompt
+  .command("workflow-authoring")
+  .description("Print the workflow authoring reference (YAML design guide)")
+  .action(() => {
+    console.log(cmdPromptWorkflowAuthoring());
+  });
+
+prompt
+  .command("adapter-developing")
+  .description("Print the adapter developing reference (building agent adapters)")
+  .action(() => {
+    console.log(cmdPromptAdapterDeveloping());
+  });
+
 prompt
  .command("list")
  .description("List all available prompt names")
@@ -566,7 +542,7 @@ prompt

 program
  .command("setup")
-  .description("Configure provider, model, and agent")
+  .description("Configure provider, model, and agent. Run without options for interactive wizard.")
  .option("--provider <name>", "Provider name")
  .option("--base-url <url>", "OpenAI-compatible API base URL")
  .option("--api-key <key>", "API key")
@@ -582,10 +558,14 @@ program
    }) => {
      const storageRoot = resolveStorageRoot();
      runAction(async () => {
-        if (opts.provider && opts.baseUrl && opts.apiKey && opts.model) {
+        // Resolve preset base-url when provider is known but --base-url is omitted
+        const resolvedBaseUrl =
+          opts.baseUrl ??
+          (opts.provider !== undefined ? resolvePresetBaseUrl(opts.provider) : null);
+        if (opts.provider && resolvedBaseUrl && opts.apiKey && opts.model) {
          const result = await cmdSetup({
            provider: opts.provider,
-            baseUrl: opts.baseUrl,
+            baseUrl: resolvedBaseUrl,
            apiKey: opts.apiKey,
            model: opts.model,
            agent: opts.agent ?? undefined,
@@ -596,7 +576,7 @@ program
          await cmdSetupInteractive(storageRoot);
        } else {
          throw new Error(
-            "Non-interactive setup requires all of: --provider, --base-url, --api-key, --model",
+            "Non-interactive setup requires: --provider, --api-key, --model (--base-url is optional for preset providers)",
          );
        }
      });
@@ -1,101 +1,330 @@
+import { readFileSync } from "node:fs";
+import { dirname, join } from "node:path";
+import { fileURLToPath } from "node:url";
 import {
-  generateAdapterReference,
-  generateAuthorReference,
-  generateBootstrapReference,
-  generateDeveloperReference,
-  generateUserReference,
+  generateAdapterDevelopingReference,
+  generateUsageReference,
+  generateWorkflowAuthoringReference,
 } from "@united-workforce/util";

+// CLI package version interpolated into the bootstrap prompt text ("uwf --version   # should print ...").
+// Walks up from this module's directory (via import.meta.url), at most 5 levels, to the first package.json whose name is "@united-workforce/cli" (intended to work from both src/ and dist/); returns "0.0.0" if none is found or it has no version.
+function _findCliVersion(): string {
+  let dir = dirname(fileURLToPath(import.meta.url));
+  for (let i = 0; i < 5; i++) {
+    const candidate = join(dir, "package.json");
+    try {
+      const pkg = JSON.parse(readFileSync(candidate, "utf-8")) as {
+        name?: string;
+        version?: string;
+      };
+      if (pkg.name === "@united-workforce/cli") {
+        return pkg.version ?? "0.0.0";
+      }
+    } catch {
+      // missing, unreadable, or unparsable package.json at this level — keep walking up
+    }
+    dir = dirname(dir);
+  }
+  return "0.0.0";
+}
+const CLI_VERSION = _findCliVersion();
+
 export {
-  generateAdapterReference as cmdPromptAdapter,
-  generateAuthorReference as cmdPromptAuthor,
-  generateBootstrapReference as cmdPromptBootstrap,
-  generateDeveloperReference as cmdPromptDeveloper,
-  generateUserReference as cmdPromptUser,
+  generateAdapterDevelopingReference as cmdPromptAdapterDeveloping,
+  generateUsageReference as cmdPromptUsage,
+  generateWorkflowAuthoringReference as cmdPromptWorkflowAuthoring,
 };

 const PROMPT_ENTRIES: ReadonlyArray<{ name: string; generate: () => string }> = [
-  { name: "user", generate: generateUserReference },
-  { name: "author", generate: generateAuthorReference },
-  { name: "developer", generate: generateDeveloperReference },
-  { name: "adapter", generate: generateAdapterReference },
-  { name: "bootstrap", generate: generateBootstrapReference },
+  { name: "usage", generate: generateUsageReference },
+  { name: "workflow-authoring", generate: generateWorkflowAuthoringReference },
+  { name: "adapter-developing", generate: generateAdapterDevelopingReference },
 ];

 export function cmdPromptList(): ReadonlyArray<string> {
  return PROMPT_ENTRIES.map((e) => e.name);
 }

-export function cmdPromptUsage(): string {
-  return PROMPT_ENTRIES.map((e) => e.generate()).join("\n\n---\n\n");
-}
+export function cmdPromptBootstrap(): string {
+  return `# uwf Bootstrap

-export function cmdPromptSetup(): string {
-  return `# uwf Skill Setup
+Set up or upgrade uwf (United Workforce) — from zero to running your first workflow.

-You are being asked to install or update the uwf (United Workforce) skill
-so that you know how to use the \`uwf\` CLI for workflow orchestration.
+## Scenario A: Fresh Install

-## Steps
+### Step 0 — Environment pre-flight check
+
+Run ALL checks below BEFORE installing anything. If any check fails, fix it first — do NOT skip ahead.

-1. **Check if uwf CLI is installed:**
 \`\`\`bash
-   uwf --help
-   \`\`\`
-   If not installed: \`bun add -g @united-workforce/cli\`
+# 1. Node.js
+node --version   # need v20+
+# FIX: install via nvm (https://github.com/nvm-sh/nvm) or your OS package manager

-2. **Clean up old versions of the skill** (if any exist):
-   - Look for any existing uwf/workflow skill files in your skill directories
-     (e.g. \`~/.hermes/skills/uwf/SKILL.md\` or similar paths).
-   - If a skill exists, read its \`version\` from the YAML frontmatter.
-   - If the version matches the current CLI version (\`uwf --version\`), you're done — no update needed.
-   - If the version is older, **delete the old skill file** before proceeding.
+# 2. Package manager — pnpm (preferred) or npm
+pnpm --version || npm --version
+# FIX for pnpm: corepack enable pnpm   (Node 20+)
+#   or: npm install -g pnpm
+# If pnpm is not an option, npm works too — adjust install commands below

-3. **Get the usage skill content:**
-   \`\`\`bash
-   uwf prompt usage
+# 3. Global bin directory is in PATH
+# For pnpm:
+pnpm bin -g       # prints the global bin directory
+# For npm:
+npm prefix -g     # global prefix; bin is <prefix>/bin
+# The directory printed above MUST be in your PATH.
+# Test: echo $PATH | tr ':' '\\n'
+# FIX: add the missing directory to your shell's startup file
+#   (e.g. ~/.bashrc, ~/.zshrc, ~/.profile, or fish config):
+#   export PATH="<global-bin-dir>:$PATH"
 \`\`\`

-4. **Create a SKILL.md file** in your skill directory (e.g. \`~/.hermes/skills/uwf/SKILL.md\`).
-   The output of \`uwf prompt usage\` is the skill body WITHOUT frontmatter.
-   You must prepend the following YAML frontmatter:
+**All checks must pass before continuing.** If you had to modify PATH, verify the change persists by opening a new shell or sourcing your shell config.

+### Step 1 — Discover agents and install adapter
+
+**First, detect which supported agents are already installed on the user's machine:**
+
+\`\`\`bash
+# Check for Hermes Agent
+which hermes 2>/dev/null && hermes --version
+
+# Check for Claude Code
+which claude 2>/dev/null && claude --version   # should show "X.Y.Z (Claude Code)"
+\`\`\`
+
+**Based on the results:**
+
+- **Only hermes found** → install \`uwf-hermes\` adapter
+- **Only claude found** → install \`uwf-claude-code\` adapter
+- **Both found** → ask the user which agent they want uwf to use as default
+- **Neither found** → the user must install at least one agent first:
+  - Hermes Agent: https://hermes-agent.nousresearch.com/docs
+  - Claude Code: \`npm install -g @anthropic-ai/claude-code\`
+
+**Install the uwf CLI and the chosen adapter** using pnpm or npm:
+
+\`\`\`bash
+# CLI (required)
+pnpm add -g @united-workforce/cli       # or: npm install -g @united-workforce/cli
+
+# Adapter — install the one matching the detected agent:
+pnpm add -g @united-workforce/agent-hermes       # or: npm i -g @united-workforce/agent-hermes
+pnpm add -g @united-workforce/agent-claude-code   # or: npm i -g @united-workforce/agent-claude-code
+\`\`\`
+
+**⚠ Adapter versions are independent from CLI versions.** Do NOT try to match adapter version to CLI version. Just install \`@latest\` (the default).
+
+**After installing, verify that \`uwf\` and the adapter are available in PATH:**
+
+\`\`\`bash
+uwf --version          # should print ${CLI_VERSION}
+uwf-hermes --version   # or: uwf-claude-code --version
+\`\`\`
+
+If either command is not found, the global bin directory is not in the current shell's PATH. **You must fix this before continuing:**
+
+1. Find where the binary was installed:
+   \`\`\`bash
+   find ~/.local ~/.hermes /usr/local -name uwf -type f 2>/dev/null
+   npm prefix -g    # global prefix — bin is <prefix>/bin
+   \`\`\`
+2. Add the directory to PATH permanently by appending to the user's shell config (e.g. \`~/.bashrc\`, \`~/.zshrc\`, \`~/.profile\`, or fish config):
+   \`\`\`bash
+   export PATH="<global-bin-dir>:$PATH"
+   \`\`\`
+3. Source the updated config or open a new shell, then re-verify the commands work.
+
+**uwf-hermes** also requires the Hermes ACP plugin. Verify with \`hermes acp --help\`. If not available, install it:
+\`\`\`bash
+# Option A: install into hermes venv (recommended)
+source ~/.hermes/hermes-agent/.venv/bin/activate && pip install 'hermes-agent[acp]'
+
+# Option B: pipx
+pipx install 'hermes-agent[acp]'
+
+# Option C: if installed from source
+pip install -e '.[acp]'
+\`\`\`
+
+### Step 2 — Configure provider and model
+
+uwf needs an LLM provider to run agents. **Ask the user** for their provider, API key, and model, then run:
+
+\`\`\`bash
+uwf setup --provider <name> --api-key <key> --model <model> --agent <adapter-command>
+\`\`\`
+
+**Note:** \`--agent\` takes the adapter **command name** (e.g. \`uwf-hermes\`), not the npm package name.
+
+**Preset providers** — when using a preset name, \`--base-url\` is auto-filled and can be omitted:
+
+| Provider | Name | Default base URL |
+|----------|------|-----------------|
+| OpenAI | \`openai\` | https://api.openai.com/v1 |
+| xAI | \`xai\` | https://api.x.ai/v1 |
+| OpenRouter | \`openrouter\` | https://openrouter.ai/api/v1 |
+| Venice | \`venice\` | https://api.venice.ai/api/v1 |
+| Dashscope | \`dashscope\` | https://dashscope.aliyuncs.com/compatible-mode/v1 |
+| DeepSeek | \`deepseek\` | https://api.deepseek.com/v1 |
+| SiliconFlow | \`siliconflow\` | https://api.siliconflow.cn/v1 |
+| VolcEngine | \`volcengine\` | https://ark.cn-beijing.volces.com/api/v3 |
+| Kimi (Moonshot) | \`kimi\` | https://api.moonshot.cn/v1 |
+| GLM (Zhipu AI) | \`glm\` | https://open.bigmodel.cn/api/paas/v4 |
+| StepFun | \`stepfun\` | https://api.stepfun.com/v1 |
+| MiniMax | \`minimax\` | https://api.minimax.io/v1 |
+| Ollama (local) | \`ollama\` | http://localhost:11434/v1 |
+
+For **non-preset providers**, you must specify \`--base-url\` manually.
+
+Example:
+\`\`\`bash
+uwf setup --provider openrouter --api-key sk-or-... --model anthropic/claude-sonnet-4 --agent uwf-hermes
+\`\`\`
+
+If the user doesn't know what to choose, suggest \`openrouter\` with \`anthropic/claude-sonnet-4\` as a sensible default.
+
+Config is saved to \`~/.uwf/config.yaml\`. Verify with \`cat ~/.uwf/config.yaml\`.
+
+### Step 3 — Install skills
+
+Run each command and create a skill from its output:
+
+\`\`\`bash
+uwf prompt usage               # → save as skill "uwf-usage"
+uwf prompt workflow-authoring  # → save as skill "uwf-workflow-authoring"
+uwf prompt adapter-developing  # → save as skill "uwf-adapter-developing"
+\`\`\`
+
+Each command outputs a complete SKILL.md with YAML frontmatter. Use your agent framework's skill creation API to save them (e.g. \`skill_manage(action='create', name='uwf-usage', content=<output>)\`).
+
+Verify skills are installed by listing them (e.g. \`skills_list()\`) and confirming all three appear.
+
+**⚠ After saving all skills, start a new session** so the agent loads the updated skill content. Skills saved in the current session are not active until the next session.
+
+### Step 4 — Verify end-to-end
+
+Create a minimal workflow file to test your setup:
+
+\`\`\`bash
+cat > /tmp/hello.yaml << 'YAML'
+name: hello
+description: Minimal smoke test
+roles:
+  greeter:
+    description: "Greet the user"
+    goal: "Respond with a friendly greeting"
+    capabilities: []
+    procedure: "Write a short greeting based on the prompt."
+    output: "A greeting message."
+    frontmatter:
+      type: object
+      properties:
+        $status: { const: done }
+        message: { type: string }
+      required: [$status, message]
+graph:
+  $START:
+    new: { role: greeter, prompt: "Say hello to the user." }
+    resume: { role: greeter, prompt: "Greet the user again." }
+  greeter:
+    done: { role: "$END", prompt: "Done." }
+YAML
+\`\`\`
+
+Then run:
+
+\`\`\`bash
+uwf thread start /tmp/hello.yaml -p "Hello, world!"
+uwf thread exec <thread-id>
+uwf thread show <thread-id>
+\`\`\`
+
+If the thread reaches \`$END\` with status \`completed\`, the setup is working.
+
+## Scenario B: Upgrade from Previous Version
+
+### Step 1 — Update packages
+
+\`\`\`bash
+# Using pnpm
+pnpm add -g @united-workforce/cli@latest
+
+# Using npm
+npm install -g @united-workforce/cli@latest
+\`\`\`
+
+\`\`\`bash
+uwf --version   # should print ${CLI_VERSION}
+\`\`\`
+
+Also update your adapter(s):
+
+\`\`\`bash
+# pnpm
+pnpm add -g @united-workforce/agent-hermes@latest
+
+# npm
+npm install -g @united-workforce/agent-hermes@latest
+\`\`\`
+
+### Step 2 — Regenerate skills
+
+Skill content is bundled with the CLI — always regenerate after upgrading:
+
+\`\`\`bash
+uwf prompt usage               # → update skill "uwf-usage"
+uwf prompt workflow-authoring  # → update skill "uwf-workflow-authoring"
+uwf prompt adapter-developing  # → update skill "uwf-adapter-developing"
+\`\`\`
+
+**⚠ After updating skills, start a new session** to load the new skill content.
+
+### Step 3 — Migrate workflow YAML files (if needed)
+
+Check the changelog for breaking changes. Known migrations:
+
+- **v0.2.0**: \`$START._\` → \`$START.new\` + \`$START.resume\`. All workflow YAML files must be updated:
  \`\`\`yaml
-   ---
-   name: uwf
-   description: "Use when orchestrating multi-agent workflows with uwf (United Workforce). Covers CLI usage, workflow YAML authoring, moderator logic, and agent development."
-   version: <VERSION>
-   author: United Workforce
-   license: MIT
-   metadata:
-     hermes:
-       tags: [workflow, agents, orchestration, cli, yaml]
-   ---
+  # Before (v0.1.x)
+  $START:
+    _: { role: planner, prompt: "..." }
+
+  # After (v0.2.0+)
+  $START:
+    new: { role: planner, prompt: "..." }
+    resume: { role: planner, prompt: "Review previous run and continue." }
  \`\`\`

-   Replace \`<VERSION>\` with the installed CLI version (from \`uwf --version\`).
+Update all \`.workflow/\` and \`.workflows/\` YAML files in your projects. \`uwf workflow add\` will reject files with the old \`_\` syntax.

-5. **Verify** the skill is loadable by your agent framework.
+- **v0.2.1**: \`$status: { enum: [value] }\` → \`$status: { const: "value" }\`. The validator no longer accepts \`enum\` for \`$status\`. Update all workflow YAML files:
+  \`\`\`yaml
+  # Before (v0.2.0)
+  $status: { enum: [done] }
+  $status: { type: string, enum: ["ready", "failed"] }

-## Individual prompts
+  # After (v0.2.1+)
+  $status: { const: "done" }
+  # For multi-exit, use oneOf with const (unchanged)
+  \`\`\`

-You can also get individual reference sections:
+### Step 4 — Verify
+
+\`\`\`bash
+uwf thread start <your-workflow> -p "upgrade test"
+uwf thread exec <thread-id>
+\`\`\`
+
+## Available prompts

 \`\`\`bash
 uwf prompt list                # list available prompt names
-uwf prompt user                # user reference (CLI guide + typical workflows)
-uwf prompt author              # author reference (workflow YAML design guide)
-uwf prompt developer           # developer reference (coding conventions + architecture)
-uwf prompt adapter             # adapter reference (building agent adapters)
-uwf prompt bootstrap           # bootstrap skill YAML for Hermes agents
+uwf prompt usage               # CLI usage guide
+uwf prompt workflow-authoring  # workflow YAML design guide
+uwf prompt adapter-developing  # building agent adapters
+uwf prompt bootstrap           # this guide
 \`\`\`
-
-## Notes
-
- The skill content is bundled with the CLI and versioned with it — always use
-  \`uwf prompt usage\` to get the content matching your installed version.
- Do NOT hand-edit the skill body. If the CLI is updated, re-run \`uwf prompt setup\`
-  and follow the steps again.
- When upgrading, always delete the old skill first to avoid stale instructions.
 `;
 }
@@ -1,3 +1,4 @@
+import { execFileSync } from "node:child_process";
 import { existsSync, mkdirSync, readdirSync, readFileSync, statSync, writeFileSync } from "node:fs";
 import { join } from "node:path";
 import { stdin as input, stdout as output } from "node:process";
@@ -72,6 +73,12 @@ const PRESET_PROVIDERS = [
  { name: "ollama", label: "Ollama (local)", baseUrl: "http://localhost:11434/v1" },
 ] as const;

+/** Look up the base URL for a preset provider by exact (===, case-sensitive) match on its `name` in PRESET_PROVIDERS. Returns null if not a preset. */
+export function resolvePresetBaseUrl(providerName: string): string | null {
+  const preset = PRESET_PROVIDERS.find((p) => p.name === providerName);
+  return preset !== undefined ? preset.baseUrl : null;
+}
+
 type SetupArgs = {
  provider: string;
  baseUrl: string;
@@ -175,7 +182,6 @@ export async function _discoverAgents(): Promise<string[]> {

 async function _tryWhichDiscovery(): Promise<string[] | null> {
  try {
-    const { execFileSync } = await import("node:child_process");
    const text = execFileSync("which", ["-a", "uwf-hermes", "uwf-claude-code", "uwf-cursor"], {
      encoding: "utf-8",
      stdio: ["pipe", "pipe", "pipe"],
@@ -391,6 +397,37 @@ function mergeConfig(existing: Record<string, unknown>, args: SetupArgs): Record
  };
 }

+/**
+ * Probe PATH (by running `which`) for the adapter binary `uwf-<agentName>` and, when agentName is "hermes", for the `hermes` CLI it depends on.
+ * Returns human-readable warnings that include an install/fix hint — an empty array means every probed binary was found; a failed probe is reported as a warning, not thrown.
+ */
+export function _checkAdapterAvailability(agentName: string): string[] {
+  const warnings: string[] = [];
+  const binary = `uwf-${agentName}`;
+
+  try {
+    execFileSync("which", [binary], { encoding: "utf8", stdio: ["pipe", "pipe", "pipe"] });
+  } catch {
+    warnings.push(
+      `${binary} not found in PATH. Install it: pnpm add -g @united-workforce/agent-${agentName}`,
+    );
+    return warnings; // adapter itself is missing, so its dependency check below would add nothing useful
+  }
+
+  // Only the hermes adapter has an extra dependency checked here: the hermes CLI itself
+  if (agentName === "hermes") {
+    try {
+      execFileSync("which", ["hermes"], { encoding: "utf8", stdio: ["pipe", "pipe", "pipe"] });
+    } catch {
+      warnings.push(
+        'hermes CLI not found in PATH (required by uwf-hermes). Fix: export PATH="$HOME/.hermes/hermes-agent/.venv/bin:$PATH"',
+      );
+    }
+  }
+
+  return warnings;
+}
+
 /**
 * Non-interactive setup. All required args provided via CLI flags.
 */
@@ -405,15 +442,26 @@ export async function cmdSetup(args: SetupArgs): Promise<Record<string, unknown>

  writeFileSync(configPath, stringify(merged, { indent: 2 }), "utf8");

+  // Print config path to stderr (stdout is reserved for JSON output)
+  console.error(`Config saved to ${configPath} ✓`);
+
  // Validate model connectivity
  const validation = await validateModel(args.baseUrl, args.apiKey, args.model);

+  // Check adapter availability
+  const agentName = _agentNameFromBinary(args.agent ?? "hermes");
+  const adapterWarnings = _checkAdapterAvailability(agentName);
+  for (const w of adapterWarnings) {
+    console.error(`⚠ ${w}`);
+  }
+
  return {
    configPath,
    provider: args.provider,
    model: args.model,
    defaultAgent: merged.defaultAgent,
    validation,
+    adapterWarnings,
  };
 }

@@ -6,7 +6,7 @@ import type {
  StepNodePayload,
  ThreadId,
 } from "@united-workforce/protocol";
-import { createUwfStore, findHistoryEntry, getThread, type UwfStore } from "../store.js";
+import { createUwfStore, getThread, type UwfStore } from "../store.js";

 type ChainState = {
  startHash: CasRef;
@@ -207,10 +207,6 @@ async function resolveHeadHash(storageRoot: string, threadId: ThreadId): Promise
  if (entry !== null) {
    return entry.head;
  }
-  const hist = findHistoryEntry(uwf.varStore, threadId);
-  if (hist !== null) {
-    return hist.head;
-  }
  fail(`thread not found: ${threadId}`);
 }

@@ -66,6 +66,7 @@ export async function cmdStepList(
      agent: item.payload.agent,
      timestamp: item.timestamp,
      durationMs: item.payload.completedAtMs - item.payload.startedAtMs,
+      usage: item.payload.usage ?? null,
    });
  }

@@ -114,8 +115,10 @@ export async function cmdStepFork(
  const newThreadId = generateUlid(Date.now()) as ThreadId;
  setThread(uwf.varStore, newThreadId, {
    head: stepHash,
+    status: "idle",
    suspendedRole: null,
    suspendMessage: null,
+    completedAt: null,
  });

  return {
@@ -38,17 +38,14 @@ import { createMarker, deleteMarker, isThreadRunning } from "../background/index
 import { createIncludeTag } from "../include.js";
 import { evaluate, isSuspendResult } from "../moderator/index.js";
 import {
-  addHistoryEntry,
+  completeThread,
  createUwfStore,
-  deleteThread,
-  findHistoryEntry,
  getThread,
-  loadAllHistory,
-  loadAllThreads,
+  loadActiveThreads,
+  loadHistoryThreads,
  loadWorkflowRegistry,
  resolveWorkflowHash,
  setThread,
-  type ThreadHistoryLine,
  type UwfStore,
 } from "../store.js";
 import { checkWorkflowFilenameConsistency, isCasRef, parseWorkflowPayload } from "../validate.js";
@@ -485,20 +482,35 @@ export async function cmdThreadShow(
 ): Promise<ThreadShowOutput> {
  const uwf = await createUwfStore(storageRoot);
  const entry = getThread(uwf.varStore, threadId);
-  if (entry !== null) {
+  if (entry === null) {
+    fail(`thread not found: ${threadId}`);
+  }
+
  const activeHead = entry.head;
  const workflow = resolveWorkflowFromHead(uwf, activeHead);
  if (workflow === null) {
    fail(`failed to resolve workflow from head: ${activeHead}`);
  }

-    const status = await resolveActiveThreadStatus(
-      storageRoot,
-      threadId,
-      uwf,
-      activeHead,
+  // Determine if this is a completed/cancelled thread
+  if (entry.status === "completed" || entry.status === "cancelled") {
+    const hint = null;
+    return {
      workflow,
-    );
+      thread: threadId,
+      head: activeHead,
+      status: entry.status,
+      currentRole: null,
+      suspendedRole: null,
+      suspendMessage: null,
+      done: true,
+      background: null,
+      hint,
+    };
+  }
+
+  // Active thread
+  const status = await resolveActiveThreadStatus(storageRoot, threadId, uwf, activeHead, workflow);
  const currentRole = resolveCurrentRole(uwf, activeHead, workflow);
  const suspendFields = resolveSuspendFieldsForShow(entry, status, uwf, activeHead, workflow);

@@ -521,27 +533,6 @@ export async function cmdThreadShow(
  };
 }

-  const hist = findHistoryEntry(uwf.varStore, threadId);
-  if (hist !== null) {
-    const status: ThreadStatus = hist.reason === "cancelled" ? "cancelled" : "completed";
-
-    return {
-      workflow: hist.workflow,
-      thread: threadId,
-      head: hist.head,
-      status,
-      currentRole: null,
-      suspendedRole: null,
-      suspendMessage: null,
-      done: true,
-      background: null,
-      hint: null,
-    };
-  }
-
-  fail(`thread not found: ${threadId}`);
-}
-
 export type ThreadListItemWithStatus = ThreadListItem & {
  status: ThreadStatus;
  currentRole: string | null;
@@ -594,19 +585,20 @@ async function collectActiveThreads(
 }

 function collectCompletedThreads(
-  varStore: VarStore,
+  uwf: UwfStore,
  activeIds: Set<ThreadId>,
 ): ThreadListItemWithStatus[] {
  const items: ThreadListItemWithStatus[] = [];
-  const history = loadAllHistory(varStore);
+  const history = loadHistoryThreads(uwf.varStore);
  const seen = new Set<ThreadId>(); // Deduplication (issue #470)
-  for (const entry of history) {
-    if (!activeIds.has(entry.thread) && !seen.has(entry.thread)) {
-      seen.add(entry.thread);
-      const status = entry.reason === "cancelled" ? "cancelled" : "completed";
+  for (const [threadId, entry] of Object.entries(history)) {
+    if (!activeIds.has(threadId as ThreadId) && !seen.has(threadId as ThreadId)) {
+      seen.add(threadId as ThreadId);
+      const status = entry.status;
+      const workflow = resolveWorkflowFromHead(uwf, entry.head);
      items.push({
-        thread: entry.thread,
-        workflow: entry.workflow,
+        thread: threadId as ThreadId,
+        workflow: workflow ?? "",
        head: entry.head,
        status,
        currentRole: null,
@@ -659,7 +651,7 @@ export async function cmdThreadList(
  take: number | null,
 ): Promise<ThreadListItemWithStatus[]> {
  const uwf = await createUwfStore(storageRoot);
-  const index = loadAllThreads(uwf.varStore);
+  const index = loadActiveThreads(uwf.varStore);

  // Collect active threads
  let items = await collectActiveThreads(storageRoot, uwf, index);
@@ -671,7 +663,7 @@ export async function cmdThreadList(
    statusFilter.includes("cancelled");
  if (includeCompleted) {
    const activeIds = new Set(items.map((i) => i.thread));
-    const completedItems = collectCompletedThreads(uwf.varStore, activeIds);
+    const completedItems = collectCompletedThreads(uwf, activeIds);
    items = items.concat(completedItems);
  }

@@ -919,7 +911,7 @@ function resolveEvaluateArgs(
  chain: ChainState,
 ): { lastRole: string; lastOutput: EvaluateLastOutput } {
  if (chain.headIsStart) {
-    return { lastRole: START_ROLE, lastOutput: { [STATUS_KEY]: "_" } };
+    return { lastRole: START_ROLE, lastOutput: { [STATUS_KEY]: "new" } };
  }

  const lastStep = chain.stepsNewestFirst[0];
@@ -969,6 +961,12 @@ function resolveAgentConfig(
  agentOverride: string | null,
 ): AgentConfig {
  if (agentOverride !== null) {
+    // Try config alias first (e.g. "hermes" → config.agents.hermes),
+    // then fall back to raw command name (e.g. "uwf-hermes" or "/usr/bin/agent").
+    const fromAlias = config.agents[agentOverride as AgentAlias];
+    if (fromAlias !== undefined) {
+      return fromAlias;
+    }
    return parseAgentOverride(agentOverride);
  }

@@ -1006,6 +1004,12 @@ function spawnAgent(
    });
  } catch (e) {
    const err = e as NodeJS.ErrnoException & { stderr?: Buffer | string | null };
+    if (err.code === "ENOENT") {
+      failStep(
+        plog,
+        `"${agent.command}" not found in PATH. Install it or check your PATH config. Run: which ${agent.command}`,
+      );
+    }
    const stderr =
      err.stderr == null
        ? ""
@@ -1035,15 +1039,8 @@ function spawnAgent(
  return obj as unknown as AdapterOutput;
 }

-function archiveThread(uwf: UwfStore, threadId: ThreadId, workflow: CasRef, head: CasRef): void {
-  deleteThread(uwf.varStore, threadId);
-  addHistoryEntry(uwf.varStore, {
-    thread: threadId,
-    workflow,
-    head,
-    completedAt: Date.now(),
-    reason: "completed",
-  });
+function archiveThread(uwf: UwfStore, threadId: ThreadId, _workflow: CasRef, _head: CasRef): void { // _workflow and _head are accepted but unused in this body
+  completeThread(uwf.varStore, threadId, "completed"); // delegates to the store helper, passing the "completed" outcome for this thread
+}

 export async function cmdThreadResume(
@@ -1067,17 +1064,24 @@ export async function cmdThreadResume(
  const chain = walkChain(uwf, headHash);
  const workflowHash = chain.start.workflow;

-  const status = await resolveActiveThreadStatus(
-    storageRoot,
-    threadId,
-    uwf,
-    headHash,
-    workflowHash,
-  );
-  if (status !== "suspended") {
-    fail(`thread is not suspended: ${threadId} (status: ${status})`);
+  // Check entry.status first for completed/cancelled (like in cmdThreadShow)
+  let status: ThreadStatus;
+  if (entry.status === "completed" || entry.status === "cancelled") {
+    status = entry.status;
+  } else {
+    status = await resolveActiveThreadStatus(storageRoot, threadId, uwf, headHash, workflowHash);
  }

+  if (status !== "suspended" && status !== "completed") {
+    fail(`thread cannot be resumed: ${threadId} (status: ${status})`);
+  }
+
+  const plog = createProcessLogger({
+    storageRoot,
+    context: { thread: threadId, workflow: workflowHash },
+  });
+
+  if (status === "suspended") {
    const suspendFields = resolveSuspendFieldsForShow(entry, status, uwf, headHash, workflowHash);
    if (suspendFields.suspendedRole === null) {
      fail(`thread is suspended but suspendedRole is missing: ${threadId}`);
@@ -1087,10 +1091,6 @@ export async function cmdThreadResume(
    }

    const resumePrompt = buildResumePrompt(suspendFields.suspendMessage, supplement);
-  const plog = createProcessLogger({
-    storageRoot,
-    context: { thread: threadId, workflow: workflowHash },
-  });

    plog.log(
      PL_THREAD_RESUME,
@@ -1104,6 +1104,43 @@ export async function cmdThreadResume(
    });
  }

+  // status === "completed"
+  const workflow = loadWorkflowPayload(uwf, workflowHash);
+  const startResult = evaluate(workflow.graph, START_ROLE, { [STATUS_KEY]: "resume" });
+  if (!startResult.ok) {
+    fail(`failed to evaluate $START: ${startResult.error.message}`);
+  }
+  if (isSuspendResult(startResult.value)) {
+    fail("workflow cannot start with $SUSPEND");
+  }
+  if (startResult.value.role === END_ROLE) {
+    fail("workflow cannot start with $END");
+  }
+
+  const startRole = startResult.value.role;
+  const completedResumePrompt = buildResumePrompt(startResult.value.prompt, supplement);
+
+  const updatedEntry = { ...entry, status: "idle" as const, completedAt: null };
+  setThread(uwf.varStore, threadId, updatedEntry);
+
+  plog.log(
+    PL_THREAD_RESUME,
+    `resume completed role=${startRole} supplement=${supplement !== null}`,
+    null,
+  );
+
+  return cmdThreadStepOnce(storageRoot, threadId, agentOverride, plog, {
+    role: startRole,
+    prompt: completedResumePrompt,
+  });
+}
+
+export function validateCount(count: number): void { // guard for --count: returns normally only for integers >= 1, otherwise throws Error
+  if (count < 1 || !Number.isInteger(count)) { // the isInteger test also rejects NaN, Infinity and fractional values
+    throw new Error(`--count must be a positive integer, got: ${count}`);
+  }
+}
+
 export async function cmdThreadExec(
  storageRoot: string,
  threadId: ThreadId,
@@ -1112,9 +1149,7 @@ export async function cmdThreadExec(
  background: boolean,
  backgroundWorker: boolean,
 ): Promise<StepOutput[]> {
-  if (count < 1 || !Number.isInteger(count)) {
-    fail(`--count must be a positive integer, got: ${count}`);
-  }
+  validateCount(count);

  // Check if thread is already running in background (unless we ARE the background worker)
  if (!backgroundWorker) {
@@ -1249,7 +1284,7 @@ function resolveResumeStepTarget(
 }

 async function resolveModeratorStepTarget(
-  storageRoot: string,
+  _storageRoot: string,
  threadId: ThreadId,
  entry: ThreadIndexEntry,
  headHash: CasRef,
@@ -1318,7 +1353,7 @@ async function resolveModeratorStepTarget(
 }

 async function finalizeAgentStep(
-  storageRoot: string,
+  _storageRoot: string,
  threadId: ThreadId,
  workflowHash: CasRef,
  workflow: WorkflowPayload,
@@ -1450,10 +1485,6 @@ async function resolveHeadHash(storageRoot: string, threadId: ThreadId): Promise
  if (entry !== null) {
    return entry.head;
  }
-  const hist = findHistoryEntry(uwf.varStore, threadId);
-  if (hist !== null) {
-    return hist.head;
-  }
  fail(`thread not found: ${threadId}`);
 }

@@ -1533,7 +1564,6 @@ export async function cmdThreadCancel(
  if (entry === null) {
    fail(`thread not active: ${threadId}`);
  }
-  const head = entry.head;

  // Check if thread is running in background and terminate it
  const runningMarker = await isThreadRunning(storageRoot, threadId);
@@ -1546,21 +1576,7 @@ export async function cmdThreadCancel(
    await deleteMarker(storageRoot, threadId);
  }

-  const workflow = resolveWorkflowFromHead(uwf, head);
-  if (workflow === null) {
-    fail(`failed to resolve workflow from head: ${head}`);
-  }
-
-  deleteThread(uwf.varStore, threadId);
-
-  const historyEntry: ThreadHistoryLine = {
-    thread: threadId,
-    workflow,
-    head,
-    completedAt: Date.now(),
-    reason: "cancelled",
-  };
-  addHistoryEntry(uwf.varStore, historyEntry);
+  completeThread(uwf.varStore, threadId, "cancelled");

  return { thread: threadId, cancelled: true };
 }
@@ -6,11 +6,11 @@ describe("Edge prompt template variable resolution", () => {
  test("returns error when rendered prompt is empty string", () => {
    const graph = {
      $START: {
-        _: { role: "classifier", prompt: "{{{userPrompt}}}", location: null },
+        new: { role: "classifier", prompt: "{{{userPrompt}}}", location: null },
      },
    };

-    const result = evaluate(graph, "$START", {});
+    const result = evaluate(graph, "$START", { $status: "new" });

    expect(result.ok).toBe(false);
    if (!result.ok) {
@@ -22,11 +22,11 @@ describe("Edge prompt template variable resolution", () => {
  test("returns error when rendered prompt is whitespace-only", () => {
    const graph = {
      $START: {
-        _: { role: "classifier", prompt: "  {{{userPrompt}}}  ", location: null },
+        new: { role: "classifier", prompt: "  {{{userPrompt}}}  ", location: null },
      },
    };

-    const result = evaluate(graph, "$START", {});
+    const result = evaluate(graph, "$START", { $status: "new" });

    expect(result.ok).toBe(false);
    if (!result.ok) {
@@ -38,11 +38,11 @@ describe("Edge prompt template variable resolution", () => {
  test("succeeds when all template variables resolve to non-empty values", () => {
    const graph = {
      $START: {
-        _: { role: "classifier", prompt: "{{{userPrompt}}}", location: null },
+        new: { role: "classifier", prompt: "{{{userPrompt}}}", location: null },
      },
    };

-    const result = evaluate(graph, "$START", { userPrompt: "Fix the bug" });
+    const result = evaluate(graph, "$START", { $status: "new", userPrompt: "Fix the bug" });

    expect(result.ok).toBe(true);
    if (result.ok) {
@@ -53,11 +53,11 @@ describe("Edge prompt template variable resolution", () => {
  test("succeeds with static (no-variable) prompt", () => {
    const graph = {
      $START: {
-        _: { role: "classifier", prompt: "Classify this input", location: null },
+        new: { role: "classifier", prompt: "Classify this input", location: null },
      },
    };

-    const result = evaluate(graph, "$START", {});
+    const result = evaluate(graph, "$START", { $status: "new" });

    expect(result.ok).toBe(true);
    if (result.ok) {
@@ -68,11 +68,11 @@ describe("Edge prompt template variable resolution", () => {
  test("succeeds when prompt has mix of static text and unresolved variables", () => {
    const graph = {
      $START: {
-        _: { role: "classifier", prompt: "Please handle: {{{userPrompt}}}", location: null },
+        new: { role: "classifier", prompt: "Please handle: {{{userPrompt}}}", location: null },
      },
    };

-    const result = evaluate(graph, "$START", {});
+    const result = evaluate(graph, "$START", { $status: "new" });

    expect(result.ok).toBe(true);
    if (result.ok) {
@@ -83,11 +83,11 @@ describe("Edge prompt template variable resolution", () => {
  test("returns error when ALL variables missing and no static text remains", () => {
    const graph = {
      $START: {
-        _: { role: "classifier", prompt: "{{{a}}}{{{b}}}", location: null },
+        new: { role: "classifier", prompt: "{{{a}}}{{{b}}}", location: null }, // the prompt consists of placeholders only
      },
    };

-    const result = evaluate(graph, "$START", {});
+    const result = evaluate(graph, "$START", { $status: "new" }); // neither `a` nor `b` is supplied in the output object

    expect(result.ok).toBe(false); // nothing is left of the prompt once both placeholders are unresolved
  });
@@ -6,9 +6,7 @@ import type { EvaluateResult, Result } from "./types.js";
 // Disable HTML escaping — prompts are plain text, not HTML.
 mustache.escape = (text: string) => text;

-const START_ROLE = "$START";
 const SUSPEND_ROLE = "$SUSPEND";
-const UNIT_STATUS = "_";

 type LastOutput = Record<string, unknown>;

@@ -19,12 +17,15 @@ export function evaluate(
  lastRole: string,
  lastOutput: LastOutput,
 ): Result<EvaluateResult, Error> {
-  const status =
-    lastRole === START_ROLE
-      ? UNIT_STATUS
-      : typeof lastOutput[STATUS_KEY] === "string"
-        ? (lastOutput[STATUS_KEY] as string)
-        : UNIT_STATUS;
+  let status: string;
+  if (typeof lastOutput[STATUS_KEY] === "string") {
+    status = lastOutput[STATUS_KEY] as string;
+  } else {
+    return {
+      ok: false,
+      error: new Error(`agent output for role "${lastRole}" is missing required "$status" string`),
+    };
+  }

  const roleTargets = graph[lastRole];
  if (roleTargets === undefined) {
@@ -6,13 +6,7 @@ import { join } from "node:path";

 import { bootstrap, type Hash, type Store, type VarStore } from "@ocas/core";
 import { createFsStore, createSqliteVarStore } from "@ocas/fs";
-import type {
-  CasRef,
-  ThreadId,
-  ThreadIndexEntry,
-  ThreadListItem,
-  ThreadsIndex,
-} from "@united-workforce/protocol";
+import type { CasRef, ThreadId, ThreadIndexEntry, ThreadsIndex } from "@united-workforce/protocol";
 import { parseThreadsIndex } from "@united-workforce/protocol";
 import { parse } from "yaml";

@@ -26,9 +20,6 @@ export const REGISTRY_VAR_PREFIX = "@uwf/registry/";
 /** Variable name prefix for active thread entries (`@uwf/thread/<thread-id>`). */
 export const THREAD_VAR_PREFIX = "@uwf/thread/";

-/** Variable name prefix for completed/cancelled thread history (`@uwf/history/<thread-id>`). */
-export const HISTORY_VAR_PREFIX = "@uwf/history/";
-
 /** A workflow entry discovered from the project-local .workflows/ directory. */
 export type ProjectWorkflowEntry = {
  /** Workflow name (from YAML `name` field, equals filename stem). */
@@ -156,11 +147,6 @@ export function getThreadsPath(storageRoot: string): string {
  return join(storageRoot, "threads.yaml");
 }

-export type ThreadHistoryLine = ThreadListItem & {
-  completedAt: number;
-  reason: "completed" | "cancelled" | null;
-};
-
 export type UwfStore = {
  storageRoot: string;
  store: Store;
@@ -179,6 +165,7 @@ export async function createUwfStore(storageRoot: string): Promise<UwfStore> {
  await migrateWorkflowRegistryIfNeeded(storageRoot, varStore);
  await migrateThreadsIndexIfNeeded(storageRoot, varStore);
  await migrateHistoryIfNeeded(storageRoot, varStore);
+  migrateHistoryVarsToThreadVars(varStore);
  return { storageRoot, store, schemas, varStore };
 }

@@ -299,8 +286,10 @@ function threadVarName(threadId: ThreadId): string {
 function entryFromVariable(v: { value: string; tags: Record<string, string> }): ThreadIndexEntry { // Rebuilds a ThreadIndexEntry from a stored variable: the value is the head ref, the tags carry the optional fields.
  return {
    head: v.value as CasRef,
+    status: (v.tags.status ?? "idle") as ThreadIndexEntry["status"], // no status tag means "idle" (setThread omits the tag for idle entries); NOTE(review): the tag value is cast, not validated
    suspendedRole: v.tags.suspendedRole ?? null,
    suspendMessage: v.tags.suspendMessage ?? null,
+    completedAt: v.tags.completedAt !== undefined ? Number(v.tags.completedAt) : null, // the tag is a string; NOTE(review): a non-numeric tag would become NaN here
  };
 }

@@ -331,21 +320,74 @@ export function setThread(varStore: VarStore, threadId: ThreadId, entry: ThreadI
  // Head CAS nodes may use different schemas (StartNode vs StepNode) — clear all variants first.
  varStore.remove(name);
  const tags: Record<string, string> = {};
+  if (entry.status !== "idle") {
+    tags.status = entry.status;
+  }
  if (entry.suspendedRole !== null) {
    tags.suspendedRole = entry.suspendedRole;
  }
  if (entry.suspendMessage !== null) {
    tags.suspendMessage = entry.suspendMessage;
  }
+  if (entry.completedAt !== null) {
+    tags.completedAt = String(entry.completedAt);
+  }
  varStore.set(name, entry.head, { tags });
 }

-/** Remove an active thread entry (on complete/cancel). */
-export function deleteThread(varStore: VarStore, threadId: ThreadId): void {
-  varStore.remove(threadVarName(threadId));
+/** Return a new index holding every entry from loadAllThreads whose status is neither "completed" nor "cancelled". */
+export function loadActiveThreads(varStore: VarStore): ThreadsIndex {
+  const all = loadAllThreads(varStore);
+  const active: ThreadsIndex = {};
+  for (const [threadId, entry] of Object.entries(all)) {
+    if (entry.status !== "completed" && entry.status !== "cancelled") { // every other status value counts as active
+      active[threadId as ThreadId] = entry; // Object.entries yields plain string keys, hence the cast back to ThreadId
+    }
+  }
+  return active;
 }

-function parseHistoryJsonlLine(trimmed: string): ThreadHistoryLine | null {
+/** Return a new index holding every entry from loadAllThreads whose status is "completed" or "cancelled" (the exact complement of loadActiveThreads). */
+export function loadHistoryThreads(varStore: VarStore): ThreadsIndex {
+  const all = loadAllThreads(varStore);
+  const history: ThreadsIndex = {};
+  for (const [threadId, entry] of Object.entries(all)) {
+    if (entry.status === "completed" || entry.status === "cancelled") {
+      history[threadId as ThreadId] = entry; // Object.entries yields plain string keys, hence the cast back to ThreadId
+    }
+  }
+  return history;
+}
+
+/** Mark an existing thread as finished in place: keeps its head, sets status to `reason`, clears both suspend fields and stamps completedAt with Date.now(). */
+export function completeThread(
+  varStore: VarStore,
+  threadId: ThreadId,
+  reason: "completed" | "cancelled",
+): void {
+  const entry = getThread(varStore, threadId);
+  if (entry === null) {
+    return; // getThread found nothing for this id: there is nothing to mark, so this is a silent no-op
+  }
+  const completed = {
+    head: entry.head,
+    status: reason,
+    suspendedRole: null,
+    suspendMessage: null,
+    completedAt: Date.now(),
+  } as ThreadIndexEntry; // NOTE(review): `as` skips missing-property checks; the migrations in this file use a `: ThreadIndexEntry` annotation instead
+  setThread(varStore, threadId, completed);
+}
+
+type LegacyHistoryEntry = { // Record shape returned by parseLegacyHistoryJsonlLine for one line of the legacy history.jsonl file.
+  thread: ThreadId;
+  workflow: CasRef; // not carried over by migrateHistoryIfNeeded: the migrated thread entry has no workflow field
+  head: CasRef;
+  completedAt: number;
+  reason: "completed" | "cancelled" | null; // migrateHistoryIfNeeded maps anything other than "cancelled" (including null) to "completed"
+};
+
+function parseLegacyHistoryJsonlLine(trimmed: string): LegacyHistoryEntry | null {
  let raw: unknown;
  try {
    raw = JSON.parse(trimmed) as unknown;
@@ -379,7 +421,7 @@ function parseHistoryJsonlLine(trimmed: string): ThreadHistoryLine | null {
  return null;
 }

-/** One-time migration: `~/.uwf/history.jsonl` → `@uwf/history/*` variables. */
+/** One-time migration: `~/.uwf/history.jsonl` → `@uwf/thread/*` variables with status tags. */
 export async function migrateHistoryIfNeeded(
  storageRoot: string,
  varStore: VarStore,
@@ -395,47 +437,43 @@ export async function migrateHistoryIfNeeded(
    if (trimmed === "") {
      continue;
    }
-    const entry = parseHistoryJsonlLine(trimmed);
+    const entry = parseLegacyHistoryJsonlLine(trimmed);
    if (entry !== null) {
-      addHistoryEntry(varStore, entry);
+      const status = entry.reason === "cancelled" ? "cancelled" : "completed";
+      const threadEntry: ThreadIndexEntry = {
+        head: entry.head,
+        status: status as ThreadIndexEntry["status"],
+        suspendedRole: null,
+        suspendMessage: null,
+        completedAt: entry.completedAt,
+      };
+      setThread(varStore, entry.thread, threadEntry);
    }
  }

  await rename(path, `${path}.migrated`);
 }

-export function loadAllHistory(varStore: VarStore): ThreadHistoryLine[] {
-  const vars = varStore.list({ namePrefix: HISTORY_VAR_PREFIX });
-  return vars.map((v) => ({
-    thread: v.name.slice(HISTORY_VAR_PREFIX.length) as ThreadId,
-    workflow: v.tags.workflow ?? "",
-    head: v.value as CasRef,
-    completedAt: Number(v.tags.completedAt ?? "0"),
-    reason: v.tags.reason === "completed" || v.tags.reason === "cancelled" ? v.tags.reason : null,
-  }));
-}
+/** Rewrite every legacy `@uwf/history/<thread-id>` variable as an `@uwf/thread/<thread-id>` entry with a completed/cancelled status, then remove the legacy variable. */
+export function migrateHistoryVarsToThreadVars(varStore: VarStore): void {
+  const LEGACY_HISTORY_VAR_PREFIX = "@uwf/history/";
+  const vars = varStore.list({ namePrefix: LEGACY_HISTORY_VAR_PREFIX }); // empty once everything is migrated, which makes repeat calls a no-op

-export function findHistoryEntry(varStore: VarStore, threadId: ThreadId): ThreadHistoryLine | null {
-  const vars = varStore.list({ namePrefix: `${HISTORY_VAR_PREFIX}${threadId}` });
-  const v = vars.find((entry) => entry.name === `${HISTORY_VAR_PREFIX}${threadId}`);
-  if (v === undefined) {
-    return null;
-  }
-  return {
-    thread: threadId,
-    workflow: v.tags.workflow ?? "",
+  for (const v of vars) {
+    const threadId = v.name.slice(LEGACY_HISTORY_VAR_PREFIX.length) as ThreadId; // the thread id is the variable name minus the prefix
+    const reason = v.tags.reason;
+    const status = reason === "cancelled" ? "cancelled" : "completed"; // a missing or unrecognised reason is treated as "completed"
+    const completedAt = Number(v.tags.completedAt ?? Date.now()); // falls back to the migration time when the tag is absent; NOTE(review): a non-numeric tag yields NaN
+
+    const threadEntry: ThreadIndexEntry = {
      head: v.value as CasRef,
-    completedAt: Number(v.tags.completedAt ?? "0"),
-    reason: v.tags.reason === "completed" || v.tags.reason === "cancelled" ? v.tags.reason : null,
+      status: status as ThreadIndexEntry["status"],
+      suspendedRole: null,
+      suspendMessage: null,
+      completedAt,
    };
-}

-export function addHistoryEntry(varStore: VarStore, entry: ThreadHistoryLine): void {
-  varStore.set(`${HISTORY_VAR_PREFIX}${entry.thread}`, entry.head, {
-    tags: {
-      workflow: entry.workflow,
-      completedAt: String(entry.completedAt),
-      reason: entry.reason ?? "completed",
-    },
-  });
+    setThread(varStore, threadId, threadEntry); // NOTE(review): setThread removes then sets, so an existing @uwf/thread entry with the same id is replaced
+    varStore.remove(v.name); // drop the legacy variable so it is not picked up again
+  }
 }
@@ -24,26 +24,22 @@ function isOneOfSchema(fm: unknown): fm is SchemaObj & { oneOf: SchemaObj[] } {
  return Array.isArray(obj.oneOf);
 }

-/** Check if a frontmatter schema uses enum-based multi-exit ($status with multiple enum values). */
-function isEnumMultiExit(fm: unknown): boolean {
+/** Return true only when `fm` is a non-null object whose `properties.$status.const` is a string (flat schema form). */
+function hasStatusConst(fm: unknown): boolean {
  if (typeof fm !== "object" || fm === null) return false;
  const obj = fm as SchemaObj;
  const props = obj.properties as Record<string, SchemaObj> | undefined;
  if (!props?.$status) return false;
-  const statusDef = props.$status;
-  if (!Array.isArray(statusDef.enum)) return false;
-  // Filter out "_" (wildcard) — if remaining values > 1, it's multi-exit
-  const statuses = (statusDef.enum as string[]).filter((s) => s !== "_");
-  return statuses.length > 1;
+  return typeof props.$status.const === "string"; // NOTE(review): a $status declared only via `enum` returns false here — confirm that enum-based flat schemas are meant to be rejected
 }

-/** Extract status values from an enum-based $status field. */
-function getEnumStatuses(fm: SchemaObj): string[] {
+/** Return the `$status` const of a flat schema as a one-element array, or [] when `$status` is absent or its const is not a string. */
+function getConstStatuses(fm: SchemaObj): string[] {
  const props = fm.properties as Record<string, SchemaObj> | undefined;
  if (!props?.$status) return [];
  const statusDef = props.$status;
-  if (!Array.isArray(statusDef.enum)) return [];
-  return (statusDef.enum as string[]).filter((s) => s !== "_");
+  if (typeof statusDef.const === "string") return [statusDef.const]; // a const schema carries exactly one status value
+  return [];
 }

 /** Get property names from a schema object. */
@@ -101,9 +97,9 @@ function checkGraphStructure(payload: WorkflowPayload, errors: string[]): void {
  if (!graphNodes.has("$START")) {
    errors.push("$START must be defined in graph");
  } else {
-    const startKeys = Object.keys(payload.graph.$START);
-    if (startKeys.length !== 1 || startKeys[0] !== "_") {
-      errors.push('$START must have exactly one edge with status "_"');
+    const startKeys = new Set(Object.keys(payload.graph.$START));
+    if (!startKeys.has("new") || !startKeys.has("resume")) {
+      errors.push('$START must have edges with statuses "new" and "resume"');
    }
  }

@@ -194,18 +190,13 @@ function checkOneOfDiscriminant(
  }
 }

-/** Check status-edge consistency for a multi-exit role. */
-function checkMultiExitEdges(
+/** Check status-edge consistency for a user role. */
+function checkStatusEdges(
  roleName: string,
  graphKeys: Set<string>,
  statusSet: Set<string>,
  errors: string[],
 ): void {
-  if (graphKeys.has("_")) {
-    errors.push(`role "${roleName}" is multi-exit but graph uses "_"`);
-    return;
-  }
-
  const extraKeys = [...graphKeys].filter((k) => !statusSet.has(k));
  const missingKeys = [...statusSet].filter((k) => !graphKeys.has(k));
  if (extraKeys.length > 0) {
@@ -255,50 +246,23 @@ function checkRoleConsistency(payload: WorkflowPayload, errors: string[]): void
      const statuses = getOneOfStatuses(variants);

      checkOneOfDiscriminant(roleName, variants, statuses, errors);
-      checkMultiExitEdges(roleName, graphKeys, new Set(statuses), errors);
+      checkStatusEdges(roleName, graphKeys, new Set(statuses), errors);
      checkMultiExitMustache(roleName, graphEntry, variants, errors);
-    } else if (isEnumMultiExit(fm)) {
-      const statuses = getEnumStatuses(fm as SchemaObj);
-      checkMultiExitEdges(roleName, graphKeys, new Set(statuses), errors);
-      // For enum-based schemas, mustache vars come from the flat properties
-      checkSingleExitMustache(roleName, graphEntry, fm as SchemaObj, errors);
+    } else if (hasStatusConst(fm)) {
+      const statuses = getConstStatuses(fm as SchemaObj);
+      checkStatusEdges(roleName, graphKeys, new Set(statuses), errors);
+      // For const-based flat schemas, mustache vars come from the flat properties
+      checkFlatMustache(roleName, graphEntry, fm as SchemaObj, errors);
    } else {
-      checkSingleExitRole(roleName, graphKeys, graphEntry, fm as SchemaObj | null, errors);
-    }
-  }
-}
-
-/** Check single-exit role status and mustache. */
-function checkSingleExitRole(
-  roleName: string,
-  graphKeys: Set<string>,
-  graphEntry: Record<string, { role: string; prompt: string }>,
-  fm: SchemaObj | null,
-  errors: string[],
-): void {
-  if (graphKeys.size > 1 || (graphKeys.size === 1 && !graphKeys.has("_"))) {
-    if (!graphKeys.has("_")) {
-      errors.push(`role "${roleName}" is single-exit but graph has no "_" key`);
-    } else {
-      errors.push(`role "${roleName}" is single-exit but has status keys other than "_"`);
-    }
-  }
-
-  const singleTarget = graphEntry._;
-  if (!singleTarget) return;
-
-  const vars = extractMustacheVars(singleTarget.prompt);
-  const propNames = fm ? getPropertyNames(fm) : new Set<string>();
-  for (const v of vars) {
-    if (v === "$status") continue;
-    if (!propNames.has(v)) {
-      errors.push(`prompt variable "${v}" not found in role "${roleName}" frontmatter`);
+      errors.push(
+        `role "${roleName}" must define "$status" as const (or oneOf with const) in frontmatter`,
+      );
    }
  }
 }

 /** Check mustache vars in all edge prompts against flat schema properties. */
-function checkSingleExitMustache(
+function checkFlatMustache(
  roleName: string,
  graphEntry: Record<string, { role: string; prompt: string }>,
  fm: SchemaObj,
@@ -57,9 +57,18 @@ function isGraph(value: unknown): boolean {
  if (!isRecord(value)) {
    return false;
  }
-  return Object.values(value).every(
-    (statusMap) => isRecord(statusMap) && Object.values(statusMap).every((t) => isTarget(t)),
-  );
+  return Object.values(value).every((statusMap) => {
+    if (!isRecord(statusMap)) {
+      return false;
+    }
+    return Object.entries(statusMap).every(([status, target]) => {
+      // "_" is no longer a valid status key anywhere — $START uses "new"/"resume".
+      if (status === "_") {
+        return false;
+      }
+      return isTarget(target);
+    });
+  });
 }

 /**
@@ -90,12 +99,13 @@ export function checkWorkflowFilenameConsistency(
 ): string | null {
  const expected = workflowNameFromPath(filePath);
  if (payload.name !== expected) {
-    return `workflow name mismatch: file "${basename(filePath)}" implies name "${expected}" but YAML declares name "${payload.name}"`;
+    return `workflow name mismatch: file "${basename(filePath)}" implies name "${expected}" but YAML declares name "${payload.name}". Either rename the file to "${payload.name}.yaml" or change the YAML \`name\` field to "${expected}"`;
  }
  return null;
 }

 /** Validate YAML-parsed workflow document shape (outputSchema may be inline JSON Schema). */
+// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: validation function with many field checks
 export function parseWorkflowPayload(raw: unknown): WorkflowPayload | null {
  if (!isRecord(raw)) {
    return null;
@@ -1,6 +1,6 @@
 {
  "name": "@united-workforce/dashboard",
-  "version": "0.5.0-alpha.4",
+  "version": "0.1.0",
  "private": true,
  "type": "module",
  "scripts": {
@@ -0,0 +1,9 @@
+# @united-workforce/eval
+
+## 0.1.2
+
+### Patch Changes
+
+- 850a3b2: fix: resolve --agent override via config alias before raw command
+
+  `resolveAgentConfig()` now checks `config.agents[alias]` first before falling back to `parseAgentOverride()`. Eval CLI default `--agent` changed from `"hermes"` to `"uwf-hermes"`.
@@ -0,0 +1,219 @@
+import type { StepEntry } from "@united-workforce/protocol";
+import { beforeEach, describe, expect, test, vi } from "vitest";
+
+import {
+  runFrontmatterJudge,
+  runHallucinationJudge,
+  runTokenStatsJudge,
+  runUpstreamJudge,
+} from "../src/judge/builtin/index.js";
+
+// Mock the shared read-steps helper so the judges never shell out to `uwf`.
+vi.mock("../src/judge/builtin/read-steps.js", () => ({
+  readThreadSteps: vi.fn(),
+}));
+
+import { readThreadSteps } from "../src/judge/builtin/read-steps.js";
+
+const mockedReadSteps = vi.mocked(readThreadSteps);
+
+function makeStep(overrides: Partial<StepEntry>): StepEntry { // Test fixture: a StepEntry with fixed defaults; any field present in `overrides` replaces the default.
+  return {
+    hash: "HASH000000000",
+    role: "worker",
+    output: "---\n$status: done\n---\n\nbody", // default output: a frontmatter block with $status followed by a body
+    detail: "DETAIL0000000",
+    agent: "hermes",
+    timestamp: 0,
+    durationMs: 0,
+    usage: null,
+    ...overrides, // spread last so the overrides win over the defaults above
+  };
+}
+
+beforeEach(() => {
+  mockedReadSteps.mockReset(); // each test installs its own mockReturnValue, so start from a clean mock
+});
+
+describe("frontmatter-compliance judge", () => { // Every case below expects score = stepsValid / stepsTotal, with 0 for an empty thread.
+  test("all steps have valid frontmatter → score 1.0", async () => {
+    mockedReadSteps.mockReturnValue([
+      makeStep({ role: "a", output: "---\n$status: done\n---\n\nwork" }),
+      makeStep({ role: "b", output: "---\n$status: needs_input\n---\nmore" }),
+    ]);
+
+    const result = await runFrontmatterJudge("T1");
+    const data = result.data as { stepsTotal: number; stepsValid: number; invalidSteps: unknown[] };
+
+    expect(result.score).toBe(1.0);
+    expect(data.stepsTotal).toBe(2);
+    expect(data.stepsValid).toBe(2);
+    expect(data.invalidSteps).toHaveLength(0);
+  });
+
+  test("some steps missing $status → partial score", async () => {
+    mockedReadSteps.mockReturnValue([
+      makeStep({ role: "a", output: "---\n$status: done\n---\nok" }),
+      makeStep({ role: "b", output: "---\nfoo: bar\n---\nmissing status" }), // frontmatter present but without $status
+      makeStep({ role: "c", output: "no frontmatter at all" }),
+    ]);
+
+    const result = await runFrontmatterJudge("T2");
+    const data = result.data as {
+      stepsTotal: number;
+      stepsValid: number;
+      invalidSteps: Array<{ stepIndex: number; role: string; errors: string[] }>;
+    };
+
+    expect(result.score).toBeCloseTo(1 / 3, 10); // one valid step out of three
+    expect(data.stepsTotal).toBe(3);
+    expect(data.stepsValid).toBe(1);
+    expect(data.invalidSteps).toHaveLength(2);
+    expect(data.invalidSteps[0]).toMatchObject({ stepIndex: 1, role: "b" }); // stepIndex is the 0-based position in the steps array
+    expect(data.invalidSteps[1]).toMatchObject({ stepIndex: 2, role: "c" });
+  });
+
+  test("no steps → score 0 (0/0 edge case)", async () => {
+    mockedReadSteps.mockReturnValue([]);
+
+    const result = await runFrontmatterJudge("T3");
+    const data = result.data as { stepsTotal: number; stepsValid: number; invalidSteps: unknown[] };
+
+    expect(result.score).toBe(0); // the 0/0 case must come out as 0, not NaN
+    expect(data.stepsTotal).toBe(0);
+    expect(data.stepsValid).toBe(0);
+    expect(data.invalidSteps).toHaveLength(0);
+  });
+
+  test("empty-string $status counts as invalid", async () => {
+    mockedReadSteps.mockReturnValue([makeStep({ role: "a", output: '---\n$status: ""\n---\nx' })]);
+
+    const result = await runFrontmatterJudge("T4");
+    expect(result.score).toBe(0);
+  });
+
+  test("parsed object output with $status → score 1.0", async () => {
+    mockedReadSteps.mockReturnValue([
+      makeStep({ role: "a", output: { $status: "done", summary: "fixed" } as unknown as string }), // double cast: the fixture type expects a string, but this case feeds an already-parsed object
+      makeStep({ role: "b", output: { $status: "reviewed" } as unknown as string }),
+    ]);
+
+    const result = await runFrontmatterJudge("T5");
+    const data = result.data as { stepsTotal: number; stepsValid: number; invalidSteps: unknown[] };
+
+    expect(result.score).toBe(1.0);
+    expect(data.stepsTotal).toBe(2);
+    expect(data.stepsValid).toBe(2);
+  });
+
+  test("parsed object output missing $status → score 0", async () => {
+    mockedReadSteps.mockReturnValue([
+      makeStep({ role: "a", output: { summary: "no status field" } as unknown as string }),
+    ]);
+
+    const result = await runFrontmatterJudge("T6");
+    expect(result.score).toBe(0);
+  });
+});
+
+describe("token-stats judge", () => { // Every case below expects score 1.0; the assertions target the aggregated usage numbers.
+  test("steps with usage → sums correctly", async () => {
+    mockedReadSteps.mockReturnValue([
+      makeStep({
+        role: "a",
+        usage: { turns: 2, inputTokens: 100, outputTokens: 50, duration: 1.5 },
+      }),
+      makeStep({
+        role: "b",
+        usage: { turns: 3, inputTokens: 200, outputTokens: 75, duration: 2.0 },
+      }),
+    ]);
+
+    const result = await runTokenStatsJudge("T1");
+    const data = result.data as {
+      totalInput: number;
+      totalOutput: number;
+      totalTurns: number;
+      perStep: Array<{ role: string; inputTokens: number; outputTokens: number; turns: number }>; // NOTE(review): this local cast omits `duration`, which the toEqual below does assert
+    };
+
+    expect(result.score).toBe(1.0);
+    expect(data.totalInput).toBe(300); // 100 + 200
+    expect(data.totalOutput).toBe(125); // 50 + 75
+    expect(data.totalTurns).toBe(5); // 2 + 3
+    expect(data.perStep).toHaveLength(2);
+    expect(data.perStep[0]).toEqual({
+      role: "a",
+      inputTokens: 100,
+      outputTokens: 50,
+      turns: 2,
+      duration: 1.5,
+    });
+  });
+
+  test("steps with null usage → zeros", async () => {
+    mockedReadSteps.mockReturnValue([
+      makeStep({ role: "a", usage: null }),
+      makeStep({ role: "b", usage: null }),
+    ]);
+
+    const result = await runTokenStatsJudge("T2");
+    const data = result.data as {
+      totalInput: number;
+      totalOutput: number;
+      totalTurns: number;
+      perStep: Array<{
+        inputTokens: number;
+        outputTokens: number;
+        turns: number;
+        duration: number;
+      }>;
+    };
+
+    expect(result.score).toBe(1.0);
+    expect(data.totalInput).toBe(0);
+    expect(data.totalOutput).toBe(0);
+    expect(data.totalTurns).toBe(0);
+    expect(data.perStep[0]).toEqual({ // a step with null usage still gets a per-step row, filled with zeros
+      role: "a",
+      inputTokens: 0,
+      outputTokens: 0,
+      turns: 0,
+      duration: 0,
+    });
+  });
+
+  test("empty steps → all zeros, score 1.0", async () => {
+    mockedReadSteps.mockReturnValue([]);
+
+    const result = await runTokenStatsJudge("T3");
+    const data = result.data as {
+      totalInput: number;
+      totalOutput: number;
+      totalTurns: number;
+      perStep: unknown[];
+    };
+
+    expect(result.score).toBe(1.0);
+    expect(data.totalInput).toBe(0);
+    expect(data.totalOutput).toBe(0);
+    expect(data.totalTurns).toBe(0);
+    expect(data.perStep).toHaveLength(0);
+  });
+});
+
+describe("LLM-as-judge stubs", () => { // Pins the placeholder result of both judges: score 0, empty perStep, and a schema title naming the judge.
+  test("upstream-consumption returns a stub", async () => {
+    const result = await runUpstreamJudge("T1"); // readThreadSteps has no return value configured in this test
+    expect(result.score).toBe(0);
+    expect(result.data).toEqual({ perStep: [] });
+    expect(result.schema.title).toBe("@uwf/eval-judge-upstream");
+  });
+
+  test("hallucination returns a stub", async () => {
+    const result = await runHallucinationJudge("T1");
+    expect(result.score).toBe(0);
+    expect(result.data).toEqual({ perStep: [] });
+    expect(result.schema.title).toBe("@uwf/eval-judge-hallucination");
+  });
+});
--- a/Show More
+++ b/Show More