improve: committer — check git status before staging (from retrospect PR #578)

Developer already commits changes, so committer's git add -A is redundant. Now checks git status first and skips to push if tree is clean.
feat: retrospect-workflow — add Phase 0 validation
2026-05-30 15:45:17 +08:00 · 2026-05-30 15:32:33 +08:00 · 2026-05-30 15:28:24 +08:00 · 2026-05-30 14:24:33 +08:00 · 2026-05-30 14:23:37 +08:00 · 2026-05-25 22:59:38 +08:00
262 changed files with 23711 additions and 1897 deletions
@@ -1,27 +1,3 @@
---
-description: Ban dynamic import() in production code — use static imports instead
-globs: packages/*/src/**/*.ts
-alwaysApply: true
---
+# No Dynamic Import

-# No Dynamic Import in Production Code
-
-## Rule
-
-Do NOT use `await import()` or dynamic `import()` expressions in production source code.
-Always use static top-level `import` statements.
-
-## Exception (must include a comment explaining why)
-
-1. **Bundle loader** — loads user-authored workflow bundles whose paths are only known at runtime
-
-When suppressing, add a comment directly above:
-
-```ts
-// Dynamic import required: user bundle path resolved at runtime
-const mod = await import(bundlePath);
-```
-
-## Test Files
-
-Test files (`__tests__/**`) are exempt.
+See [docs/no-dynamic-import.md](../../docs/no-dynamic-import.md) for full rules.
@@ -0,0 +1,3 @@
+# Sync Readme
+
+See [docs/sync-readme.md](../../docs/sync-readme.md) for full rules.
@@ -0,0 +1,28 @@
+name: CI
+
+on:
+  push:
+    branches: ['**']
+  pull_request:
+    branches: [main]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Bun
+        uses: oven-sh/setup-bun@v2
+
+      - name: Install dependencies
+        run: bun install
+
+      - name: Lint
+        run: bun run lint
+
+      - name: Type check
+        run: bun run typecheck
+
+      - name: Test
+        run: bun test
@@ -0,0 +1,31 @@
+---
+name: Bug Report
+about: Report a bug or unexpected behavior
+labels: bug
+---
+
+## Describe the bug
+
+A clear description of what the bug is.
+
+## To reproduce
+
+Steps or commands to reproduce:
+
+```bash
+uwf ...
+```
+
+## Expected behavior
+
+What you expected to happen.
+
+## Actual behavior
+
+What actually happened. Include error messages or logs.
+
+## Environment
+
+- OS: 
+- Bun version: 
+- uwf version (`uwf --version`): 
@@ -0,0 +1,17 @@
+---
+name: Feature Request
+about: Suggest a new feature or improvement
+labels: enhancement
+---
+
+## What
+
+Describe the feature or improvement.
+
+## Why
+
+Why is this needed? What problem does it solve?
+
+## Proposed solution
+
+How should it work? Include API sketches, CLI examples, or workflow YAML snippets if applicable.
@@ -0,0 +1,15 @@
+## What
+
+What this PR does.
+
+## Why
+
+Why the change is needed.
+
+## Changes
+
+- `path/to/file` — what changed and why
+
+## Ref
+
+Fixes #
@@ -0,0 +1,28 @@
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  check:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: oven-sh/setup-bun@v2
+        with:
+          bun-version: latest
+
+      - run: bun install --frozen-lockfile
+
+      - name: Build
+        run: bun run build
+
+      - name: Lint
+        run: bunx biome check .
+
+      - name: Test
+        run: bun run test:ci
@@ -11,3 +11,5 @@ solve-issue-entry.ts
 packages/workflow-template-develop/develop.esm.js
 .DS_Store
 *.py
+.claude
+tmp
@@ -0,0 +1,83 @@
+# Test Spec: uwf setup model connectivity validation (#335)
+
+## Context
+
+File: `packages/cli-workflow/src/commands/setup.ts`
+Test file: `packages/cli-workflow/src/__tests__/setup-validate.test.ts`
+
+After `cmdSetup` writes config, it should send a test chat completion request to verify the configured model is reachable. If validation fails, warn the user (don't abort — config is already saved).
+
+## Implementation Notes
+
+- Add a `validateModel(baseUrl, apiKey, model)` function that sends a minimal chat completion request (`POST /chat/completions` with `messages: [{role:"user",content:"hi"}]`, `max_tokens: 1`)
+- Returns `Result<void, string>` — ok if 2xx response, error with reason string otherwise
+- Use `AbortSignal.timeout(15_000)` for the request
+- Both `cmdSetup` and `cmdSetupInteractive` should call it after saving config
+- `cmdSetup` returns validation result in its return object: `{ ...existing, validation: Result<void, string> }` — i.e. `{ ok: true, value: undefined }` on success or `{ ok: false, error: string }` on failure
+- `cmdSetupInteractive` prints a warning to console if validation fails, success message if it passes
+- Use the project logger (`createLogger`) — no raw `console.log` except in interactive CLI output (per CLAUDE.md)
+
+## Test Cases (vitest)
+
+### 1. `validateModel` — success path
+- Mock `fetch` to return `{ status: 200, ok: true, json: () => ({}) }`
+- Call `validateModel(baseUrl, apiKey, model)`
+- Assert returns `{ ok: true, value: undefined }`
+- Assert fetch was called with correct URL (`${baseUrl}/chat/completions`), correct headers (`Authorization: Bearer ${apiKey}`), correct body (model, messages, max_tokens: 1)
+
+### 2. `validateModel` — HTTP error (401 unauthorized)
+- Mock `fetch` to return `{ status: 401, ok: false, statusText: "Unauthorized" }`
+- Call `validateModel(baseUrl, apiKey, model)`
+- Assert returns `{ ok: false, error: <string containing "401"> }`
+
+### 3. `validateModel` — HTTP error (404 model not found)
+- Mock `fetch` to return `{ status: 404, ok: false, statusText: "Not Found" }`
+- Assert returns `{ ok: false, error: <string containing "404"> }`
+
+### 4. `validateModel` — network timeout
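+- Mock `fetch` to throw a `DOMException` with name `TimeoutError` (what `AbortSignal.timeout()` aborts with); a `DOMException` with name `AbortError` should be handled the same way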
+- Assert returns `{ ok: false, error: <string containing "timeout" or "unreachable"> }`
+
+### 5. `validateModel` — network error (DNS failure, connection refused)
+- Mock `fetch` to throw `TypeError("fetch failed")`
+- Assert returns `{ ok: false, error: <string mentioning connectivity> }`
+
+### 6. `cmdSetup` — includes validation result on success
+- Mock global `fetch` for `/chat/completions` to succeed
+- Call `cmdSetup({ provider, baseUrl, apiKey, model, storageRoot })`
+- Assert returned object has `validation: { ok: true, value: undefined }`
+- Assert config files are still written (existing behavior preserved)
+
+### 7. `cmdSetup` — includes validation result on failure (config still saved)
+- Mock global `fetch` for `/chat/completions` to return 401
+- Call `cmdSetup({ ... })`
+- Assert returned object has `validation: { ok: false, error: ... }`
+- Assert `config.yaml` and `.env` are still written (validation failure doesn't prevent saving)
+
+### 8. `cmdSetupInteractive` — prints success message on validation pass
+- Mock `fetch` for both `/models` and `/chat/completions` to succeed
+- Mock stdin to provide valid selections
+- Capture console output
+- Assert output contains a success message like "Model verified" or "✓"
+
+### 9. `cmdSetupInteractive` — prints warning on validation failure
+- Mock `fetch`: `/models` succeeds, `/chat/completions` returns 401
+- Mock stdin for valid selections
+- Capture console output
+- Assert output contains a warning about model not being reachable and suggests trying a different model
+
+### 10. `validateModel` — request body correctness
+- Mock `fetch` to capture the request body
+- Call `validateModel(baseUrl, apiKey, "test-model")`
+- Assert body is `{ model: "test-model", messages: [{role: "user", content: "hi"}], max_tokens: 1 }`
+
+## Export Requirements
+
+- `validateModel` must be exported (for direct unit testing)
+- Signature: `async function validateModel(baseUrl: string, apiKey: string, model: string): Promise<Result<void, string>>`
+- `Result` type: `{ ok: true; value: T } | { ok: false; error: E }` (project convention)
+
+## Files to Create/Modify
+
+- **New**: `packages/cli-workflow/src/__tests__/setup-validate.test.ts` — all test cases above
+- **Modify**: `packages/cli-workflow/src/commands/setup.ts` — add `validateModel`, integrate into `cmdSetup` and `cmdSetupInteractive`
@@ -0,0 +1,220 @@
+name: "retrospect-workflow"
+description: "Post-execution retrospective: analyze a completed thread, find inefficiencies, and improve the workflow definition."
+roles:
+  analyst:
+    description: "Scans thread execution for anomalies and produces a findings report"
+    goal: "You are a workflow execution analyst. You review completed thread data to find inefficiencies, wasted effort, and procedure gaps."
+    capabilities:
+      - data-analysis
+    procedure: |
+      You receive a completed thread ID in your task prompt.
+
+      Phase 0 — Validation (must pass before any analysis):
+      1. Run `uwf step list <thread-id>` to get thread metadata including the workflow hash
+      2. Run `uwf workflow show <workflow-hash>` to get the workflow name
+      3. Verify the workflow exists locally: check `.workflows/<name>.yaml` in the current repo
+         - If NOT found: output $status=wrong_project with the workflow name. Do NOT proceed.
+      4. Compare the thread's workflow hash against the current registered version:
+         - Run `uwf workflow show <name>` to get the current hash
+         - If hashes differ: the thread ran on an older version. Note this — you will need to diff versions after analysis.
+
+      Phase 1 — Overview scan:
+      5. From the step list, compute a health signal for each step:
+         - Duration: flag if >2x the median of other steps
+         - Output tokens: flag if >2x the median
+         - Status flow: flag non-happy-path transitions (rejected, fix_code, fix_spec, hook_failed)
+         - Step count: flag if the same role appears more than expected (indicates loops)
+      6. If no anomalies found AND versions match: output $status=clean
+      7. If no anomalies found BUT versions differ:
+         - Diff the two workflow versions to check if any procedure changes are relevant
+         - If the current version already addresses potential concerns: output $status=clean with a note
+         - Otherwise: proceed to Phase 2
+
+      Phase 2 — Targeted deep-dive (only for flagged steps):
+      8. For each flagged step, run `uwf step show <hash>` to get the detail with turns
+      9. Analyze the turn sequence for:
+         - Repeated tool calls with the same or similar input (blind retries)
+         - Tool errors followed by no strategy change (same approach retried)
+         - Unnecessary exploration (reading files or running commands unrelated to the task)
+         - Hallucinated commands or flags (commands that don't exist or wrong syntax)
+         - Excessive turns before reaching the goal
+      10. For each finding, record:
+          - Which role and step hash
+          - What happened (specific turn indices and commands)
+          - Root cause hypothesis (procedure gap, missing pitfall, unclear instruction)
+          - Suggested fix (what to add/change in the procedure)
+      11. If versions differ: compare findings against the version diff.
+          Mark any finding that is already fixed in the current version as "resolved_in_current".
+          Only report findings that are NOT yet addressed.
+
+      Output a structured findings report. Set $status=clean if nothing actionable, $status=findings if unresolved issues exist, or $status=wrong_project if the workflow doesn't belong here.
+    output: "A findings report with per-issue root cause and suggested procedure fixes. Set $status to clean (with summary), findings (with report hash and targetWorkflow), or wrong_project (with workflowName)."
+    frontmatter:
+      oneOf:
+        - properties:
+            $status: { const: "clean" }
+            summary: { type: string }
+          required: [$status, summary]
+        - properties:
+            $status: { const: "findings" }
+            report: { type: string }
+            targetWorkflow: { type: string }
+          required: [$status, report, targetWorkflow]
+        - properties:
+            $status: { const: "wrong_project" }
+            workflowName: { type: string }
+          required: [$status, workflowName]
+  proposer:
+    description: "Translates findings into concrete workflow edits"
+    goal: "You are a workflow improvement proposer. You read the analyst's findings and produce specific, minimal edits to the workflow YAML."
+    capabilities:
+      - planning
+    procedure: |
+      1. Read the analyst's findings report from your task prompt
+      2. Locate the target workflow YAML:
+         - Workflow definitions live in the WORKFLOW ENGINE repo (where `uwf` is developed), NOT in the repo that was analyzed.
+         - Find it via: `uwf workflow show <targetWorkflow> --format yaml` to read the current definition
+         - The physical file is `.workflows/<targetWorkflow>.yaml` in the workflow engine repo
+         - Use `git rev-parse --show-toplevel` in the current directory to find the workflow engine repo root
+      3. Read the current workflow YAML to understand existing procedures
+      4. For each finding, draft a minimal edit:
+         - Prefer adding a pitfall note or clarifying instruction over restructuring
+         - If a procedure step is ambiguous, make it explicit
+         - If a tool usage pattern is wrong, add a "Do NOT" or "IMPORTANT" note
+         - Keep edits surgical — don't rewrite procedures that work fine
+      5. Check if existing tests need updating (search for test files referencing the workflow)
+      6. Produce a change plan as CAS text node via `uwf cas put-text "<plan>"`
+
+      The plan should list each edit with:
+      - File path
+      - What to change (old text → new text, or addition)
+      - Why (linked to which finding)
+      - Any test updates needed
+    output: "A change plan stored in CAS. Set $status to ready (with plan hash and repoPath) or no_action (if findings don't warrant changes)."
+    frontmatter:
+      oneOf:
+        - properties:
+            $status: { const: "ready" }
+            plan: { type: string }
+            repoPath: { type: string }
+          required: [$status, plan, repoPath]
+        - properties:
+            $status: { const: "no_action" }
+            reason: { type: string }
+          required: [$status, reason]
+  developer:
+    description: "Applies the proposed workflow edits"
+    goal: "You are a developer agent. You apply workflow YAML edits and update related tests."
+    capabilities:
+      - coding
+    procedure: |
+      IMPORTANT: Always work in a git worktree, NEVER modify the main working directory directly.
+      The workflow definitions live in THIS repo (the workflow engine), not the repo that was analyzed.
+
+      Before starting any work, set up an isolated worktree:
+      1. Use `git rev-parse --show-toplevel` to find the repo root (do NOT use repoPath from proposer — that's the analyzed repo)
+      2. `git fetch origin` to get latest refs
+      3. `git worktree add .worktrees/retrospect/<short-slug> -b retrospect/<short-slug> origin/main`
+      4. `cd .worktrees/retrospect/<short-slug> && bun install`
+      5. ALL subsequent work must happen inside the worktree directory.
+
+      Then apply changes:
+      6. Read the change plan from CAS: `uwf cas get <plan hash>`
+      7. Apply each edit from the plan to the workflow YAML
+      8. Update or add tests as specified in the plan
+      9. Run `bun run build` and `bun test` to verify
+      10. Run `bun run check` for lint
+      11. Commit with message: `improve: <workflow-name> — <brief summary>`
+    output: "List all files changed and provide a summary. Set $status to done (with branch/worktree), or failed (with reason)."
+    frontmatter:
+      oneOf:
+        - properties:
+            $status: { const: "done" }
+            branch: { type: string }
+            worktree: { type: string }
+          required: [$status, branch, worktree]
+        - properties:
+            $status: { const: "failed" }
+            reason: { type: string }
+          required: [$status, reason]
+  reviewer:
+    description: "Reviews the workflow edits for correctness"
+    goal: "You are a reviewer. You verify that workflow edits are minimal, correct, and actually address the findings."
+    capabilities:
+      - code-review
+    procedure: |
+      The worktree path is provided in your task prompt. cd into it first.
+
+      Review criteria:
+      1. Each edit must trace back to a specific finding — no drive-by changes
+      2. Edits should be minimal — don't rewrite working procedures
+      3. New pitfall notes or instructions must be clear and actionable
+      4. Tests must be updated if assertions changed
+      5. `bun run build` and `bun test` must pass
+      6. `bunx biome check` must pass
+
+      IMPORTANT: `tea pr create` must run from the MAIN repo directory (not a worktree), because tea cannot detect the repo from worktree `.git` files.
+    output: "Explain your decision. Set $status to approved (with branch/worktree) or rejected (with comments)."
+    frontmatter:
+      oneOf:
+        - properties:
+            $status: { const: "approved" }
+            branch: { type: string }
+            worktree: { type: string }
+          required: [$status, branch, worktree]
+        - properties:
+            $status: { const: "rejected" }
+            comments: { type: string }
+            worktree: { type: string }
+          required: [$status, comments, worktree]
+  committer:
+    description: "Commits and creates PR"
+    goal: "You are a committer agent. You create a clean commit and push a PR."
+    capabilities: []
+    procedure: |
+      The worktree path, branch name, and repo info are provided in your task prompt.
+      cd into the worktree first.
+
+      Note: You inherit the developer's worktree and branch. Do NOT create a new branch.
+      1. Check `git status` — the developer normally commits already; if the working tree is clean, skip to step 3 (push).
+      2. If there are uncommitted changes: `git add -A` then `git commit -m "improve: <workflow> — <summary>"`
+      3. Push the branch: `git push -u origin <branch-name>`
+         - If push hook fails: capture the error log in your output, mark hook_failed
+      4. On push success: create a PR via `tea pr create --title "..." --description "..."`
+         - IMPORTANT: `tea pr create` must run from the MAIN repo directory (not a worktree), because tea cannot detect the repo from worktree `.git` files. cd to the repo root first.
+         - Do NOT pass `--repo` — let tea auto-detect from the main repo's git remote.
+         - PR description must include: What / Why / Findings / Changes sections
+         - On tea failure: capture stderr/stdout, include PR details for manual creation, mark hook_failed
+      5. After PR creation, clean up the worktree:
+         - cd to the repo root (parent of .worktrees)
+         - `git worktree remove <worktree-path>`
+    output: "Include PR URL on success or error log on failure. Set $status to committed (with prUrl) or hook_failed (with error)."
+    frontmatter:
+      oneOf:
+        - properties:
+            $status: { const: "committed" }
+            prUrl: { type: string }
+          required: [$status, prUrl]
+        - properties:
+            $status: { const: "hook_failed" }
+            error: { type: string }
+          required: [$status, error]
+graph:
+  $START:
+    _: { role: "analyst", prompt: "Analyze completed thread {{{threadId}}} for execution anomalies." }
+  analyst:
+    clean: { role: "$END", prompt: "No issues found. Thread executed cleanly." }
+    findings: { role: "proposer", prompt: "Findings report: {{{report}}}. Target workflow: {{{targetWorkflow}}}. Propose minimal edits." }
+    wrong_project: { role: "$END", prompt: "Thread uses workflow '{{{workflowName}}}' which does not exist in this project. Run retrospect from the correct repo." }
+  proposer:
+    no_action: { role: "$END", prompt: "No actionable changes needed: {{{reason}}}." }
+    ready: { role: "developer", prompt: "Apply the change plan (CAS hash: {{{plan}}}) to the workflow definitions in this repo." }
+  developer:
+    done: { role: "reviewer", prompt: "Review workflow edits on branch {{{branch}}} at {{{worktree}}}." }
+    failed: { role: "$END", prompt: "Developer failed: {{{reason}}}. Ending workflow." }
+  reviewer:
+    rejected: { role: "developer", prompt: "Reviewer rejected: {{{comments}}}. Fix the issues in {{{worktree}}}." }
+    approved: { role: "committer", prompt: "Approved. Commit and push branch {{{branch}}} from {{{worktree}}}." }
+  committer:
+    hook_failed: { role: "developer", prompt: "Push hook failed: {{{error}}}. Fix and re-submit." }
+    committed: { role: "$END", prompt: "PR created: {{{prUrl}}}. Workflow improved." }
@@ -0,0 +1,199 @@
+name: "solve-issue"
+description: "TDD-driven issue resolution for small, focused changes. Loop protection relies on engine maxRounds."
+roles:
+  planner:
+    description: "Analyzes issue and outputs a TDD test spec"
+    goal: "You are a planning agent. You analyze Gitea issues and produce a TDD test specification that downstream roles will implement and verify."
+    capabilities:
+      - issue-analysis
+      - planning
+    procedure: |
+      On first run (no previous steps):
+      1. Read the issue and all comments from Gitea using `tea issues <number> -r <owner/repo>`
+      2. Look for project conventions files (CLAUDE.md, CONTRIBUTING.md, .cursor/rules/) in the repo
+      3. Assess whether the issue has enough information to produce a test spec
+      4. If insufficient info: comment on the issue via `echo "..." | tea comment <number> -r <owner/repo>` (skip if you already commented), then output $status=insufficient_info
+      5. If sufficient: produce a detailed TDD test spec in markdown covering all scenarios
+
+      On subsequent runs (bounced back by tester with fix_spec):
+      1. Read the tester's output from the previous step to understand what's wrong with the spec
+      2. Revise the test spec accordingly
+
+      After producing the test spec:
+      1. Store it via `uwf cas put-text "<markdown content>"` and capture the returned hash
+      2. Put the hash in frontmatter.plan (required when $status=ready)
+      3. Set repoPath to the absolute path of the repository root
+    output: "Output a brief summary of the test spec. Set $status to ready (with plan hash and repoPath) or insufficient_info."
+    frontmatter:
+      oneOf:
+        - properties:
+            $status: { const: "ready" }
+            plan: { type: string }
+            repoPath: { type: string }
+          required: [$status, plan, repoPath]
+        - properties:
+            $status: { const: "insufficient_info" }
+          required: [$status]
+  developer:
+    description: "TDD implementation per test spec"
+    goal: "You are a developer agent. You implement code changes following TDD — write tests first, then implementation."
+    capabilities:
+      - coding
+    procedure: |
+      IMPORTANT: Always work in a git worktree, NEVER modify the main working directory directly.
+      The repo path and other details are provided in your task prompt.
+
+      Before starting any work, set up an isolated worktree:
+      1. cd into the repo path provided in your task prompt
+      2. `git fetch origin` to get latest refs
+      3. First time (no existing branch):
+         - `git worktree add .worktrees/fix/<issue-number>-<short-slug> -b fix/<issue-number>-<short-slug> origin/main`
+         - `cd .worktrees/fix/<issue-number>-<short-slug> && bun install`
+      4. If bounced back from reviewer or tester (branch already exists):
+         - cd into the existing worktree under `.worktrees/fix/<issue-number>-<short-slug>`
+         - `git fetch origin && git rebase origin/main`
+      5. ALL subsequent work must happen inside the worktree directory.
+
+      Then implement TDD:
+      6. Read the test spec from CAS: `uwf cas get <plan hash>` (find the hash from the planner's output in your task prompt)
+      7. If bounced back from reviewer or tester: read the previous role's feedback in your task prompt
+      8. Write tests first based on the spec
+      9. Implement the code to make tests pass
+      10. Ensure `bun run build` passes with no errors
+      11. Run `bun test` to verify all tests pass
+
+      If you cannot complete the implementation (e.g. the issue is too complex, blocked by external factors,
+      or repeated attempts fail), set $status=failed with a reason.
+    output: "List all files changed and provide a summary. Set $status to done (with branch/worktree), or failed (with reason)."
+    frontmatter:
+      oneOf:
+        - properties:
+            $status: { const: "done" }
+            branch: { type: string }
+            worktree: { type: string }
+          required: [$status, branch, worktree]
+        - properties:
+            $status: { const: "failed" }
+            reason: { type: string }
+          required: [$status, reason]
+  reviewer:
+    description: "Code standards compliance check"
+    goal: "You are a code reviewer. You verify code standards compliance — NOT functionality (that's the tester's job)."
+    capabilities:
+      - code-review
+      - static-analysis
+    procedure: |
+      The worktree path is provided in your task prompt. cd into it first.
+
+      Before reviewing, verify the git branch:
+      1. Run `git branch --show-current` — confirm the branch name references the issue number being worked on
+      2. If the branch doesn't correspond to the issue, flag it in your output and reject
+
+      Then perform code review:
+      Hard checks (must all pass):
+      3. `bun run build` — no build errors
+      4. `bunx biome check` — no lint violations
+      5. TypeScript strict mode — no type errors
+
+      Soft checks (review against project conventions if CLAUDE.md / .cursor/rules exist):
+      - Naming conventions, module boundaries, code style
+      - No `console.log` in production code
+      - No dynamic imports in production code
+
+      Only review standards compliance. Do NOT test functionality.
+      If rejecting, you MUST explain the specific reason in your output.
+    output: "Explain your decision with specific file/line references. Set $status to approved (with branch/worktree) or rejected (with comments)."
+    frontmatter:
+      oneOf:
+        - properties:
+            $status: { const: "approved" }
+            branch: { type: string }
+            worktree: { type: string }
+          required: [$status, branch, worktree]
+        - properties:
+            $status: { const: "rejected" }
+            comments: { type: string }
+            worktree: { type: string }
+          required: [$status, comments, worktree]
+  tester:
+    description: "Functional correctness verification"
+    goal: "You are a tester agent. You verify that the implementation correctly satisfies every scenario in the test spec."
+    capabilities:
+      - testing
+    procedure: |
+      The worktree path is provided in your task prompt. cd into it first.
+
+      1. Run `bun test` for automated test verification
+      2. Read the test spec from CAS: `uwf cas get <plan hash>` (find the hash from the planner step in the thread history)
+      3. Verify each scenario in the spec is covered and passing
+      4. Determine outcome:
+         - passed: all scenarios verified, tests pass
+         - fix_code: tests fail or implementation doesn't match spec → send back to developer
+         - fix_spec: the spec itself is wrong or incomplete → send back to planner
+    output: "Report test results per scenario. Set $status to passed (with branch/worktree), fix_code (with report), or fix_spec (with report)."
+    frontmatter:
+      oneOf:
+        - properties:
+            $status: { const: "passed" }
+            branch: { type: string }
+            worktree: { type: string }
+          required: [$status, branch, worktree]
+        - properties:
+            $status: { const: "fix_code" }
+            report: { type: string }
+          required: [$status, report]
+        - properties:
+            $status: { const: "fix_spec" }
+            report: { type: string }
+          required: [$status, report]
+  committer:
+    description: "Commits and creates PR"
+    goal: "You are a committer agent. You create a clean commit and push a PR linking the original issue."
+    capabilities: []
+    procedure: |
+      The worktree path, branch name, and repo info are provided in your task prompt.
+      cd into the worktree first.
+
+      Note: You inherit the developer's worktree and branch. Do NOT create a new branch.
+      1. Check `git status` — if working tree is clean and branch is ahead of origin, skip to step 3 (push).
+      2. If there are unstaged/uncommitted changes: `git add -A` then `git commit -m "type: description" -m "Fixes #N"`
+      3. Push the branch: `git push -u origin <branch-name>`
+         - If push hook fails: capture the error log in your output, mark hook_failed
+      4. On push success: create a PR via `tea pr create --title "..." --description "..."`
+         - IMPORTANT: `tea pr create` must run from the MAIN repo directory (not a worktree), because tea cannot detect the repo from worktree `.git` files. cd to the repo root first.
+         - Do NOT pass `--repo` — let tea auto-detect from the main repo's git remote.
+         - PR description must include: What / Why / Changes / Ref sections, with `Fixes #N` in Ref
+         - On tea failure: capture stderr/stdout, include PR details for manual creation, mark hook_failed
+      5. After PR creation, clean up the worktree:
+         - cd to the repo root (parent of .worktrees)
+         - `git worktree remove <worktree-path>`
+    output: "Include PR URL on success or error log on failure. Set $status to committed (with prUrl) or hook_failed (with error)."
+    frontmatter:
+      oneOf:
+        - properties:
+            $status: { const: "committed" }
+            prUrl: { type: string }
+          required: [$status, prUrl]
+        - properties:
+            $status: { const: "hook_failed" }
+            error: { type: string }
+          required: [$status, error]
+graph:
+  $START:
+    _: { role: "planner", prompt: "Analyze the issue and produce a TDD test spec." }
+  planner:
+    insufficient_info: { role: "$END", prompt: "Insufficient information to proceed; end the workflow." }
+    ready: { role: "developer", prompt: "Implement the TDD test spec (CAS hash: {{{plan}}}) in repo {{{repoPath}}}." }
+  developer:
+    done: { role: "reviewer", prompt: "Review branch {{{branch}}} at {{{worktree}}} for code standards compliance." }
+    failed: { role: "$END", prompt: "Developer failed: {{{reason}}}. Ending workflow." }
+  reviewer:
+    rejected: { role: "developer", prompt: "Reviewer rejected: {{{comments}}}. Fix the issues in repo {{{worktree}}}." }
+    approved: { role: "tester", prompt: "Review passed. Run tests on branch {{{branch}}} at {{{worktree}}}." }
+  tester:
+    fix_code: { role: "developer", prompt: "Tests found code issues: {{{report}}}. Fix and re-submit." }
+    fix_spec: { role: "planner", prompt: "Tests found spec issues: {{{report}}}. Revise the test spec." }
+    passed: { role: "committer", prompt: "All tests passed. Commit and push branch {{{branch}}} from {{{worktree}}}." }
+  committer:
+    hook_failed: { role: "developer", prompt: "Push hook failed: {{{error}}}. Fix and re-submit." }
+    committed: { role: "$END", prompt: "PR created: {{{prUrl}}}. Workflow complete." }
@@ -8,10 +8,10 @@ This monorepo implements a stateless workflow engine driven by a single-step CLI

 | Concept | What it is |
 |---------|-----------|
-| **Workflow** | A YAML definition (`WorkflowPayload`) with roles, conditions, and a routing graph. Stored as a CAS node, identified by its XXH64 hash. |
+| **Workflow** | A YAML definition (`WorkflowPayload`) with roles, status-based routing, and a directed graph. Stored as a CAS node, identified by its XXH64 hash. |
 | **Thread** | A single execution of a workflow, identified by a ULID. State is an immutable CAS chain; active threads indexed in `threads.yaml`; completed threads in `history.jsonl`. |
 | **Role** | A named actor within a workflow. Each role has a system prompt and a JSON Schema `outputSchema`. |
-| **Moderator** | JSONata-based graph evaluator — determines the next role (or `$END`) with zero LLM cost. |
+| **Moderator** | Status-based graph evaluator — determines the next role (or `$END`) with zero LLM cost. |
 | **Agent** | An external CLI command (`uwf-hermes`, etc.) spawned by `uwf thread step`. Produces frontmatter markdown output. |
 | **CAS** | Content-Addressed Storage via `@uncaged/json-cas` — all workflow definitions, thread nodes, and outputs are immutable CAS nodes. |
 | **Registry** | `~/.uncaged/workflow/registry.yaml` — maps workflow names to current CAS hashes. |
@@ -23,10 +23,9 @@ workflow/
  packages/
    workflow-protocol/    # @uncaged/workflow-protocol — shared types (WorkflowPayload, StepNodePayload, WorkflowConfig, etc.)
    workflow-util/        # @uncaged/workflow-util — Crockford Base32, ULID, logger, frontmatter parsing/validation
-    workflow-moderator/   # @uncaged/workflow-moderator — JSONata graph evaluator
-    workflow-agent-kit/   # @uncaged/workflow-agent-kit — createAgent factory, context builder, extract pipeline
+    workflow-util-agent/  # @uncaged/workflow-util-agent — createAgent factory, context builder, extract pipeline
    workflow-agent-hermes/ # @uncaged/workflow-agent-hermes — uwf-hermes CLI binary (spawns hermes chat)
-    cli-workflow/         # @uncaged/cli-workflow — uwf CLI binary
+    cli-workflow/         # @uncaged/cli-workflow — uwf CLI binary (includes status-based moderator in src/moderator/)
  legacy-packages/       # Archived packages (preserved for reference, not active)
  examples/              # Workflow YAML examples (solve-issue.yaml)
  docs/                  # Architecture docs
@@ -34,7 +33,7 @@ workflow/
  tsconfig.json          # root TypeScript config
 ```

- Dependency layers: `workflow-protocol` → (`workflow-util`, `workflow-moderator`) → `workflow-agent-kit` → `workflow-agent-hermes` / `cli-workflow`
+- Dependency layers: `workflow-protocol` → `workflow-util` → `workflow-util-agent` → `workflow-agent-hermes` / `cli-workflow`
 - Packages use `workspace:^` protocol (resolves to `^x.y.z` on publish)
 - External CAS: `@uncaged/json-cas` (store API, hashing, schema validation) + `@uncaged/json-cas-fs` (filesystem backend)

@@ -285,6 +284,11 @@ moderator → agent → extract      — one step per invocation, repeat until $
 2. **Register** — `uwf workflow put <file.yaml>` parses YAML, registers output schemas, stores `WorkflowPayload` in CAS
 3. **Run** — `uwf thread start` creates a thread, `uwf thread step` executes one cycle per invocation

+## Project Rules
+
+- [docs/sync-readme.md](docs/sync-readme.md) — README sync conventions
+- [docs/no-dynamic-import.md](docs/no-dynamic-import.md) — no dynamic import in production code
+
 ## Commit Convention

 ```
@@ -0,0 +1,109 @@
+# Contributing to @uncaged/workflow
+
+Thank you for your interest in contributing! This guide covers setup, conventions, and the PR workflow.
+
+## Prerequisites
+
+- [Bun](https://bun.sh/) (latest)
+- [Node.js](https://nodejs.org/) 20+
+- Git
+
+## Setup
+
+```bash
+git clone https://github.com/shazhou-ww/uncaged-workflow.git
+cd uncaged-workflow
+bun install
+bun run build
+bun test
+```
+
+## Development Workflow
+
+```bash
+bun run build     # TypeScript compilation (all packages)
+bun run check     # tsc + biome lint + log tag validation
+bun run format    # Auto-format with Biome
+bun test          # Run all tests
+```
+
+All three (`build`, `check`, `test`) must pass before submitting a PR. A pre-push hook runs `check` + `test` automatically.
+
+## Coding Conventions
+
+See [CLAUDE.md](CLAUDE.md) for the full coding standard. Key points:
+
+- **Functional-first** — `function` + `type`, not `class` + `interface`
+- **No optional properties** — use `T | null` instead of `?:`
+- **Named exports only** — no default exports
+- **No `console.log`** — use the structured logger from `@uncaged/workflow-util`
+- **Static imports only** — no `await import()` in production code
+- **Biome** for lint + format — run `bun run check` before committing
+
+## Commit Messages
+
+```
+<type>(<scope>): <description>
+
+type: feat | fix | refactor | docs | chore | test
+scope: cli | moderator | agent-kit | hermes | builtin | claude-code | util | protocol | dashboard
+```
+
+Examples:
+- `feat(moderator): add cycle detection to graph evaluator`
+- `fix(cli): handle missing config file gracefully`
+- `docs(protocol): update StepNode field descriptions`
+
+## Pull Request Process
+
+1. **Branch** from `main`: `git checkout -b feat/123-short-description`
+2. **Implement** your change with tests
+3. **Run checks**: `bun run check && bun test`
+4. **Commit** with a descriptive message referencing the issue: `Fixes #123`
+5. **Push** and open a PR
+
+### PR Description Template
+
+```
+## What
+What this PR does.
+
+## Why
+Why the change is needed.
+
+## Changes
+- `path/to/file.ts` — what changed and why
+
+## Ref
+Fixes #N
+```
+
+## Adding a Changeset
+
+For any user-facing change (feat, fix, breaking change), add a changeset:
+
+```bash
+bun changeset
+```
+
+This creates a markdown file in `.changeset/` describing the change. It will be consumed on the next release to bump versions and generate CHANGELOG entries.
+
+## Project Structure
+
+```
+packages/
+  workflow-protocol/      # Shared types and JSON Schema
+  workflow-util/          # Encoding, IDs, logging, frontmatter
+  workflow-util-agent/    # createAgent factory, extract pipeline
+  workflow-agent-hermes/  # Hermes ACP agent
+  workflow-agent-builtin/ # Built-in LLM agent
+  workflow-agent-claude-code/ # Claude Code agent
+  cli-workflow/           # uwf CLI binary
+  workflow-dashboard/     # Web UI (private, alpha)
+```
+
+Dependencies point only from higher layers to lower ones — lower layers never depend on higher layers. See [CLAUDE.md](CLAUDE.md) for the full architecture.
+
+## License
+
+By contributing, you agree that your contributions will be licensed under the [MIT License](LICENSE).
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 Uncaged
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -1,93 +1,115 @@
 # @uncaged/workflow

-A stateless workflow engine driven by a single-step CLI. Workflows are YAML definitions with roles, JSONata routing conditions, and a directed graph. Threads are immutable CAS-linked chains — each `uwf thread step` runs one moderator→agent→extract cycle and exits.
+[![CI](https://github.com/shazhou-ww/uncaged-workflow/actions/workflows/ci.yml/badge.svg)](https://github.com/shazhou-ww/uncaged-workflow/actions/workflows/ci.yml)
+[![npm](https://img.shields.io/npm/v/@uncaged/cli-workflow?label=%40uncaged%2Fcli-workflow)](https://www.npmjs.com/package/@uncaged/cli-workflow)
+[![npm](https://img.shields.io/npm/v/@uncaged/workflow-protocol?label=%40uncaged%2Fworkflow-protocol)](https://www.npmjs.com/package/@uncaged/workflow-protocol)
+[![npm](https://img.shields.io/npm/v/@uncaged/workflow-util-agent?label=%40uncaged%2Fworkflow-util-agent)](https://www.npmjs.com/package/@uncaged/workflow-util-agent)

-## Package Map
+A stateless workflow engine driven by a single-step CLI. Workflows are YAML definitions with roles, status-based routing, and a directed graph. Threads are immutable CAS-linked chains — by default, each `uwf thread exec` invocation runs one moderator→agent→extract cycle and exits.

-| Package | npm | Role |
-|---------|-----|------|
-| `cli-workflow` | `@uncaged/cli-workflow` | `uwf` CLI binary — thread lifecycle, workflow registry, CAS inspection, setup |
-| `workflow-protocol` | `@uncaged/workflow-protocol` | Shared TypeScript types (`WorkflowPayload`, `StepNodePayload`, `WorkflowConfig`, etc.) |
-| `workflow-moderator` | `@uncaged/workflow-moderator` | JSONata graph evaluator — determines next role or `$END` |
-| `workflow-agent-kit` | `@uncaged/workflow-agent-kit` | `createAgent` factory, context builder, two-layer extract pipeline |
-| `workflow-agent-hermes` | `@uncaged/workflow-agent-hermes` | `uwf-hermes` agent — spawns Hermes chat, captures session |
-| `workflow-util` | `@uncaged/workflow-util` | Crockford Base32, ULID, logger, frontmatter parsing |
+## Overview

-External: [`@uncaged/json-cas`](https://www.npmjs.com/package/@uncaged/json-cas) (CAS store + JSON Schema validation) + `@uncaged/json-cas-fs` (filesystem backend).
+This monorepo implements **uwf**, a workflow engine with no long-running daemon. You register YAML workflow definitions in a content-addressed store (CAS), start a thread with an initial prompt, then invoke `uwf thread exec` repeatedly until the moderator routes to `$END`. Each step is a complete process: the moderator evaluates status-based routing to pick the next role, an external agent CLI produces frontmatter markdown output, and an extract pipeline validates or structures that output against the role's JSON Schema.
+
+Workflow state lives entirely on disk under `~/.uncaged/workflow/`: CAS nodes for definitions and step payloads, `registry.yaml` for workflow name→hash mappings, and `threads.yaml` for active thread head pointers. Completed threads are archived to `history.jsonl`. Because there is no server process, workflows are easy to debug, fork, and inspect with ordinary CLI tools.
+
+Agents are pluggable CLI binaries (`uwf-hermes`, `uwf-builtin`, `uwf-claude-code`, or custom commands). The engine spawns the configured agent with `<thread-id>` and `<role>`, sets `UWF_EDGE_PROMPT` from the graph transition, and captures both the agent's markdown output and a detail CAS node for session replay.
+
+## Install
+
+```bash
+npm install -g @uncaged/cli-workflow
+```
+
+Requires the [Bun](https://bun.sh/) runtime (used internally for TypeScript execution).

 ## Quick Start

 ```bash
-# 1. Configure provider and model
+# 1. Configure provider, model, and default agent
 uwf setup

 # 2. Register a workflow from YAML
-uwf workflow put examples/solve-issue.yaml
+uwf workflow add examples/solve-issue.yaml

-# 3. Start a thread
+# 3. Start a thread (creates head pointer; does not execute)
 uwf thread start solve-issue -p "Fix the login redirect bug"

 # 4. Execute steps (one at a time, until done)
-uwf thread step <thread-id>
+uwf thread exec <thread-id>
 ```

-## CLI Commands
+Use `-c, --count <number>` on `thread exec` to run multiple steps in one invocation. Override the agent with `--agent <cmd>`.

-### Thread
+## Architecture

-| Command | Description |
-|---------|-------------|
-| `uwf thread start <workflow> -p <prompt>` | Create a thread (no execution) |
-| `uwf thread step <thread-id> [--agent <cmd>]` | Execute one moderator→agent→extract cycle |
-| `uwf thread show <thread-id>` | Show head pointer and done status |
-| `uwf thread list [--all]` | List threads (`--all` includes archived) |
-| `uwf thread steps <thread-id>` | List all steps chronologically |
-| `uwf thread read <thread-id> [--quota N]` | Render thread as readable markdown |
-| `uwf thread fork <step-hash>` | Fork from a specific step |
-| `uwf thread step-details <step-hash>` | Dump full detail node |
-| `uwf thread kill <thread-id>` | Terminate and archive |
+Dependency layers (lower layers have no dependency on higher layers):

-### Workflow
+```
+Layer 0 — Contract
+  workflow-protocol          Shared types and JSON Schema definitions

-| Command | Description |
-|---------|-------------|
-| `uwf workflow put <file.yaml>` | Register a workflow from YAML |
-| `uwf workflow show <name-or-hash>` | Show workflow definition |
-| `uwf workflow list` | List registered workflows |
+Layer 1 — Shared infra
+  workflow-util              Encoding, IDs, logging, frontmatter, paths

-### CAS
+Layer 2 — Agent framework
+  workflow-util-agent         createAgent factory, context builder, extract pipeline

-| Command | Description |
-|---------|-------------|
-| `uwf cas get <hash>` | Read a CAS node |
-| `uwf cas put <type-hash> <data>` | Store a node |
-| `uwf cas has <hash>` | Check existence |
-| `uwf cas refs <hash>` | List direct references |
-| `uwf cas walk <hash>` | Recursive traversal |
-| `uwf cas reindex` | Rebuild type index |
-| `uwf cas schema list` | List schemas |
-| `uwf cas schema get <hash>` | Show a schema |
+Layer 3 — Agent implementations
+  workflow-agent-hermes      Hermes ACP agent (uwf-hermes)
+  workflow-agent-builtin     Built-in LLM + tools agent (uwf-builtin)
+  workflow-agent-claude-code Claude Code agent (uwf-claude-code)

-### Setup
+Layer 4 — CLI
+  cli-workflow               uwf binary — thread lifecycle, registry, CAS, setup (includes status-based moderator)

-| Command | Description |
-|---------|-------------|
-| `uwf setup` | Interactive provider/model/agent configuration |
-| `uwf setup --provider ... --base-url ... --api-key ... --model ...` | Non-interactive setup |
+App (uses protocol; not in the runtime engine stack)
+  workflow-dashboard         Web UI for visual workflow editing
+```

-Config stored in `~/.uncaged/workflow/config.yaml`. API keys in `~/.uncaged/workflow/.env`.
+External CAS: [`@uncaged/json-cas`](https://www.npmjs.com/package/@uncaged/json-cas) (store API, hashing, schema validation) + `@uncaged/json-cas-fs` (filesystem backend).
+
+See [docs/architecture.md](docs/architecture.md) for the full design — three-phase engine loop, CAS node types, storage layout, agent CLI protocol, and design decisions.
+
+## Packages
+
+| Package | npm | Description | Type | README |
+|---------|-----|-------------|------|--------|
+| `cli-workflow` | `@uncaged/cli-workflow` | `uwf` CLI — thread lifecycle, workflow registry, CAS inspection, setup | cli | [README](packages/cli-workflow/README.md) |
+| `workflow-protocol` | `@uncaged/workflow-protocol` | Shared TypeScript types and JSON Schema constants | lib | [README](packages/workflow-protocol/README.md) |
+| `workflow-util-agent` | `@uncaged/workflow-util-agent` | `createAgent` factory, context builder, extract pipeline | lib | [README](packages/workflow-util-agent/README.md) |
+| `workflow-util` | `@uncaged/workflow-util` | Crockford Base32, ULID, logger, frontmatter parsing, storage paths | lib | [README](packages/workflow-util/README.md) |
+| `workflow-agent-hermes` | `@uncaged/workflow-agent-hermes` | `uwf-hermes` — spawns Hermes chat via ACP | agent | [README](packages/workflow-agent-hermes/README.md) |
+| `workflow-agent-builtin` | `@uncaged/workflow-agent-builtin` | `uwf-builtin` — built-in LLM agent with file/shell tools | agent | [README](packages/workflow-agent-builtin/README.md) |
+| `workflow-agent-claude-code` | `@uncaged/workflow-agent-claude-code` | `uwf-claude-code` — spawns Claude Code CLI | agent | [README](packages/workflow-agent-claude-code/README.md) |
+| `workflow-dashboard` | `@uncaged/workflow-dashboard` | Web graph editor for workflow YAML (private, alpha) | app | [README](packages/workflow-dashboard/README.md) |
+
+## CLI Reference
+
+Global options: `-V, --version`, `--format <json|yaml>`, `-h, --help`.
+
+| Group | Commands |
+|-------|----------|
+| **thread** | `start`, `exec`, `show`, `list`, `stop`, `cancel`, `read` |
+| **step** | `list`, `show`, `read`, `fork` |
+| **workflow** | `add`, `show`, `list` |
+| **cas** | `get`, `put`, `put-text`, `has`, `refs`, `walk`, `reindex`, `schema list`, `schema get` |
+| **setup** | Interactive or `--provider`, `--base-url`, `--api-key`, `--model`, `--agent` |
+| **skill** | `cli` — print markdown reference of all uwf commands |
+| **log** | `list`, `show`, `clean` — process-level debug logs |
+
+Config is stored in `~/.uncaged/workflow/config.yaml`. API keys go in `~/.uncaged/workflow/.env`.
+
+Detailed command usage, options, and examples: [packages/cli-workflow/README.md](packages/cli-workflow/README.md).

 ## Development

 ```bash
 bun install --no-cache     # Install dependencies
+bun run build              # tsc --build (all packages)
 bun run check              # tsc + biome + lint-log-tags
 bun run format             # Auto-format with Biome
 bun test                   # Run all tests
 ```

 Managed with **bun workspace**. See [CLAUDE.md](CLAUDE.md) for coding conventions.
-
-## Architecture
-
-See [docs/architecture.md](docs/architecture.md) for the full design — three-phase engine loop, CAS node types, storage layout, agent CLI protocol, and design decisions.
@@ -5,6 +5,8 @@
      "**",
      "!**/dist",
      "!**/node_modules",
+      "!**/legacy-packages",
+      "!scripts",
      "!packages/workflow/workflow",
      "!xiaoju/scripts/bundle.ts"
    ]
@@ -15,6 +17,15 @@
    "indentWidth": 2,
    "lineWidth": 100
  },
+  "css": {
+    "parser": {
+      "cssModules": true,
+      "tailwindDirectives": true
+    },
+    "linter": {
+      "enabled": false
+    }
+  },
  "javascript": {
    "formatter": {
      "quoteStyle": "double",
@@ -36,7 +47,7 @@
      }
    },
    {
-      "includes": ["**/*.d.ts"],
+      "includes": ["**/*.d.ts", "**/vitest.config.*"],
      "linter": {
        "rules": {
          "style": {
@@ -44,6 +55,16 @@
          }
        }
      }
+    },
+    {
+      "includes": ["**/cli.ts", "**/setup.ts"],
+      "linter": {
+        "rules": {
+          "suspicious": {
+            "noConsole": "off"
+          }
+        }
+      }
    }
  ],
  "linter": {
@@ -8,7 +8,7 @@

 A stateless workflow engine driven by a single-step CLI. Workflows are YAML definitions stored as CAS nodes; threads are immutable chains of CAS-linked step nodes. No daemon — each `uwf thread step` invocation runs one moderator→agent→extract cycle and exits.

-The implementation lives in **6** active packages under `packages/`, plus two external CAS packages (`@uncaged/json-cas`, `@uncaged/json-cas-fs`). Legacy packages reside in `legacy-packages/` and are not part of the active stack.
+The implementation lives in **5** active packages under `packages/`, plus two external CAS packages (`@uncaged/json-cas`, `@uncaged/json-cas-fs`). Legacy packages reside in `legacy-packages/` and are not part of the active stack.

 ## Package map

@@ -16,10 +16,9 @@ The implementation lives in **6** active packages under `packages/`, plus two ex
 |-------|---------|---------------|
 | Contract | `@uncaged/workflow-protocol` → `workflow-protocol` | Shared TypeScript types (`WorkflowPayload`, `StepNodePayload`, `ModeratorContext`, `WorkflowConfig`, etc.). No runtime deps beyond `@uncaged/json-cas-fs`. |
 | Shared infra | `@uncaged/workflow-util` → `workflow-util` | Crockford Base32, ULID generation, `createLogger`, frontmatter parsing/validation. |
-| Moderator | `@uncaged/workflow-moderator` → `workflow-moderator` | JSONata-based graph evaluator: given a `WorkflowPayload` and `ModeratorContext`, returns the next role or `$END`. |
-| Agent framework | `@uncaged/workflow-agent-kit` → `workflow-agent-kit` | `createAgent` entrypoint factory, context builder, frontmatter fast-path extractor, LLM extract fallback, output format instruction builder. |
+| Agent framework | `@uncaged/workflow-util-agent` → `workflow-util-agent` | `createAgent` entrypoint factory, context builder, frontmatter fast-path extractor, LLM extract fallback, output format instruction builder. |
 | Agent: Hermes | `@uncaged/workflow-agent-hermes` → `workflow-agent-hermes` | `uwf-hermes` CLI binary — spawns `hermes chat`, pipes prompt, captures session detail. |
-| CLI | `@uncaged/cli-workflow` → `cli-workflow` | `uwf` binary — thread lifecycle, workflow registry, CAS inspection, setup. |
+| CLI | `@uncaged/cli-workflow` → `cli-workflow` | `uwf` binary — thread lifecycle, workflow registry, CAS inspection, setup. Includes status-based graph evaluator in `src/moderator/` (next role or `$END`). |

 ### External dependencies

@@ -27,7 +26,7 @@ The implementation lives in **6** active packages under `packages/`, plus two ex
 |---------|------|
 | `@uncaged/json-cas` | Content-addressed store API, XXH64 hashing, JSON Schema registration and validation. |
 | `@uncaged/json-cas-fs` | Filesystem backend for `json-cas`. |
-| `jsonata` | JSONata expression evaluator (used by `workflow-moderator`). |
+| `mustache` | Template renderer for edge prompts (used by `cli-workflow` moderator). |
 | `commander` | CLI argument parsing (used by `cli-workflow`). |
 | `dotenv` | Loads `.env` files for API keys. |
 | `yaml` | YAML parse/stringify. |
@@ -45,10 +44,9 @@ flowchart BT
  end
  subgraph L1["Layer 1 — shared"]
    util["@uncaged/workflow-util"]
-    moderator["@uncaged/workflow-moderator"]
  end
  subgraph L2["Layer 2 — agent framework"]
-    kit["@uncaged/workflow-agent-kit"]
+    kit["@uncaged/workflow-util-agent"]
  end
  subgraph L3["Layer 3 — agent implementations"]
    hermes["@uncaged/workflow-agent-hermes"]
@@ -58,7 +56,6 @@ flowchart BT
  end
  protocol --> jcasfs
  util --> protocol
-  moderator --> protocol
  kit --> protocol
  kit --> util
  kit --> jcas
@@ -68,7 +65,6 @@ flowchart BT
  cli --> protocol
  cli --> util
  cli --> kit
-  cli --> moderator
  cli --> jcas
  cli --> jcasfs
 ```
@@ -85,8 +81,13 @@ description: "End-to-end issue resolution"
 roles:
  planner:
    description: "Creates implementation plan"
-    systemPrompt: "You are a planning agent. Analyze the issue and create a step-by-step plan."
-    outputSchema:
+    goal: "You are a planning agent. Analyze the issue and create a step-by-step plan."
+    capabilities:
+      - issue-analysis
+      - planning
+    procedure: "Analyze the issue and create a detailed, actionable implementation plan."
+    output: "Output the plan summary and list of concrete steps."
+    meta:
      type: object
      properties:
        plan: { type: string }
@@ -94,8 +95,13 @@ roles:
      required: [plan, steps]
  developer:
    description: "Implements code changes"
-    systemPrompt: "You are a developer agent. Implement the plan."
-    outputSchema:
+    goal: "You are a developer agent. Implement the plan."
+    capabilities:
+      - file-edit
+      - shell
+    procedure: "Implement the plan. Write code, tests, and ensure existing tests pass."
+    output: "List all files changed and provide a summary of the implementation."
+    meta:
      type: object
      properties:
        filesChanged: { type: array, items: { type: string } }
@@ -103,8 +109,12 @@ roles:
      required: [filesChanged, summary]
  reviewer:
    description: "Reviews code changes"
-    systemPrompt: "You are a code reviewer. Review the implementation."
-    outputSchema:
+    goal: "You are a code reviewer. Review the implementation."
+    capabilities:
+      - code-review
+    procedure: "Review the implementation against the plan."
+    output: "Approve or reject with detailed comments."
+    meta:
      type: object
      properties:
        approved: { type: boolean }
@@ -133,9 +143,8 @@ graph:

 Key properties:

- **`roles`** — inline role definitions; each `outputSchema` is a JSON Schema (stored as its own CAS node on registration)
- **`conditions`** — named JSONata expressions evaluated against the `ModeratorContext`
- **`graph`** — `Record<Role | "$START", Transition[]>` — first matching transition wins; `condition: null` = fallback
+- **`roles`** — inline role definitions; each `meta` is a JSON Schema (stored as its own CAS node on registration)
+- **`graph`** — `Record<Role | "$START", Record<Status, Target>>` — status-based routing; each role maps statuses to targets
 - **No agent binding** — agent selection is a deployment concern, configured in `config.yaml`
 - **No Zod** — all schemas are JSON Schema, validated through `@uncaged/json-cas`

@@ -145,8 +154,8 @@ Each `uwf thread step` runs exactly one cycle: moderator → agent → extract.

 ```
 ┌─→ Phase 1: MODERATOR
-│   Input:  WorkflowPayload + ModeratorContext { start, steps[] }
-│   Engine: JSONata conditions evaluated against the graph
+│   Input:  graph + lastRole + lastOutput
+│   Engine: Status-based map lookup against lastOutput.status
 │   Output: next role name | $END
 │
 │   Phase 2: AGENT
@@ -156,7 +165,7 @@ Each `uwf thread step` runs exactly one cycle: moderator → agent → extract.
 │   Output: raw string (frontmatter markdown)
 │
 │   Phase 3: EXTRACT
-│   Input:  raw agent output + role's outputSchema
+│   Input:  raw agent output + role's meta schema
 │   Engine: two-layer extract (frontmatter fast path → LLM fallback)
 │   Output: CasRef to structured output node
 │
@@ -193,7 +202,7 @@ type AgentContext = ModeratorContext & {

 ### Key properties

- **Moderator** — pure JSONata evaluation; no LLM call, no I/O beyond CAS reads. Evaluates `workflow.graph[currentRole]` transitions in order, returns first match.
+- **Moderator** — pure status-based map lookup; no LLM call, no I/O beyond CAS reads. Looks up `graph[lastRole][lastOutput.status]` to get the next target.
 - **Agent** — receives `AgentContext` with thread history + role system prompt + output format instruction. Raw output is frontmatter markdown.
 - **Extractor** — two-layer: tries frontmatter fast-path first (zero LLM cost), falls back to LLM extract if frontmatter is absent or invalid.
 - **Stateless** — each `uwf thread step` is an atomic, self-contained operation. No in-memory state between steps.
@@ -209,11 +218,11 @@ Each agent is an external command invoked by `uwf thread step`:
 Contract:
 1. `uwf thread step` determines the next role via the moderator
 2. Agent CLI is spawned with `(thread-id, role)` as positional args
-3. `workflow-agent-kit` (`createAgent`) handles the boilerplate:
+3. `workflow-util-agent` (`createAgent`) handles the boilerplate:
   - Parses argv
   - Loads `.env` from storage root
   - Builds `AgentContext` by walking the CAS chain from `threads.yaml` head
-   - Resolves the role's `outputSchema` and builds `outputFormatInstruction`
+   - Resolves the role's `meta` schema and builds `outputFormatInstruction`
   - Calls the agent's `run` function
   - Runs two-layer extract on the raw output
   - Writes `StepNode` to CAS (output + detail + prev link)
@@ -242,18 +251,18 @@ scope: role
 Fixed the login redirect by updating the auth middleware...
 ```

-The `outputFormatInstruction` (built by `buildOutputFormatInstruction` in `workflow-agent-kit`) is prepended to the role's system prompt, so the deliverable format is the first thing the agent sees. It lists the expected frontmatter fields derived from the role's JSON Schema.
+The `outputFormatInstruction` (built by `buildOutputFormatInstruction` in `workflow-util-agent`) is prepended to the role's system prompt, so the deliverable format is the first thing the agent sees. It lists the expected frontmatter fields derived from the role's `meta` JSON Schema.

 ## Two-layer extract

-Structured output extraction uses a two-layer strategy (`workflow-agent-kit`):
+Structured output extraction uses a two-layer strategy (`workflow-util-agent`):

 ### Layer 1: frontmatter fast path (`frontmatter.ts`)

 1. Parse YAML frontmatter from raw agent output (`parseFrontmatterMarkdown`)
 2. Validate required fields (`validateFrontmatter`)
 3. Build a candidate object from frontmatter fields (`status`, `next`, `confidence`, `artifacts`, `scope`)
-4. `store.put()` the candidate against the role's `outputSchema`
+4. `store.put()` the candidate against the role's `meta` schema
 5. Validate with `json-cas` schema validation
 6. If valid → return `outputHash` (zero LLM cost)

@@ -270,9 +279,9 @@ If the fast path returns `null` (no frontmatter, invalid, or doesn't satisfy sch

 ## Prompt injection

-`workflow-agent-kit` prepends two pieces of context to the agent's system prompt:
+`workflow-util-agent` prepends two pieces of context to the agent's system prompt:

-1. **Deliverable format instruction** — generated from the role's `outputSchema`, tells the agent exactly what frontmatter fields to produce and the expected format
+1. **Deliverable format instruction** — generated from the role's `meta` schema, tells the agent exactly what frontmatter fields to produce and the expected format
 2. **Scope constraint** — "Focus exclusively on YOUR role's deliverable. Do not perform actions outside your role's scope."

 This ensures agents produce parseable frontmatter output without requiring per-agent format knowledge.
@@ -289,8 +298,11 @@ payload:
  roles:
    planner:
      description: "Creates implementation plan"
-      systemPrompt: "You are a planning agent..."
-      outputSchema: "5GWKR8TN1V3JA"    # cas_ref → JSON Schema node
+      goal: "You are a planning agent..."
+      capabilities: [planning, issue-analysis]
+      procedure: "Analyze the issue and create a plan."
+      output: "Output the plan summary."
+      meta: "5GWKR8TN1V3JA"    # cas_ref → JSON Schema node
  conditions:
    notApproved:
      description: "Reviewer rejected"
@@ -318,7 +330,7 @@ payload:
  start: "4TNVW8KR2B3MA"      # cas_ref → StartNode
  prev: "2MXBG6PN4A8JR"       # cas_ref → previous StepNode (null for first step)
  role: "developer"
-  output: "9KRVW3TN5F1QA"     # cas_ref → structured output (validated against outputSchema)
+  output: "9KRVW3TN5F1QA"     # cas_ref → structured output (validated against meta schema)
  detail: "7BQST3VW9F2MA"     # cas_ref → execution detail (raw turns, session data)
  agent: "uwf-hermes"         # agent command used (plain string)
 ```
@@ -468,7 +480,7 @@ Binary: `uwf`
 | **YAML workflow definitions** | Human-readable, versionable, no build step required. JSON Schema inline in YAML, registered as CAS nodes on `workflow put`. |
 | **Stateless single-step CLI** | Each `uwf thread step` is atomic — no in-memory state, no daemon, no long-running process. OS handles lifecycle. |
 | **CAS-backed thread state** | Immutable linked nodes enable fork, replay, and GC without copying data. Content-addressed deduplication across threads. |
-| **JSONata moderator** | Declarative condition expressions evaluated against thread history. No LLM cost for routing decisions. |
+| **Status-based moderator** | Status-based map routing — `graph[role][status]` lookup against last output. No LLM cost for routing decisions. |
 | **Frontmatter markdown output** | Agents produce structured meta (YAML frontmatter) alongside free-form content (markdown body). Enables zero-cost extraction when frontmatter is well-formed. |
 | **Two-layer extract** | Fast path avoids LLM calls when agents follow the format; LLM fallback handles messy output gracefully. |
 | **Prompt injection for format** | Output format instruction prepended to system prompt ensures agents produce parseable output without per-agent configuration. |
@@ -0,0 +1,779 @@
+# Built-in Role Agent 调研
+
+## 目标
+
+实现一个内置的 role agent（暂称 `uwf-builtin`），不依赖 hermes/openclaw 等外部 agent 进程。
+直接使用 workflow config 中配置的 model，自己实现 agent run loop 和关键 toolkit。
+
+---
+
+## 关键问题
+
+### Q1: Agent 接口协议
+
+现有 agent 是怎么被 CLI 调用的？输入（argv、环境变量）和输出（stdout、CAS）格式是什么？
+
+**调研要点：**
+- `cli-workflow` 里 `spawnAgent` 的完整实现
+- AgentConfig 类型定义
+- agent 进程的 exit code 约定
+- 环境变量传递（UWF_STORAGE_ROOT 等）
+
+**答案：**
+
+#### 调用链
+
+`uwf thread step` → `cmdThreadStepOnce` → moderator 求值下一 role → `resolveAgentConfig` → `spawnAgent`。
+
+#### AgentConfig 类型
+
+```146:149:packages/workflow-protocol/src/types.ts
+export type AgentConfig = {
+  command: string;
+  args: string[];
+};
+```
+
+在 `config.yaml` 的 `agents` 段注册，例如 `hermes: { command: "uwf-hermes", args: [] }`。
+
+#### spawnAgent 行为
+
+```627:653:packages/cli-workflow/src/commands/thread.ts
+function spawnAgent(agent: AgentConfig, threadId: ThreadId, role: string): CasRef {
+  const argv = [...agent.args, threadId, role];
+  let stdout: string;
+  try {
+    stdout = execFileSync(agent.command, argv, {
+      encoding: "utf8",
+      env: process.env,
+      stdio: ["ignore", "pipe", "pipe"],
+    });
+  } catch (e) {
+  // ... stderr 拼进 fail 消息
+  }
+
+  const line = stdout.trim().split("\n").pop()?.trim() ?? "";
+  if (!isCasRef(line)) {
+    fail(`agent stdout is not a valid CAS hash: ${line || "(empty)"}`);
+  }
+  return line;
+}
+```
+
+| 项目 | 约定 |
+|------|------|
+| **argv** | `[...agent.args, <thread-id>, <role>]`；当 `agent.args` 为空时即 `process.argv[2]`=threadId，`process.argv[3]`=role（与 `createAgent` 的 `parseArgv` 一致） |
+| **stdin** | 忽略 |
+| **stdout** | 纯文本，**最后一行**必须是新 `StepNode` 的 CAS hash（13 字符 Crockford Base32） |
+| **stderr** | 失败时 CLI 会附带 stderr；成功时无约定 |
+| **exit code** | `0` = 成功；非 0 时 `execFileSync` 抛错，step 失败 |
+| **环境变量** | 继承父进程 `process.env`（含 storage root、API key 等） |
+| **链头更新** | **不由 agent 负责**；agent 只写 CAS StepNode，CLI 在拿到 stdout hash 后更新 `threads.yaml` |
+
+Agent 解析优先级（`resolveAgentConfig`）：
+
+1. CLI `--agent` override（整段 command + args 字符串）
+2. `config.agentOverrides[workflow.name][role]`
+3. `config.defaultAgent`
+
+#### 环境变量：Storage Root
+
+文档中写的 `UWF_STORAGE_ROOT` **在当前代码中不存在**。实际优先级（`workflow-util-agent` / `cli-workflow` 一致）：
+
+```33:43:packages/workflow-util-agent/src/storage.ts
+export function resolveStorageRoot(): string {
+  const internal = process.env.UNCAGED_WORKFLOW_STORAGE_ROOT;
+  if (internal !== undefined && internal !== "") {
+    return internal;
+  }
+  const userOverride = process.env.WORKFLOW_STORAGE_ROOT;
+  if (userOverride !== undefined && userOverride !== "") {
+    return userOverride;
+  }
+  return getDefaultStorageRoot();
+}
+```
+
+Agent 子进程通过继承的 `process.env` 与父 CLI 共享同一 storage root；`createAgent` 内还会 `loadDotenv({ path: getEnvPath(storageRoot) })` 加载 `~/.uncaged/workflow/.env`。
+
+#### Agent 侧职责（设计文档 + 实现）
+
+- 读 `threads.yaml` 链头，构建 context，执行 role
+- 将 `StepNode` 写入 CAS（`output` / `detail` / `agent` / `prev` / `start`）
+- stdout 打印 step hash
+- **不**更新 `threads.yaml`
+
+---
+
+### Q2: createAgent 工厂
+
+workflow-util-agent 的 `createAgent` 做了什么？它的完整生命周期是什么？
+
+**调研要点：**
+- `AgentOptions` 类型的 `run` 和 `continue` 回调签名
+- `AgentRunResult` 的完整定义
+- retry 逻辑（frontmatter 校验失败后的重试机制）
+- `persistStep` 写入 CAS 的 StepNode 结构
+
+**答案：**
+
+#### 类型定义
+
+```4:35:packages/workflow-util-agent/src/types.ts
+export type AgentContext = ModeratorContext & {
+  threadId: ThreadId;
+  role: string;
+  store: Store;
+  workflow: WorkflowPayload;
+  outputFormatInstruction: string;
+};
+
+export type AgentRunResult = {
+  output: string;
+  detailHash: CasRef;
+  sessionId: string;
+};
+
+export type AgentContinueFn = (
+  sessionId: string,
+  message: string,
+  store: AgentContext["store"],
+) => Promise<AgentRunResult>;
+
+export type AgentRunFn = (ctx: AgentContext) => Promise<AgentRunResult>;
+
+export type AgentOptions = {
+  name: string;
+  run: AgentRunFn;
+  continue: AgentContinueFn;
+};
+```
+
+- **`run(ctx)`**：首次执行，返回原始 agent 文本 `output`、审计用 `detailHash`、用于续聊的 `sessionId`。
+- **`continue(sessionId, message, store)`**：在同一 session 上追加用户消息（用于 frontmatter 纠错），再次返回 `AgentRunResult`。
+
+`createAgent(options)` 返回 `() => Promise<void>`，作为 agent CLI 的 `main`（见 `uwf-hermes` 的 `cli.ts`）。
+
+#### 生命周期（按执行顺序）
+
+```101:152:packages/workflow-util-agent/src/run.ts
+export function createAgent(options: AgentOptions): () => Promise<void> {
+  return async function main(): Promise<void> {
+    const { threadId, role } = parseArgv(process.argv);
+    const storageRoot = resolveStorageRoot();
+    loadDotenv({ path: getEnvPath(storageRoot) });
+
+    const ctx = await buildContextWithMeta(threadId, role);
+    // 1. 校验 role 存在
+    // 2. 从 CAS 取 frontmatter JSON Schema → buildOutputFormatInstruction → ctx.outputFormatInstruction
+
+    let agentResult = await options.run(ctx);
+
+    let outputHash = await tryExtractOutput(agentResult.output, roleDef.frontmatter, ctx);
+
+    for (let retry = 0; retry < MAX_FRONTMATTER_RETRIES && outputHash === null; retry++) {
+      const correctionMessage = "Your previous response did not contain valid YAML frontmatter...";
+      agentResult = await options.continue(agentResult.sessionId, correctionMessage, ctx.meta.store);
+      outputHash = await tryExtractOutput(agentResult.output, roleDef.frontmatter, ctx);
+    }
+
+    if (outputHash === null) { fail(...); }
+
+    const stepHash = await persistStep({ ctx, outputHash, detailHash: agentResult.detailHash, agentName });
+    process.stdout.write(`${stepHash}\n`);
+  };
+}
+```
+
+| 阶段 | 行为 |
+|------|------|
+| 解析 argv | `argv[2]=threadId`, `argv[3]=role`，缺失则 `stderr` + `exit(1)` |
+| Context | `buildContextWithMeta` + 可选 `outputFormatInstruction` |
+| Run | `options.run(ctx)` |
+| Extract | **仅** `tryFrontmatterFastPath`（见 Q4）；**不**调用 `extract()` LLM fallback |
+| Retry | 最多 `MAX_FRONTMATTER_RETRIES = 2` 次 `continue` + 再试 fast-path |
+| Persist | `persistStep` → `writeStepNode` |
+| 输出 | stdout 一行 step CAS hash |
+
+#### StepNode 写入结构
+
+```44:68:packages/workflow-util-agent/src/run.ts
+async function writeStepNode(options: {
+  store: AgentStore["store"];
+  schemas: AgentStore["schemas"];
+  startHash: CasRef;
+  prevHash: CasRef | null;
+  role: string;
+  outputHash: CasRef;
+  detailHash: CasRef;
+  agentName: string;
+}): Promise<CasRef> {
+  const payload: StepNodePayload = {
+    start: options.startHash,
+    prev: options.prevHash,
+    role: options.role,
+    output: options.outputHash,
+    detail: options.detailHash,
+    agent: options.agentName,
+  };
+  // store.put(stepNode schema) + validate
+}
+```
+
+`agentName` 经 `agentLabel(name)` 规范化：已有 `uwf-` 前缀则原样，否则加 `uwf-`（如 `hermes` → `uwf-hermes`）。
+
+`prevHash`：若链头仍是 `StartNode` 则为 `null`，否则为当前 head step hash。
+
+---
+
+### Q3: Context Builder
+
+`buildContextWithMeta` 构建了什么上下文给 agent？
+
+**调研要点：**
+- `AgentContext` 完整类型定义（所有字段）
+- context 构建过程（CAS chain walk）
+- `outputFormatInstruction` 怎么生成的
+- role definition 怎么获取（从 workflow YAML）
+
+**答案：**
+
+#### AgentContext 字段
+
+继承 `ModeratorContext`：
+
+```60:68:packages/workflow-protocol/src/types.ts
+export type ModeratorContext = {
+  start: StartNodePayload;
+  steps: StepContext[];
+};
+```
+
+```48:51:packages/workflow-protocol/src/types.ts
+export type StartNodePayload = {
+  workflow: CasRef;
+  prompt: string;
+};
+```
+
+```61:63:packages/workflow-protocol/src/types.ts
+export type StepContext = Omit<StepRecord, "output"> & {
+  output: unknown;
+};
+```
+
+`AgentContext` 额外字段：
+
+| 字段 | 类型 | 含义 |
+|------|------|------|
+| `threadId` | `ThreadId` | 当前线程 |
+| `role` | `string` | 本步要执行的角色名 |
+| `store` | `Store` | CAS store（读写节点） |
+| `workflow` | `WorkflowPayload` | 已从 CAS 加载的 workflow 定义 |
+| `outputFormatInstruction` | `string` | 由 `createAgent` 根据 role 的 frontmatter schema 生成；`buildContext*` 初始为 `""` |
+
+`buildContextWithMeta` 还返回 `meta`：
+
+```148:154:packages/workflow-util-agent/src/context.ts
+export type BuildContextMeta = {
+  storageRoot: string;
+  store: Store;
+  schemas: AgentStore["schemas"];
+  headHash: CasRef;
+  chain: ChainState;
+};
+```
+
+#### CAS chain walk
+
+1. 从 `threads.yaml[threadId]` 取 `headHash`
+2. `walkChain`：若 head 是 `StartNode`，`stepsNewestFirst=[]`；否则沿 `prev` 收集所有 `StepNode`，顺序为 newest-first（最新在前）
+3. `buildHistory`：反转为时间序，`expandOutput` 把每步 `output` CasRef 展开为 JSON payload（供 prompt / moderator 使用）
+4. `loadWorkflow`：从 `start.workflow` CasRef 加载 `WorkflowPayload`
+
+#### Role definition 来源
+
+- 作者写在 workflow YAML 的 `roles.<name>`（`goal`, `capabilities`, `procedure`, `output`, `frontmatter` 等）
+- `uwf workflow put` 时 `frontmatter` 内联 JSON Schema 经 `putSchema` 存入 CAS，workflow 里存的是 **CasRef**
+- Agent 运行时：`ctx.workflow.roles[ctx.role]` → `RoleDefinition`
+
+#### outputFormatInstruction
+
+在 `createAgent` 中，若 `getSchema(store, roleDef.frontmatter)` 非空，则：
+
+```typescript
+ctx.outputFormatInstruction = buildOutputFormatInstruction(frontmatterSchema);
+```
+
+`buildOutputFormatInstruction` 根据 JSON Schema 的 `properties` 生成「必须以 `---` YAML frontmatter 开头」的说明和示例字段列表（见 `build-output-format-instruction.ts`）。
+
+各 agent 实现（Hermes / Claude Code）在组装 prompt 时把该块放在最前，再接 `buildRolePrompt(roleDef)`。
+
+---
+
+### Q4: Extract Pipeline
+
+agent 输出怎么被处理成结构化数据？
+
+**调研要点：**
+- frontmatter fast-path 的完整逻辑
+- LLM extract fallback 的实现（`extract.ts`）
+- frontmatter schema 从哪里来（role 定义里的 `frontmatter` 字段）
+- 校验失败时的 correction prompt 是什么
+
+**答案：**
+
+#### Schema 来源
+
+Workflow YAML 中每个 role 的 `frontmatter:` 段是 JSON Schema 对象；注册时：
+
+```66:76:packages/cli-workflow/src/commands/workflow.ts
+async function resolveFrontmatterRef(..., frontmatter: unknown): Promise<CasRef> {
+  // 校验为 JSON Schema → putSchema → 返回 CasRef
+}
+```
+
+运行时 `roleDef.frontmatter` 即该 schema 的 CAS hash；structured `output` 节点用**同一 schema** 写入 CAS。
+
+#### Frontmatter fast-path（createAgent 实际使用的路径）
+
+```148:195:packages/workflow-util-agent/src/frontmatter.ts
+export async function tryFrontmatterFastPath(
+  raw: string,
+  outputSchema: CasRef,
+  store: Store,
+): Promise<FrontmatterFastPathResult | null>
+```
+
+流程：
+
+1. `parseFrontmatterMarkdown(raw)` → 标准 agent 字段（`status`, `next`, `confidence`, `artifacts`, `scope`）+ body
+2. `validateFrontmatter` 失败 → `null`
+3. `getSchema(store, outputSchema)` + `extractSchemaFields` 得到 role 需要的属性名
+4. `buildCandidate`：从标准 frontmatter + YAML 原始字段拼出符合 schema 的对象
+5. `store.put(outputSchema, candidate)` + `validate` → 成功则 `{ body, outputHash }`
+
+**永不抛错**，失败返回 `null`。
+
+#### LLM extract fallback（已实现但未接入 createAgent）
+
+```135:181:packages/workflow-util-agent/src/extract.ts
+export async function extract(
+  rawOutput: string,
+  outputSchema: CasRef,
+  config: WorkflowConfig,
+): Promise<ExtractResult>
+```
+
+- 模型：`resolveExtractModelAlias(config)` → `modelOverrides.extract` → `models.extract` → `models.default` → `defaultModel`
+- HTTP：`POST {baseUrl}/chat/completions`，`response_format: { type: "json_object" }`
+- System：要求按 JSON Schema 从 agent 输出提取单个 JSON 对象
+- 校验通过后 `store.put(outputSchema, structured)`
+
+**重要：`createAgent` 当前未调用 `extract()`**。fast-path 失败且 2 次 `continue` 仍失败则直接 `fail()`。builtin agent 若希望无 frontmatter 也能跑，需在 kit 或 builtin 层显式接入 `extract()`。
+
+#### Correction prompt（retry）
+
+```125:128:packages/workflow-util-agent/src/run.ts
+const correctionMessage =
+  "Your previous response did not contain valid YAML frontmatter matching the role schema.\n" +
+  "You MUST begin your response with a YAML frontmatter block (--- delimited).\n" +
+  "Please output ONLY the corrected frontmatter block followed by your work.";
+```
+
+通过 `options.continue(sessionId, correctionMessage, store)` 发给外部 agent；builtin 需在自有 message 历史里 append 同等语义的 user 消息。
+
+---
+
+### Q5: Model 配置与 LLM 调用
+
+workflow 怎么配置和使用 model？
+
+**调研要点：**
+- `WorkflowConfig` 中 providers/models/defaultModel/modelOverrides 的完整定义
+- `resolveModel` 函数的实现
+- `chatCompletionText` 的实现（OpenAI 兼容 HTTP 客户端）
+- 有没有 streaming 支持？tool calling 支持？
+
+**答案：**
+
+#### WorkflowConfig
+
+```136:160:packages/workflow-protocol/src/types.ts
+export type ProviderConfig = {
+  baseUrl: string;
+  apiKeyEnv: string;
+};
+
+export type ModelConfig = {
+  provider: ProviderAlias;
+  name: string;
+};
+
+export type WorkflowConfig = {
+  providers: Record<ProviderAlias, ProviderConfig>;
+  models: Record<ModelAlias, ModelConfig>;
+  agents: Record<AgentAlias, AgentConfig>;
+  defaultAgent: AgentAlias;
+  agentOverrides: Record<WorkflowName, Record<RoleName, AgentAlias>> | null;
+  defaultModel: ModelAlias;
+  modelOverrides: Record<Scenario, ModelAlias> | null;
+};
+```
+
+示例见 `docs/architecture.md`（`providers` / `models` / `defaultModel` / `modelOverrides.extract`）。
+
+#### resolveModel
+
+```32:50:packages/workflow-util-agent/src/extract.ts
+export function resolveModel(config: WorkflowConfig, alias: ModelAlias): ResolvedLlmProvider {
+  const modelEntry = config.models[alias];
+  const providerEntry = config.providers[modelEntry.provider];
+  const apiKey = process.env[providerEntry.apiKeyEnv];
+  return { baseUrl: providerEntry.baseUrl, apiKey, model: modelEntry.name };
+}
+```
+
+`ResolvedLlmProvider = { baseUrl, apiKey, model }`。
+
+Extract 专用别名解析：
+
+```18:30:packages/workflow-util-agent/src/extract.ts
+export function resolveExtractModelAlias(config: WorkflowConfig): ModelAlias {
+  return config.modelOverrides?.extract ?? (config.models.extract ? "extract" : config.models.default ? "default" : config.defaultModel);
+}
+```
+
+**尚无**按 role/workflow 解析 agent 主模型的函数（`modelOverrides` 目前只按 scenario 取值，如 `extract`）；builtin 首版可用 `config.defaultModel`，扩展时可加 `modelOverrides.agent` 或与 `agentOverrides` 对称的表。
+
+#### chatCompletionText
+
+```87:124:packages/workflow-util-agent/src/extract.ts
+async function chatCompletionText(
+  provider: ResolvedLlmProvider,
+  messages: Array<{ role: "system" | "user"; content: string }>,
+): Promise<string>
+```
+
+| 能力 | 现状 |
+|------|------|
+| 协议 | OpenAI 兼容 `POST /chat/completions` |
+| Streaming | **无**（一次性 `response.text()`） |
+| Tool calling | **无**（无 `tools` / `tool_calls` 字段） |
+| 多模态 | **无**（仅 text `content`） |
+| Extract 专用 | `response_format: { type: "json_object" }` |
+
+builtin agent 的 run loop 需要**新写**带 `tools` 的 completion 客户端（可放在 `workflow-agent-builtin` 或扩展 `workflow-util-agent` 的 `llm/` 模块）；当前 `chatCompletionText` 不支持 `tools`，无法原样复用。
+
+---
+
+### Q6: Hermes Agent 参考实现
+
+`uwf-hermes` 是怎么实现 `run` 和 `continue` 的？
+
+**调研要点：**
+- prompt 怎么组装的（outputFormatInstruction + rolePrompt + task + history）
+- hermes CLI 的调用参数
+- session management（resume）
+- 输出怎么捕获
+
+**答案：**
+
+#### Prompt 组装
+
+```40:53:packages/workflow-agent-hermes/src/hermes.ts
+export function buildHermesPrompt(ctx: AgentContext): string {
+  const roleDef = ctx.workflow.roles[ctx.role];
+  const rolePrompt = roleDef !== undefined ? buildRolePrompt(roleDef) : "";
+  const parts: string[] = [];
+  if (ctx.outputFormatInstruction !== "") {
+    parts.push(ctx.outputFormatInstruction, "");
+  }
+  parts.push(rolePrompt, "", "## Task", ctx.start.prompt);
+  const historyBlock = buildHistorySummary(ctx.steps);
+  if (historyBlock !== "") {
+    parts.push("", historyBlock);
+  }
+  return parts.join("\n");
+}
+```
+
+`buildRolePrompt` 生成 `## Goal` / `## Capabilities` / `## Prepare`（含 `generateCliReference()`）/ `## Procedure` / `## Output`。
+
+`buildHistorySummary`：每步 `role`、`JSON.stringify(step.output)`、`agent`。
+
+Hermes 把**整段 prompt 作为单条 user 消息**传给 `hermes chat -q`（无独立 system channel）。
+
+#### Hermes CLI 参数
+
+首次：
+
+```88:97:packages/workflow-agent-hermes/src/hermes.ts
+spawnHermes(["chat", "-q", prompt, "--yolo", "--max-turns", "90", "--quiet"]);
+```
+
+续聊：
+
+```100:114:packages/workflow-agent-hermes/src/hermes.ts
+spawnHermes(["chat", "--resume", sessionId, "-q", message, "--yolo", "--max-turns", "90", "--quiet"]);
+```
+
+#### Session
+
+- stdout/stderr 中解析 `session_id: <id>`（`parseSessionIdFromStdout`）
+- 会话文件：`~/.hermes/sessions/session_<id>.json`
+- `loadHermesSession` → `storeHermesSessionDetail`：每条 assistant/tool 消息写成一个 CAS turn 节点，汇总为 `detail`；**output 文本** = 最后一条非空 `assistant` 消息的 `content`
+
+#### 与 createAgent 的衔接
+
+```157:164:packages/workflow-agent-hermes/src/hermes.ts
+export function createHermesAgent(): () => Promise<void> {
+  return createAgent({ name: "hermes", run: runHermes, continue: continueHermes });
+}
+```
+
+`uwf-hermes` 入口：`createHermesAgent()` 即 main。
+
+Claude Code 包（`workflow-agent-claude-code`）结构相同：`buildClaudeCodePrompt` 同构，`claude -p` + `--resume` + JSON stdout 解析。
+
+---
+
+### Q7: Toolkit 需求分析
+
+要实现一个自给自足的 agent，最少需要哪些 tool？
+
+**调研要点：**
+- 现有 workflow example（solve-issue.yaml）里 role 都做什么任务
+- hermes agent 在 workflow 场景下常用哪些 tool
+- 哪些 tool 是 agent loop 必须的（如 file read/write、shell exec、web fetch）
+
+**答案：**
+
+#### solve-issue.yaml 角色能力
+
+| Role | capabilities | 隐含需求 |
+|------|----------------|----------|
+| planner | issue-analysis, planning | 读上下文/仓库、总结，通常不需写代码 |
+| developer | file-edit, shell, testing | **读文件、写文件、执行命令** |
+| reviewer | code-review, static-analysis | 读 diff/文件、静态分析（可读+可选 shell） |
+
+#### Hermes 侧
+
+Hermes 自带完整 agent runtime（`--yolo`、max-turns），tool 集由 Hermes 项目定义，workflow 不配置。从 session JSON 可见 `tool_calls` 被记入 detail，常见包括文件与 shell 类工具。
+
+#### Builtin 最小 toolkit 建议
+
+| 优先级 | Tool | 用途 |
+|--------|------|------|
+| P0 | `read_file` | 读仓库/配置/issue 上下文 |
+| P0 | `write_file` / `edit_file` | developer 改代码 |
+| P0 | `run_command` | 测试、构建、git（需 cwd + timeout + 输出截断） |
+| P1 | `list_dir` / `glob` | 导航代码库 |
+| P1 | `grep` | 搜索符号/引用 |
+| P2 | `fetch_url` | 查文档（planner 偶尔需要） |
+
+**不需要**在 builtin 里实现 moderator / workflow 路由工具——仍由 `uwf thread step` + status-based moderator 负责。
+
+#### Agent loop 必须能力
+
+1. 多轮 LLM 调用 + **OpenAI-style tool_calls** 解析与执行
+2. 将 tool 结果 append 回 messages
+3. 终止条件：模型不再请求 tool，或达到 `maxTurns`
+4. 最终响应须含合法 YAML frontmatter（满足 Q4），供 `createAgent` fast-path
+
+---
+
+## 方案草案
+
+（调研完成后基于以上答案撰写）
+
+### 架构设计
+
+```mermaid
+flowchart TB
+  subgraph cli ["cli-workflow"]
+    Step["uwf thread step"]
+    Spawn["spawnAgent(uwf-builtin, threadId, role)"]
+    Step --> Spawn
+  end
+
+  subgraph builtin_pkg ["@uncaged/workflow-agent-builtin"]
+    Main["createBuiltinAgent() = createAgent({...})"]
+    Prompt["buildBuiltinPrompt(ctx)"]
+    Loop["runBuiltinLoop(provider, messages, tools)"]
+    Tools["Toolkit: read/write/exec/..."]
+    Detail["storeBuiltinDetail(turns)"]
+    Main --> Prompt
+    Main --> Loop
+    Loop --> Tools
+    Loop --> Detail
+  end
+
+  subgraph kit ["workflow-util-agent"]
+    Ctx["buildContextWithMeta"]
+    FM["tryFrontmatterFastPath"]
+    Persist["persistStep"]
+    Ctx --> Main
+    Main --> FM
+    FM --> Persist
+  end
+
+  subgraph cas ["CAS / config"]
+    Config["config.yaml models/providers"]
+    CAS["cas/ + threads.yaml"]
+  end
+
+  Spawn --> Main
+  Config --> Loop
+  CAS --> Ctx
+  Persist --> CAS
+  Spawn -->|"stdout: step hash"| Step
+```
+
+**新包**：`packages/workflow-agent-builtin`，bin `uwf-builtin`，仅依赖 `workflow-util-agent`、`workflow-protocol`、`workflow-util`（可选 `@uncaged/json-cas` 写 detail schema）。
+
+**分层**：
+
+| 层 | 职责 |
+|----|------|
+| `createAgent`（kit） | argv、context、frontmatter extract、StepNode、stdout 协议 — **不变** |
+| `builtin/agent.ts` | `run` / `continue` 实现 |
+| `builtin/llm.ts` | OpenAI 兼容 chat + tools（可后续抽到 kit） |
+| `builtin/tools/*.ts` | 各 tool 的 JSON Schema + handler |
+| `builtin/prompt.ts` | 复用 Hermes 的 prompt 拼接逻辑（或抽到 kit 的 `buildAgentPrompt`） |
+| `builtin/detail.ts` | 类似 Hermes：每轮 assistant/tool 写入 CAS detail |
+
+**配置集成**：
+
+```yaml
+agents:
+  builtin:
+    command: "uwf-builtin"
+    args: []
+defaultAgent: "builtin"   # 或 agentOverrides 按 role 指定
+```
+
+模型：首版 `resolveModel(config, config.defaultModel)`；后续可增加 `modelOverrides.agent` 或 per-role 映射。
+
+---
+
+### Agent Run Loop
+
+伪代码（单次 `run(ctx)`）：
+
+```
+1. provider ← resolveModel(loadWorkflowConfig(), defaultModel)
+2. system ← buildBuiltinPrompt(ctx)   // outputFormatInstruction + buildRolePrompt + Task + History
+3. messages ← [{ role: "system", content: system }]
+4. sessionId ← newULID()              // 内存或临时目录，供 continue 使用
+5. turns ← []
+
+6. for turn in 1..MAX_TURNS:
+     response ← chatCompletionWithTools(provider, messages, TOOL_DEFINITIONS)
+     record assistant message + tool_calls in turns
+
+     if response has no tool_calls:
+       finalText ← response.content
+       break
+
+     for each tool_call:
+       result ← executeTool(tool_call, { cwd: process.cwd() })
+       messages.push tool result
+       record in turns
+
+7. if no finalText with valid frontmatter after loop:
+     optionally one-shot "finalize" message without tools
+
+8. detailHash ← storeBuiltinDetail(store, sessionId, turns, metadata)
+9. return { output: finalText, detailHash, sessionId }
+```
+
+**`continue(sessionId, message, store)`**：
+
+- 从内存/磁盘恢复 `messages` + `turns`
+- `messages.push({ role: "user", content: message })`（correction 或续聊）
+- 从步骤 6 继续，步数上限可单独设小一点（如 3）
+- 返回新的 `AgentRunResult`
+
+**与 frontmatter 的配合**：
+
+- system prompt 已含 `outputFormatInstruction`；最后一轮可强制追加一条 user 消息：`If you have not yet given your final answer, output it now, starting with the YAML frontmatter block.`
+- 仍依赖 `createAgent` 的 fast-path + 最多 2 次 continue
+
+**安全**：
+
+- `run_command`：白名单或需 `UWF_BUILTIN_ALLOW_SHELL=1`，默认工作区限定在 `process.cwd()` 或 `start` 中将来扩展的 `workspace` 字段
+- 路径：禁止 `..` 逃逸出 workspace root
+
+---
+
+### Toolkit 设计
+
+统一注册表：
+
+```typescript
+type BuiltinTool = {
+  name: string;
+  description: string;
+  parameters: JSONSchema; // object type
+  execute: (args: unknown, ctx: ToolContext) => Promise<string>;
+};
+
+type ToolContext = {
+  cwd: string;
+  storageRoot: string;
+};
+```
+
+| Tool name | OpenAI function | 行为摘要 |
+|-----------|-----------------|----------|
+| `read_file` | `read_file` | `{ path }` → UTF-8 文本，大小上限 |
+| `write_file` | `write_file` | `{ path, content }` → 写盘，返回确认 |
+| `edit_file` | 可选 | search/replace 块，减少 token |
+| `run_command` | `run_command` | `{ command, cwd? }` → stdout/stderr 截断 |
+| `list_dir` | `list_dir` | `{ path }` → 条目列表 |
+| `grep` | `grep` | `{ pattern, path? }` → 匹配行 |
+
+**LLM 请求形状**（扩展 extract 客户端）：
+
+```json
+{
+  "model": "...",
+  "messages": [...],
+  "tools": [{ "type": "function", "function": { "name", "description", "parameters" } }],
+  "tool_choice": "auto"
+}
+```
+
+解析 `choices[0].message.tool_calls`，执行后以 `{ role: "tool", tool_call_id, content }` 回传。
+
+首版**不提供** streaming；detail CAS 记录每轮 tool 名/参数/结果摘要，供 `uwf thread step-details` 调试。
+
+---
+
+### 与现有架构的集成
+
+| 集成点 | 方式 |
+|--------|------|
+| CLI 协议 | 实现标准 agent CLI：`uwf-builtin <thread-id> <role>`，stdout 一行 step hash，exit 0/1 |
+| 工厂 | `export function createBuiltinAgent()` → `createAgent({ name: "builtin", run, continue })` |
+| Context / Prompt | 复用 `buildContextWithMeta`、`buildRolePrompt`、`buildOutputFormatInstruction`；prompt 布局对齐 `buildHermesPrompt` |
+| 结构化输出 | 优先 YAML frontmatter fast-path；可选后续在 `createAgent` 增加 `extract()` fallback 开关 |
+| 配置 | `config.yaml` 增加 `agents.builtin`；`uwf setup` 可选默认 agent |
+| 存储 | `resolveStorageRoot()` + `loadWorkflowConfig` + `getEnvPath`；与 Hermes 相同，**不**改 `threads.yaml` 写入方 |
+| 测试 | 单元测试：tool handlers、prompt 组装、mock LLM tool loop；集成测试：临时 storage root + fake provider |
+| 发布 | 新包 `@uncaged/workflow-agent-builtin`，bin `uwf-builtin`，加入 `scripts/publish-all.mjs` |
+
+**明确不做**：
+
+- 不替代 moderator / 不在 agent 内调用 `uwf thread step`
+- 不依赖 Hermes/OpenClaw/Claude Code 二进制
+- 首版不实现 streaming、不实现 MCP
+
+**建议实现顺序**：
+
+1. `llm.ts`：tool calling HTTP 客户端 + 单测
+2. P0 tools + `runBuiltinLoop`
+3. `createBuiltinAgent` + detail CAS
+4. `config` / docs / `examples` 可选 `agentOverrides` 演示
+5. （可选）`createAgent` 接入 `extract()` fallback
@@ -0,0 +1,73 @@
+# Issue #418: ACP session/resume 返回空文本
+
+## 调研日期: 2026-05-23
+
+## 根因
+
+`session/resume` 在 restore 路径下 `_make_agent()` 失败，异常被静默吞掉。
+
+### 完整调用链
+
+```
+resume_session(sid)
+  → update_cwd(sid)
+    → get_session(sid) → _restore(sid)
+      → _make_agent()
+        → resolve_runtime_provider("custom") 失败（line 548-561）
+        → AIAgent() 抛出 "No LLM provider configured"（line 564）
+      → except Exception 静默吞掉（line 482-484）→ return None
+    → return None
+  → state is None → fallback: create_session()（新 sid，无历史）
+```
+
+### 关键代码位置（acp_adapter/session.py）
+
+- `_restore()` line 426-498: 从 DB 恢复 session，但 except 太宽泛
+- `_make_agent()` line 520-568: provider 解析在 restore 路径下不完整
+- Line 548-561: `resolve_runtime_provider("custom")` 失败后，`base_url` 虽然从 DB 取到了但没传给 AIAgent
+
+### 实测行为
+
+1. Phase 1: `session/new` + `prompt` → 正常，有 `agent_message_chunk`
+2. Phase 2: `session/resume` + `prompt`
+   - resume 返回成功，但 `available_commands_update` 里 sessionId 是新的（create_session fallback）
+   - 用原始 sid 发 prompt → `stopReason: "refusal"`（session 不在内存中）
+   - 用新 sid 发 prompt → 能跑但无历史（agent 回答"不知道 secret code"）
+
+### 验证脚本
+
+```bash
+# 直接调用 _restore 验证
+cd ~/.hermes/hermes-agent
+python3 -c "
+import sys; sys.path.insert(0, '.')
+from acp_adapter.session import SessionManager
+sm = SessionManager()
+result = sm._restore('SESSION_ID_HERE')
+print(result)  # None — _make_agent 抛异常被吞掉
+"
+```
+
+### 两个 bug
+
+1. **`_make_agent` provider fallback 不完整**: restore 时 DB 里有 `base_url` 和 `api_mode`，但 `resolve_runtime_provider` 失败后这些值没被正确传递给 AIAgent
+2. **`_restore` 的 except 太宽泛**: 静默吞掉所有异常，相关日志也只以 debug 级别输出，导致 resume 失败完全无感知
+
+### Hermes 版本
+
+- v0.10.0 (2026.4.16) — 初始测试
+- v0.14.0 (2026.5.16) — 更新后重新测试，bug 仍在
+- 代码路径: ~/.hermes/hermes-agent/acp_adapter/session.py
+
+### v0.14.0 测试结果 (2026-05-23)
+
+- `_restore` 仍因 `custom` provider 解析失败返回 None
+- 日志更清晰了：`WARNING: Failed to recreate agent for ACP session ...`
+- resume fallback 创建新 session（新 sid），但 agent 居然能回答之前的问题（可能通过 memory/session search）
+- 核心问题不变：sessionId 变了，client 用旧 sid 发 prompt → refusal
+
+### 上游 Issue
+
+- https://github.com/NousResearch/hermes-agent/issues/13489 — 已评论根因分析
+- https://github.com/NousResearch/hermes-agent/issues/8083 — resume 静默创建新 session
+- https://github.com/NousResearch/hermes-agent/issues/18452 — _make_agent fallback 不完整
@@ -0,0 +1,27 @@
+---
+description: Ban dynamic import() in production code — use static imports instead
+globs: packages/*/src/**/*.ts
+alwaysApply: true
+---
+
+# No Dynamic Import in Production Code
+
+## Rule
+
+Do NOT use `await import()` or dynamic `import()` expressions in production source code.
+Always use static top-level `import` statements.
+
+## Exception (must include a comment explaining why)
+
+1. **Bundle loader** — loads user-authored workflow bundles whose paths are only known at runtime
+
+When suppressing, add a comment directly above:
+
+```ts
+// Dynamic import required: user bundle path resolved at runtime
+const mod = await import(bundlePath);
+```
+
+## Test Files
+
+Test files (`__tests__/**`) are exempt.
@@ -0,0 +1,67 @@
+# Sync README
+
+When updating README.md files in this monorepo, follow these conventions.
+
+## Scope
+
+- Root `README.md` — project overview and navigation hub
+- Per-package `packages/*/README.md` — each package self-contained
+
+## Root README Structure
+
+The root README should have these sections in order:
+
+1. **Title and one-liner** — stateless workflow engine driven by single-step CLI
+2. **Overview** — 2-3 paragraphs explaining what it does and key concepts
+3. **Architecture** — dependency layer diagram (text-based)
+4. **Packages** — table with ALL packages from packages/ directory, columns: Package, Description, Type (cli/lib/agent/app)
+5. **Quick Start** — install, build, register workflow, start thread, run step
+6. **CLI Reference** — brief command list, detailed usage in cli-workflow README
+7. **Development** — bun install / build / check / test
+
+## Per-Package README Structure
+
+Each package README should have:
+
+1. **Title** — package name
+2. **One-line description** — matching package.json
+3. **Overview** — what it does, where it sits in the architecture, dependencies
+4. **Installation** — bun add (for libs) or "included as binary" (for cli/agents)
+5. **API** (lib packages) — all exports from src/index.ts with type signatures, grouped by category, minimal usage examples
+6. **CLI Usage** (cli/agent packages) — command reference with examples
+7. **Internal Structure** — brief src/ file organization
+8. **Configuration** (if applicable)
+
+## Execution Steps
+
+### Step 1: Gather current state
+For each package read:
+- package.json (name, version, description, dependencies, bin)
+- src/index.ts (public API exports)
+- Existing README.md (preserve hand-written content worth keeping)
+
+### Step 2: Update root README
+- Ensure ALL packages in packages/ directory are listed in the table
+- Update CLI command reference from uwf --help output
+- Keep Quick Start examples valid
+
+### Step 3: Write/update each package README
+- Follow the per-package structure
+- API section MUST match actual src/index.ts exports — never invent
+- For agent packages: document CLI binary name, how it is invoked
+- For lib packages: document exported types and functions
+- Internal structure: list actual files in src/
+
+### Step 4: Verify
+- All relative links work
+- Package names match package.json
+- No references to removed/renamed packages
+- bun run build still passes
+
+## Guidelines
+
+- Only document what src/index.ts actually exports
+- Root README summarizes, package READMEs go into detail
+- Verify CLI examples against actual commands
+- Preserve existing good prose when updating
+- English for all README content
@@ -75,7 +75,7 @@ uwf thread step 01J7K9M2XNPQR5VWBCDF8G3H4T --agent "bunx uwf-cursor"
 **做的事：**
 1. 读链头 → 当前 StepNode（或 StartNode）
 2. 收集 thread 历史（遍历链）
-3. 调 moderator：评估 JSONata conditions → 得到下一个 role（或 END）
+3. 调 moderator：status-based map lookup → 得到下一个 role（或 END）
 4. 若 END → 归档 thread，输出最后链头，退出
 5. 确定 agent command（`--agent` override > config.yaml per-workflow/role > config.yaml defaultAgent）
 6. 调用：`<agent-cmd> <thread-id> <role>`，捕获 stdout 得到新 StepNode hash
@@ -112,8 +112,8 @@ uwf-hermes <thread-id> <role>

 **约定：**
 - `uwf step` 负责 moderator 决策，将 role 传给 agent CLI
-- agent-kit 根据 thread + role 从 CAS 读 systemPrompt / outputSchema
-- agent-kit 组装完整 prompt（role systemPrompt + thread context + user prompt from StartNode）
+- agent-kit 根据 thread + role 从 CAS 读 goal / capabilities / procedure / output / meta
+- agent-kit 组装完整 prompt（role goal/capabilities/procedure/output + thread context + user prompt from StartNode）
 - agent 执行实际逻辑，agent-kit 负责 extract
 - agent 将 StepNode 写入 CAS（含 output、detail、agent、prev），但**不挪链头指针**
 - stdout 输出新 StepNode 的 CAS hash（纯文本，一行）
@@ -143,7 +143,7 @@ uwf-hermes <thread-id> <role>

 #### `Workflow`

-Roles 和 moderator 内联在 Workflow 中，只有 outputSchema 独立为 CAS 节点（方便 json-cas 校验）。
+Roles 和 moderator 内联在 Workflow 中，只有 meta 独立为 CAS 节点（方便 json-cas 校验）。

 ```yaml
 type: <workflow-schema-hash>
@@ -153,16 +153,25 @@ payload:
  roles:
    planner:
      description: "Creates implementation plan"
-      systemPrompt: "You are a planning agent..."
-      outputSchema: "5GWKR8TN1V3JA"    # cas_ref → JSON Schema 节点（json-cas 内置）
+      goal: "You are a planning agent..."
+      capabilities: [planning, issue-analysis]
+      procedure: "Analyze the issue and create a plan."
+      output: "Output the plan summary."
+      meta: "5GWKR8TN1V3JA"    # cas_ref → JSON Schema 节点（json-cas 内置）
    developer:
      description: "Implements code changes"
-      systemPrompt: "You are a developer agent..."
-      outputSchema: "8CNWT4KR6D1HV"    # cas_ref → JSON Schema 节点
+      goal: "You are a developer agent..."
+      capabilities: [file-edit, shell]
+      procedure: "Implement the plan."
+      output: "List all files changed."
+      meta: "8CNWT4KR6D1HV"    # cas_ref → JSON Schema 节点
    reviewer:
      description: "Reviews code changes"
-      systemPrompt: "You are a code reviewer..."
-      outputSchema: "1VPBG9SM5E7WK"    # cas_ref → JSON Schema 节点
+      goal: "You are a code reviewer..."
+      capabilities: [code-review]
+      procedure: "Review the implementation."
+      output: "Approve or reject with comments."
+      meta: "1VPBG9SM5E7WK"    # cas_ref → JSON Schema 节点
  conditions:
    needsClarification:
      description: "Planner requests clarification from user"
@@ -189,30 +198,22 @@ payload:
        condition: null
 ```

- `roles` — 内联定义，每个 role 的 `outputSchema` 是独立的 cas_ref（指向 json-cas 内置 JSON Schema 节点）
- `conditions` — `Record<Name, JSONata>`，命名条件，方便画图描述
- `graph` — `Record<Role | "$START", Transition[]>`，每个 Transition = `{ role, condition }`
- `condition` 引用 conditions 中的 key，`null` = fallback
- 按数组顺序求值，第一个匹配的 transition 胜出
+- `roles` — 内联定义，每个 role 的 `meta` 是独立的 cas_ref（指向 json-cas 内置 JSON Schema 节点）
+- `graph` — `Record<Role | "$START", Record<Status, Target>>`，每个 Target = `{ role, prompt }`
+- Status 来自上一个 role 输出的 `status` 字段，`$START` 用 `_` 作为初始 status
+- Prompt 模板使用 Mustache 渲染，变量来自 lastOutput
 - 不含 agent binding — agent 配置在 `~/.uncaged/workflow/config.yaml` 中管理

-JSONata 表达式的求值上下文：
+Moderator 的求值逻辑：

-```jsonc
-{
-  "start": {                          // StartNode 信息
-    "workflow": "4KNM2PXR3B1QW",
-    "prompt": "Fix the login bug..."
-  },
-  "steps": [                          // 所有已完成 steps，从旧到新
-    { "role": "planner", "output": { "phases": [...] }, "detail": "7BQST3VW9F2MA", "agent": "uwf-hermes" },
-    { "role": "developer", "output": { "filesChanged": ["src/auth.ts"], "summary": "Fixed redirect" }, "detail": "9KRVW3TN5F1QA", "agent": "uwf-cursor" },
-    { "role": "reviewer", "output": { "approved": false }, "detail": "2MXBG6PN4A8JR", "agent": "uwf-hermes" }
-  ]
-}
+```typescript
+evaluate(graph, lastRole, lastOutput) → { role, prompt }
+// 1. status = lastRole === "$START" ? "_" : lastOutput.status
+// 2. target = graph[lastRole][status]
+// 3. prompt = mustache.render(target.prompt, lastOutput)
 ```

-注：`output` 在上下文中会被自动展开为实际的 CAS 节点内容（而非 hash），方便 JSONata 表达式直接访问字段。
+注：routing 基于 `lastOutput.status` 字段的值，直接在 graph map 中查找对应的 Target。

 #### `StartNode`（Thread 起点）

@@ -234,14 +235,14 @@ payload:
  start: "4TNVW8KR2B3MA"          # cas_ref → StartNode（每个 step 都引用）
  prev: "2MXBG6PN4A8JR"           # cas_ref → 前一个 StepNode，第一步为 null
  role: "developer"
-  output: "9KRVW3TN5F1QA"         # cas_ref → 结构化输出节点（符合 role 的 outputSchema）
+  output: "9KRVW3TN5F1QA"         # cas_ref → 结构化输出节点（符合 role 的 meta schema）
  detail: "7BQST3VW9F2MA"         # cas_ref → 执行详情（content node / 子 workflow terminal StepNode / ...）
  agent: "uwf-cursor"              # 实际使用的 agent 命令（纯字符串）
 ```

 - `start` — 每个 StepNode 都直接引用 StartNode，方便随机访问
 - `prev` — 前一个 StepNode 的 cas_ref，第一步为 `null`（不指向 StartNode）
- `output` — cas_ref，指向符合 role outputSchema 的 CAS 节点，可用 json-cas 校验
+- `output` — cas_ref，指向符合 role meta schema 的 CAS 节点，可用 json-cas 校验
 - `detail` — cas_ref，指向执行详情。可以是原始 agent 输出（content node），也可以是子 workflow thread 的 terminal StepNode（workflowAsAgent 场景）
 - `agent` — 纯字符串，不是 CAS 节点

@@ -340,9 +341,8 @@ OPENROUTER_API_KEY=sk-or-...

 ```
 packages/
-├── cli-workflow/              # @uncaged/cli-workflow — uwf CLI（thread/workflow 命令）
-├── workflow-moderator/        # @uncaged/workflow-moderator — JSONata moderator 引擎
-├── workflow-agent-kit/        # @uncaged/workflow-agent-kit — Agent CLI 框架（含 extractor）
+├── cli-workflow/              # @uncaged/cli-workflow — uwf CLI（thread/workflow 命令，含 src/moderator/）
+├── workflow-util-agent/       # @uncaged/workflow-util-agent — Agent CLI 框架（含 extractor）
 ├── workflow-agent-hermes/     # @uncaged/workflow-agent-hermes — uwf-hermes CLI
 ├── workflow-agent-cursor/ # @uncaged/workflow-agent-cursor — uwf-cursor CLI
 └── workflow-protocol/         # @uncaged/workflow-protocol — 共享类型定义
@@ -358,7 +358,7 @@ packages/

 ## 4. 关键数据类型

-JSONata 求值上下文本质上是 thread 链表的线性化表达。StepNode payload 和上下文中的 step 共享大量字段，提取为公共类型。
+Moderator 通过 status-based map lookup 进行路由。StepNode payload 和上下文中的 step 共享大量字段，提取为公共类型。

 ### 4.1 公共类型

@@ -369,10 +369,10 @@ type CasRef = string;
 /** Thread ID — ULID, 26-char Crockford Base32 */
 type ThreadId = string;

-/** 一个 step 的核心数据，被 StepNode payload 和 JSONata 上下文共享 */
+/** 一个 step 的核心数据，被 StepNode payload 和 moderator 上下文共享 */
 type StepRecord = {
  role: string;
-  output: CasRef;                    // cas_ref → 结构化输出节点（符合 role outputSchema）
+  output: CasRef;                    // cas_ref → 结构化输出节点（符合 role meta schema）
  detail: CasRef;                    // cas_ref → 执行详情（content node / 子 workflow terminal StepNode）
  agent: string;                     // 实际使用的 agent 命令（纯字符串）
 };
@@ -383,26 +383,23 @@ type StepRecord = {
 ```typescript
 type RoleDefinition = {
  description: string;
-  systemPrompt: string;
-  outputSchema: CasRef;              // cas_ref → json-cas 内置 JSON Schema 节点
+  goal: string;
+  capabilities: string[];
+  procedure: string;
+  output: string;
+  meta: CasRef;                      // cas_ref → json-cas 内置 JSON Schema 节点
 };

-type Transition = {
+type Target = {
  role: string;                      // 目标 role 名 或 "$END"
-  condition: string | null;          // 引用 conditions 中的 key，null = fallback
-};
-
-type ConditionDefinition = {
-  description: string;
-  expression: string;                           // JSONata expression
+  prompt: string;                    // Mustache 模板，渲染时注入 lastOutput
 };

 type WorkflowPayload = {
  name: string;
  description: string;
  roles: Record<string, RoleDefinition>;
-  conditions: Record<string, ConditionDefinition>;
-  graph: Record<string, Transition[]>;          // Record<Role | "$START", Transition[]>
+  graph: Record<string, Record<string, Target>>;  // Record<Role | "$START", Record<Status, Target>>
 };
 ```

@@ -420,20 +417,14 @@ type StepNodePayload = StepRecord & {
 };
 ```

-### 4.4 JSONata 求值上下文
+### 4.4 Moderator 求值

-Thread 链表的线性化。`steps[n]` 的字段和 `StepRecord` 一致，但 `output` 被展开为实际内容。
+Moderator 使用 `evaluate(graph, lastRole, lastOutput)` 进行同步 status-based routing：

 ```typescript
-/** JSONata 上下文中的 step — output 被展开 */
-type StepContext = Omit<StepRecord, "output"> & {
-  output: unknown;                   // 展开后的 CAS 节点内容，非 hash
-};
-
-type ModeratorContext = {
-  start: StartNodePayload;
-  steps: StepContext[];              // 从旧到新
-};
+// graph[lastRole][lastOutput.status] → Target { role, prompt }
+// $START 角色使用 "_" 作为初始 status
+// prompt 通过 Mustache 模板渲染，变量来自 lastOutput
 ```

 ### 4.5 CLI 输出
@@ -522,6 +513,5 @@ StepNodePayload ──extends──→ StepRecord ←──maps to──→ Step
    │
    └── start.workflow → WorkflowPayload
                             ├── roles: Record<name, RoleDefinition>
-                             ├── conditions: Record<name, JSONata>
-                             └── graph: Record<role, Transition[]>
+                             └── graph: Record<role, Record<status, Target>>
 ```
@@ -3,24 +3,27 @@ description: "Single-role topic analysis using four-phase role description"
 roles:
  analyst:
    description: "Analyzes a given topic and produces a structured summary"
-    identity: |
+    goal: |
      You are a research analyst with expertise in breaking down complex topics
      into clear, structured summaries. You think critically and cite key points.
-    prepare: |
-      Review the topic carefully. Consider multiple perspectives and identify
-      the core question being asked.
-    execute: |
+    capabilities:
+      - research
+      - critical-thinking
+      - structured-writing
+    procedure: |
      Analyze the topic by:
      1. Identifying the main thesis or question
      2. Listing 3-5 key points with brief explanations
      3. Noting any counterarguments or caveats
      Keep your analysis concise (under 500 words).
-    report: |
+    output: |
      Provide your analysis as markdown under the frontmatter.
      The frontmatter must include your structured findings.
-    outputSchema:
+    frontmatter:
      type: object
      properties:
+        $status:
+          enum: ["_"]
        thesis:
          type: string
        keyPoints:
@@ -29,12 +32,9 @@ roles:
            type: string
        caveats:
          type: string
-      required: [thesis, keyPoints]
-conditions: {}
+      required: [$status, thesis, keyPoints]
 graph:
  $START:
-    - role: "analyst"
-      condition: null
+    _: { role: "analyst", prompt: "Analyze the topic in the task and produce a structured summary with key points." }
  analyst:
-    - role: "$END"
-      condition: null
+    _: { role: "$END", prompt: "Analysis complete. Finish the workflow." }
@@ -0,0 +1,62 @@
+name: "debate"
+description: "Structured debate between two sides. Tests cross-process session resume."
+roles:
+  against:
+    description: "Argues against the proposition"
+    goal: |
+      You are a skilled debater arguing AGAINST the proposition.
+      Be logical, cite evidence, and directly address your opponent's points.
+      Keep each argument concise (under 200 words).
+    capabilities:
+      - argumentation
+      - critical-thinking
+    procedure: |
+      1. If this is the opening, present your strongest argument against the proposition.
+      2. If responding to the other side, directly counter their points with evidence and logic.
+      3. If you find yourself genuinely convinced by the other side, you may concede.
+    output: |
+      Provide your argument in the frontmatter.
+      Set $status to "conceded" ONLY if you are genuinely convinced and wish to stop debating.
+      Otherwise set $status to "continue".
+    frontmatter:
+      type: object
+      properties:
+        $status:
+          enum: ["continue", "conceded"]
+        argument:
+          type: string
+      required: [$status, argument]
+  for:
+    description: "Argues for the proposition"
+    goal: |
+      You are a skilled debater arguing FOR the proposition.
+      Be logical, cite evidence, and directly address your opponent's points.
+      Keep each argument concise (under 200 words).
+    capabilities:
+      - argumentation
+      - critical-thinking
+    procedure: |
+      1. Read the opposing side's latest argument carefully.
+      2. Counter their points with evidence and logic.
+      3. If you find yourself genuinely convinced by the other side, you may concede.
+    output: |
+      Provide your argument in the frontmatter.
+      Set $status to "conceded" ONLY if you are genuinely convinced and wish to stop debating.
+      Otherwise set $status to "continue".
+    frontmatter:
+      type: object
+      properties:
+        $status:
+          enum: ["continue", "conceded"]
+        argument:
+          type: string
+      required: [$status, argument]
+graph:
+  $START:
+    _: { role: "against", prompt: "Present your opening argument against the proposition." }
+  against:
+    conceded: { role: "$END", prompt: "The against side conceded. Debate over." }
+    continue: { role: "for", prompt: "Counter the opposing argument: {{{argument}}}" }
+  for:
+    conceded: { role: "$END", prompt: "The for side conceded. Debate over." }
+    continue: { role: "against", prompt: "Counter the opposing argument: {{{argument}}}" }
@@ -1,68 +1,198 @@
 name: "solve-issue"
-description: "End-to-end issue resolution"
+description: "TDD-driven issue resolution for small, focused changes. Loop protection relies on engine maxRounds."
 roles:
  planner:
-    description: "Creates implementation plan"
-    identity: "You are a planning agent. You analyze issues and create step-by-step plans."
-    prepare: "Read the issue description and any linked context carefully."
-    execute: "Analyze the issue and create a detailed, actionable implementation plan."
-    report: "Output the plan summary and list of concrete steps."
-    outputSchema:
-      type: object
-      properties:
-        plan:
-          type: string
-        steps:
-          type: array
-          items:
-            type: string
-      required: [plan, steps]
+    description: "Analyzes issue and outputs a TDD test spec"
+    goal: "You are a planning agent. You analyze Gitea issues and produce a TDD test specification that downstream roles will implement and verify."
+    capabilities:
+      - issue-analysis
+      - planning
+    procedure: |
+      On first run (no previous steps):
+      1. Read the issue and all comments from Gitea using `tea issues <number> -r <owner/repo>`
+      2. Look for project conventions files (CLAUDE.md, CONTRIBUTING.md, .cursor/rules/) in the repo
+      3. Assess whether the issue has enough information to produce a test spec
+      4. If insufficient info: comment on the issue via `echo "..." | tea comment <number> -r <owner/repo>` (skip if you already commented), then output $status=insufficient_info
+      5. If sufficient: produce a detailed TDD test spec in markdown covering all scenarios
+
+      On subsequent runs (bounced back by tester with fix_spec):
+      1. Read the tester's output from the previous step to understand what's wrong with the spec
+      2. Revise the test spec accordingly
+
+      After producing the test spec:
+      1. Store it via `uwf cas put-text "<markdown content>"` and capture the returned hash
+      2. Put the hash in frontmatter.plan (required when $status=ready)
+      3. Set repoPath to the absolute path of the repository root
+    output: "Output a brief summary of the test spec. Set $status to ready (with plan hash and repoPath) or insufficient_info."
+    frontmatter:
+      oneOf:
+        - properties:
+            $status: { const: "ready" }
+            plan: { type: string }
+            repoPath: { type: string }
+          required: [$status, plan, repoPath]
+        - properties:
+            $status: { const: "insufficient_info" }
+          required: [$status]
  developer:
-    description: "Implements code changes"
-    identity: "You are a developer agent. You implement code changes according to plans."
-    prepare: "Load coding tools and review the project structure and conventions."
-    execute: "Implement the plan. Write code, tests, and ensure existing tests pass."
-    report: "List all files changed and provide a summary of the implementation."
-    outputSchema:
-      type: object
-      properties:
-        filesChanged:
-          type: array
-          items:
-            type: string
-        summary:
-          type: string
-      required: [filesChanged, summary]
+    description: "TDD implementation per test spec"
+    goal: "You are a developer agent. You implement code changes following TDD — write tests first, then implementation."
+    capabilities:
+      - coding
+    procedure: |
+      IMPORTANT: Always work in a git worktree, NEVER modify the main working directory directly.
+      The repo path and other details are provided in your task prompt.
+
+      Before starting any work, set up an isolated worktree:
+      1. cd into the repo path provided in your task prompt
+      2. `git fetch origin` to get latest refs
+      3. First time (no existing branch):
+         - `git worktree add .worktrees/fix/<issue-number>-<short-slug> -b fix/<issue-number>-<short-slug> origin/main`
+         - `cd .worktrees/fix/<issue-number>-<short-slug> && bun install`
+      4. If bounced back from reviewer or tester (branch already exists):
+         - cd into the existing worktree under `.worktrees/fix/<issue-number>-<short-slug>`
+         - `git fetch origin && git rebase origin/main`
+      5. ALL subsequent work must happen inside the worktree directory.
+
+      Then implement TDD:
+      6. Read the test spec from CAS: `uwf cas get <plan hash>` (the hash is in your task prompt on the first run; when bounced back, take it from the planner step in the thread history)
+      7. If bounced back from reviewer or tester: read the previous role's feedback in your task prompt
+      8. Write tests first based on the spec
+      9. Implement the code to make tests pass
+      10. Ensure `bun run build` passes with no errors
+      11. Run `bun test` to verify all tests pass
+
+      If you cannot complete the implementation (e.g. the issue is too complex, blocked by external factors,
+      or repeated attempts fail), set $status=failed with a reason.
+    output: "List all files changed and provide a summary. Set $status to done (with branch/worktree), or failed (with reason)."
+    frontmatter:
+      oneOf:
+        - properties:
+            $status: { const: "done" }
+            branch: { type: string }
+            worktree: { type: string }
+          required: [$status, branch, worktree]
+        - properties:
+            $status: { const: "failed" }
+            reason: { type: string }
+          required: [$status, reason]
  reviewer:
-    description: "Reviews code changes"
-    identity: "You are a code reviewer. You review implementations for correctness and quality."
-    prepare: "Review the project's coding standards and conventions."
-    execute: "Review the implementation against the plan. Check for bugs, edge cases, and style."
-    report: "Approve or reject with detailed comments explaining your decision."
-    outputSchema:
-      type: object
-      properties:
-        approved:
-          type: boolean
-        comments:
-          type: string
-      required: [approved, comments]
-conditions:
-  notApproved:
-    description: "Reviewer rejected the implementation"
-    expression: "steps[-1].output.approved = false"
+    description: "Code standards compliance check"
+    goal: "You are a code reviewer. You verify code standards compliance — NOT functionality (that's the tester's job)."
+    capabilities:
+      - code-review
+      - static-analysis
+    procedure: |
+      The worktree path is provided in your task prompt. cd into it first.
+
+      Before reviewing, verify the git branch:
+      1. Run `git branch --show-current` — confirm the branch name references the issue number being worked on
+      2. If the branch doesn't correspond to the issue, flag it in your output and reject
+
+      Then perform code review:
+      Hard checks (must all pass):
+      3. `bun run build` — no build errors
+      4. `bunx biome check` — no lint violations
+      5. TypeScript strict mode — no type errors
+
+      Soft checks (review against project conventions if CLAUDE.md / .cursor/rules exist):
+      - Naming conventions, module boundaries, code style
+      - No `console.log` in production code
+      - No dynamic imports in production code
+
+      Only review standards compliance. Do NOT test functionality.
+      If rejecting, you MUST explain the specific reason in your output.
+    output: "Explain your decision with specific file/line references. Set $status to approved (with branch/worktree) or rejected (with comments)."
+    frontmatter:
+      oneOf:
+        - properties:
+            $status: { const: "approved" }
+            branch: { type: string }
+            worktree: { type: string }
+          required: [$status, branch, worktree]
+        - properties:
+            $status: { const: "rejected" }
+            comments: { type: string }
+            worktree: { type: string }
+          required: [$status, comments, worktree]
+  tester:
+    description: "Functional correctness verification"
+    goal: "You are a tester agent. You verify that the implementation correctly satisfies every scenario in the test spec."
+    capabilities:
+      - testing
+    procedure: |
+      The worktree path is provided in your task prompt. cd into it first.
+
+      1. Run `bun test` for automated test verification
+      2. Read the test spec from CAS: `uwf cas get <plan hash>` (find the hash from the planner step in the thread history)
+      3. Verify each scenario in the spec is covered and passing
+      4. Determine outcome:
+         - passed: all scenarios verified, tests pass
+         - fix_code: tests fail or implementation doesn't match spec → send back to developer
+         - fix_spec: the spec itself is wrong or incomplete → send back to planner
+    output: "Report test results per scenario. Set $status to passed (with branch/worktree), fix_code (with report), or fix_spec (with report)."
+    frontmatter:
+      oneOf:
+        - properties:
+            $status: { const: "passed" }
+            branch: { type: string }
+            worktree: { type: string }
+          required: [$status, branch, worktree]
+        - properties:
+            $status: { const: "fix_code" }
+            report: { type: string }
+          required: [$status, report]
+        - properties:
+            $status: { const: "fix_spec" }
+            report: { type: string }
+          required: [$status, report]
+  committer:
+    description: "Commits and creates PR"
+    goal: "You are a committer agent. You create a clean commit and push a PR linking the original issue."
+    capabilities: []
+    procedure: |
+      The worktree path, branch name, and repo info are provided in your task prompt.
+      cd into the worktree first.
+
+      Note: You inherit the developer's worktree and branch. Do NOT create a new branch.
+      1. Stage all changes: `git add -A`
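+      2. Commit with a descriptive message referencing the issue: `git commit -m "type: description" -m "Fixes #N"` (each `-m` becomes its own paragraph; a `\n` inside a double-quoted `-m` is NOT expanded by the shell)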
+      3. Push the branch: `git push -u origin <branch-name>`
+         - If push hook fails: capture the error log in your output, mark hook_failed
+      4. On push success: create a PR via `tea pr create --repo <owner/repo> --title "..." --description "..."`
+         - Extract owner/repo from: `git remote get-url origin | sed 's/.*[:/]\([^/]*\/[^.]*\).*/\1/'`
+         - PR description must include: What / Why / Changes / Ref sections, with `Fixes #N` in Ref
+         - On tea failure: capture stderr/stdout, include PR details for manual creation, mark hook_failed
+      5. After PR creation, clean up the worktree:
+         - cd to the repo root (parent of .worktrees)
+         - `git worktree remove <worktree-path>`
+    output: "Include PR URL on success or error log on failure. Set $status to committed (with prUrl) or hook_failed (with error)."
+    frontmatter:
+      oneOf:
+        - properties:
+            $status: { const: "committed" }
+            prUrl: { type: string }
+          required: [$status, prUrl]
+        - properties:
+            $status: { const: "hook_failed" }
+            error: { type: string }
+          required: [$status, error]
 graph:
  $START:
-    - role: "planner"
-      condition: null
+    _: { role: "planner", prompt: "Analyze the issue and produce a TDD test spec." }
  planner:
-    - role: "developer"
-      condition: null
+    insufficient_info: { role: "$END", prompt: "Insufficient information to proceed; end the workflow." }
+    ready: { role: "developer", prompt: "Implement the TDD test spec (CAS hash: {{{plan}}}) in repo {{{repoPath}}}." }
  developer:
-    - role: "reviewer"
-      condition: null
+    done: { role: "reviewer", prompt: "Review branch {{{branch}}} at {{{worktree}}} for code standards compliance." }
+    failed: { role: "$END", prompt: "Developer failed: {{{reason}}}. Ending workflow." }
  reviewer:
-    - role: "developer"
-      condition: "notApproved"
-    - role: "$END"
-      condition: null
+    rejected: { role: "developer", prompt: "Reviewer rejected: {{{comments}}}. Fix the issues in worktree {{{worktree}}}." }
+    approved: { role: "tester", prompt: "Review passed. Run tests on branch {{{branch}}} at {{{worktree}}}." }
+  tester:
+    fix_code: { role: "developer", prompt: "Tests found code issues: {{{report}}}. Fix and re-submit." }
+    fix_spec: { role: "planner", prompt: "Tests found spec issues: {{{report}}}. Revise the test spec." }
+    passed: { role: "committer", prompt: "All tests passed. Commit and push branch {{{branch}}} from {{{worktree}}}." }
+  committer:
+    hook_failed: { role: "developer", prompt: "Push or PR creation failed: {{{error}}}. Fix and re-submit." }
+    committed: { role: "$END", prompt: "PR created: {{{prUrl}}}. Workflow complete." }
@@ -1,6 +1,6 @@
 import { describe, expect, test } from "bun:test";
-import { packageDescriptor } from "../src/package-descriptor.js";
 import { createDocxDiffAgent } from "../src/agent.js";
+import { packageDescriptor } from "../src/package-descriptor.js";

 describe("createDocxDiffAgent", () => {
  test("returns an AdapterFn (function)", () => {
@@ -1,8 +1,8 @@
-import { mkdirSync, writeFileSync } from "node:fs";
-import { join } from "node:path";
-import { tmpdir } from "node:os";
 import { describe, expect, mock, test } from "bun:test";
-import { ok, err } from "@uncaged/workflow-util";
+import { mkdirSync, writeFileSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { err, ok } from "@uncaged/workflow-util";
 import type { SpawnCliConfig } from "@uncaged/workflow-util-agent";
 import { runDocxDiff } from "../src/runner.js";

@@ -74,7 +74,12 @@ describe("runDocxDiff", () => {
  test("exit 2: throws error", async () => {
    const dir = tempDir();
    const spawnFn = makeSpawn(
-      err({ kind: "non_zero_exit", exitCode: 2, stdout: "", stderr: "fatal error" }) as MockSpawnResult,
+      err({
+        kind: "non_zero_exit",
+        exitCode: 2,
+        stdout: "",
+        stderr: "fatal error",
+      }) as MockSpawnResult,
    );

    await expect(
@@ -1,7 +1,11 @@
 {
  "name": "@uncaged/workflow-agent-docx-diff",
  "version": "0.1.0",
-  "files": ["src", "dist", "package.json"],
+  "files": [
+    "src",
+    "dist",
+    "package.json"
+  ],
  "type": "module",
  "types": "src/index.ts",
  "exports": {
@@ -1,7 +1,12 @@
-import * as z from "zod/v4";
 import { dirname, join } from "node:path";
-import type { AdapterFn, RoleResult, ThreadContext, WorkflowRuntime } from "@uncaged/workflow-runtime";
+import type {
+  AdapterFn,
+  RoleResult,
+  ThreadContext,
+  WorkflowRuntime,
+} from "@uncaged/workflow-runtime";
 import type { WriterMeta } from "@uncaged/workflow-template-document";
+import type * as z from "zod/v4";
 import { runDocxDiff } from "./runner.js";
 import type { DocxDiffAgentConfig } from "./types.js";

@@ -12,16 +17,10 @@ export function createDocxDiffAgent(config: DocxDiffAgentConfig): AdapterFn {
      if (writerStep === undefined) throw new Error("differ: no writer step found");

      const writerMeta = writerStep.meta as WriterMeta;
-      if (writerMeta.mode !== "edit")
-        throw new Error("differ: writer did not run in edit mode");
+      if (writerMeta.mode !== "edit") throw new Error("differ: writer did not run in edit mode");

      const diffDocx = join(dirname(writerMeta.outputDocx), "diff.docx");
-      const raw = await runDocxDiff(
-        config,
-        writerMeta.sourceDocx,
-        writerMeta.outputDocx,
-        diffDocx,
-      );
+      const raw = await runDocxDiff(config, writerMeta.sourceDocx, writerMeta.outputDocx, diffDocx);

      const meta = schema.parse(JSON.parse(raw)) as T;
      return { meta, childThread: null };
@@ -1,6 +1,6 @@
 import { stat } from "node:fs/promises";
-import { spawnCli } from "@uncaged/workflow-util-agent";
 import type { SpawnCliError } from "@uncaged/workflow-util-agent";
+import { spawnCli } from "@uncaged/workflow-util-agent";
 import type { DocxDiffAgentConfig } from "./types.js";

 type SpawnCliFn = typeof spawnCli;
@@ -8,8 +8,7 @@ type SpawnCliFn = typeof spawnCli;
 function throwSpawnError(e: SpawnCliError): never {
  if (e.kind === "non_zero_exit")
    throw new Error(`docx-diff failed (exit ${e.exitCode}): ${e.stderr}`);
-  if (e.kind === "timeout")
-    throw new Error("docx-diff: timed out");
+  if (e.kind === "timeout") throw new Error("docx-diff: timed out");
  throw new Error(`docx-diff: spawn failed: ${e.message}`);
 }

@@ -1,6 +1,6 @@
 import { describe, expect, test } from "bun:test";
-import { packageDescriptor } from "../src/package-descriptor.js";
 import { createOfficeAgent } from "../src/agent.js";
+import { packageDescriptor } from "../src/package-descriptor.js";

 describe("createOfficeAgent", () => {
  test("returns an AdapterFn (function)", () => {
@@ -1,8 +1,8 @@
-import { mkdirSync, writeFileSync } from "node:fs";
-import { join } from "node:path";
-import { tmpdir } from "node:os";
 import { describe, expect, mock, test } from "bun:test";
-import { ok, err } from "@uncaged/workflow-util";
+import { mkdirSync, writeFileSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { err, ok } from "@uncaged/workflow-util";
 import type { SpawnCliConfig } from "@uncaged/workflow-util-agent";
 import { editDocument, generateDocument } from "../src/runner.js";

@@ -123,7 +123,13 @@ describe("editDocument", () => {
    );

    await expect(
-      editDocument({ outputDir: base, command: null, timeout: null }, "te2", "edit", inputFile, spawnFn),
+      editDocument(
+        { outputDir: base, command: null, timeout: null },
+        "te2",
+        "edit",
+        inputFile,
+        spawnFn,
+      ),
    ).rejects.toThrow("spawn failed");
  });
 });
@@ -1,7 +1,11 @@
 {
  "name": "@uncaged/workflow-agent-office",
  "version": "0.1.0",
-  "files": ["src", "dist", "package.json"],
+  "files": [
+    "src",
+    "dist",
+    "package.json"
+  ],
  "type": "module",
  "types": "src/index.ts",
  "exports": {
@@ -1,6 +1,11 @@
-import * as z from "zod/v4";
-import type { AdapterFn, RoleResult, ThreadContext, WorkflowRuntime } from "@uncaged/workflow-runtime";
+import type {
+  AdapterFn,
+  RoleResult,
+  ThreadContext,
+  WorkflowRuntime,
+} from "@uncaged/workflow-runtime";
 import { createLogger } from "@uncaged/workflow-util";
+import type * as z from "zod/v4";
 import { editDocument, generateDocument } from "./runner.js";
 import type { OfficeAgentConfig } from "./types.js";

@@ -27,7 +32,10 @@ export function createOfficeAgent(config: OfficeAgentConfig): AdapterFn {
  return <T>(_systemPrompt: string, schema: z.ZodType<T>) =>
    async (ctx: ThreadContext, _runtime: WorkflowRuntime): Promise<RoleResult<T>> => {
      const { prompt, inputDocx } = parseStartInput(ctx.start.content);
-      log("8FQKP3NV", `office-agent: mode=${inputDocx === null ? "generate" : "edit"} thread=${ctx.threadId}`);
+      log(
+        "8FQKP3NV",
+        `office-agent: mode=${inputDocx === null ? "generate" : "edit"} thread=${ctx.threadId}`,
+      );

      let raw: string;
      if (inputDocx === null) {
@@ -35,7 +43,11 @@ export function createOfficeAgent(config: OfficeAgentConfig): AdapterFn {
        raw = JSON.stringify({ mode: "generate", outputDocx: result.outputDocx, sourceDocx: null });
      } else {
        const result = await editDocument(config, ctx.threadId, prompt, inputDocx);
-        raw = JSON.stringify({ mode: "edit", outputDocx: result.outputDocx, sourceDocx: result.sourceDocx });
+        raw = JSON.stringify({
+          mode: "edit",
+          outputDocx: result.outputDocx,
+          sourceDocx: result.sourceDocx,
+        });
      }

      const meta = schema.parse(JSON.parse(raw)) as T;
@@ -1,7 +1,7 @@
 import { copyFile, mkdir, stat } from "node:fs/promises";
 import { join } from "node:path";
-import { spawnCli } from "@uncaged/workflow-util-agent";
 import type { SpawnCliError } from "@uncaged/workflow-util-agent";
+import { spawnCli } from "@uncaged/workflow-util-agent";
 import type { OfficeAgentConfig } from "./types.js";

 type SpawnCliFn = typeof spawnCli;
@@ -9,8 +9,7 @@ type SpawnCliFn = typeof spawnCli;
 function throwSpawnError(e: SpawnCliError): never {
  if (e.kind === "non_zero_exit")
    throw new Error(`office-agent failed (exit ${e.exitCode}): ${e.stderr}`);
-  if (e.kind === "timeout")
-    throw new Error("office-agent: timed out");
+  if (e.kind === "timeout") throw new Error("office-agent: timed out");
  throw new Error(`office-agent: spawn failed: ${e.message}`);
 }

@@ -5,7 +5,7 @@
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Workflow Dashboard</title>
    <script>
-      (function () {
+      (() => {
        var t = localStorage.getItem("theme");
        if (t === "dark" || (!t && matchMedia("(prefers-color-scheme: dark)").matches)) {
          document.documentElement.classList.add("dark");
@@ -54,10 +54,14 @@ type CallExpression = {
  arguments: Array<AstExpression>;
 };

-type AstExpression = Identifier | MemberExpression | CallExpression | {
-  type: string;
-  [key: string]: unknown;
-};
+type AstExpression =
+  | Identifier
+  | MemberExpression
+  | CallExpression
+  | {
+      type: string;
+      [key: string]: unknown;
+    };

 type VariableDeclarator = {
  id: Identifier | null;
@@ -258,15 +262,21 @@ function createLimitResolver(options: LimitLineOptions): (id: string) => Resolve
 }

 function shouldProcess(id: string, options: LimitLineOptions): boolean {
-  return options.include.test(id) && !id.includes("node_modules") && (options.exclude === null || !options.exclude.test(id));
+  return (
+    options.include.test(id) &&
+    !id.includes("node_modules") &&
+    (options.exclude === null || !options.exclude.test(id))
+  );
 }

 // --- Plugin ---

-function viteLimitLinePlugin(
-  userOptions: Partial<LimitLineOptions> = {},
-): Array<Plugin> {
-  const options: LimitLineOptions = { ...DEFAULT_OPTIONS, ...userOptions, overrides: userOptions.overrides ?? [] };
+function viteLimitLinePlugin(userOptions: Partial<LimitLineOptions> = {}): Array<Plugin> {
+  const options: LimitLineOptions = {
+    ...DEFAULT_OPTIONS,
+    ...userOptions,
+    overrides: userOptions.overrides ?? [],
+  };
  const resolve = createLimitResolver(options);

  const rawCodeCache = new Map<string, string>();
@@ -358,5 +368,5 @@ function viteLimitLinePlugin(
  ];
 }

-export { viteLimitLinePlugin };
 export type { LimitLineOptions, LimitLineOverride };
+export { viteLimitLinePlugin };
@@ -55,10 +55,7 @@ export function ResizablePanel({
  }, []);

  return (
-    <div
-      className={cn("relative shrink-0", className)}
-      style={{ ...style, width }}
-    >
+    <div className={cn("relative shrink-0", className)} style={{ ...style, width }}>
      {children}
      <div
        className="absolute top-0 -right-1 w-2 h-full cursor-col-resize z-10 group"
@@ -531,13 +531,25 @@ export async function executeThread(
      timestamp: nowMs,
      parentState: options.parentStateHash,
    },
-    steps: input.steps.map((out, i) => ({
-      role: out.role,
-      contentHash: out.contentHash,
-      meta: out.meta,
-      refs: out.refs,
-      timestamp: replayTs?.[i] ?? prefilled?.[i]?.timestamp ?? nowMs + i,
-    })),
+    steps: await Promise.all(
+      input.steps.map(async (out, i) => {
+        // Resolve content for the last step (most relevant for the next agent).
+        // Earlier steps only carry meta summaries to avoid bloating the prompt.
+        const isLast = i === input.steps.length - 1;
+        let content: string | null = null;
+        if (isLast) {
+          content = await getContentMerklePayload(io.cas, out.contentHash);
+        }
+        return {
+          role: out.role,
+          contentHash: out.contentHash,
+          content,
+          meta: out.meta,
+          refs: out.refs,
+          timestamp: replayTs?.[i] ?? prefilled?.[i]?.timestamp ?? nowMs + i,
+        };
+      }),
+    ),
  };

  const runtime: WorkflowRuntime = {
@@ -0,0 +1,61 @@
+# @uncaged/workflow-moderator
+
+Status-based graph evaluator — determines the next role or `$END` with zero LLM cost.
+
+## Overview
+
+The moderator (Layer 1) performs a status-based map lookup on the workflow graph. Given the last role and its output, it looks up `graph[lastRole][lastOutput["$status"]]` to find the next `Target` (role + prompt template). The prompt is rendered via Mustache with `lastOutput` as the template context; HTML escaping is disabled because prompts are plain text. For `$START`, or when `$status` is missing or not a string, the unit status `_` is used.
+
+**Dependencies:** `@uncaged/workflow-protocol`, `mustache`
+
+## Installation
+
+```bash
+bun add @uncaged/workflow-moderator
+```
+
+## API
+
+### Functions
+
+```typescript
+function evaluate(
+  graph: Record<string, Record<string, Target>>,
+  lastRole: string,
+  lastOutput: Record<string, unknown>,
+): Result<EvaluateResult, Error>
+```
+
+Returns `{ ok: true, value: { role, prompt } }` where `role` is the next role name or `"$END"`, and `prompt` is the rendered edge instruction for the agent. Returns `{ ok: false, error }` when the role has no transitions, when no transition exists for the resolved status, or when template rendering throws.
+
+### Types
+
+```typescript
+type EvaluateResult = {
+  role: string;
+  prompt: string;
+};
+```
+
+The `Result<T, E>` type is local to this package (`{ ok: true; value: T } | { ok: false; error: E }`), not re-exported from `index.ts`.
+
+## Usage
+
+```typescript
+import { evaluate } from "@uncaged/workflow-moderator";
+import type { Target } from "@uncaged/workflow-protocol";
+
+const result = evaluate(graph, lastRole, lastOutput);
+if (result.ok && result.value.role !== "$END") {
+  console.log(`Next role: ${result.value.role}, prompt: ${result.value.prompt}`);
+}
+```
+
+## Internal Structure
+
+```
+src/
+├── index.ts      Public exports
+├── evaluate.ts   Status-based map lookup + Mustache prompt rendering
+└── types.ts      EvaluateResult, Result
+```
@@ -0,0 +1,132 @@
+import { describe, expect, test } from "bun:test";
+import type { Target, WorkflowPayload } from "@uncaged/workflow-protocol";
+
+import { evaluate } from "../src/evaluate.js";
+
+const solveIssueGraph: WorkflowPayload["graph"] = {
+  $START: {
+    _: { role: "planner", prompt: "Start planning from the issue in the task." },
+  },
+  planner: {
+    _: { role: "developer", prompt: "Implement the plan: {{plan}}" },
+  },
+  developer: {
+    _: { role: "reviewer", prompt: "Review the changes: {{summary}}" },
+  },
+  reviewer: {
+    approved: { role: "$END", prompt: "Done." },
+    rejected: { role: "developer", prompt: "Fix: {{comments}}" },
+  },
+};
+
+describe("evaluate", () => {
+  test("$START → first role (unit status _)", () => {
+    const result = evaluate(solveIssueGraph, "$START", { $status: "_" });
+    expect(result).toEqual({
+      ok: true,
+      value: { role: "planner", prompt: "Start planning from the issue in the task." },
+    });
+  });
+
+  test("status-based routing (reviewer rejected → developer)", () => {
+    const result = evaluate(solveIssueGraph, "reviewer", {
+      $status: "rejected",
+      comments: "missing tests",
+    });
+    expect(result).toEqual({
+      ok: true,
+      value: { role: "developer", prompt: "Fix: missing tests" },
+    });
+  });
+
+  test("status-based routing (reviewer approved → $END)", () => {
+    const result = evaluate(solveIssueGraph, "reviewer", { $status: "approved" });
+    expect(result).toEqual({
+      ok: true,
+      value: { role: "$END", prompt: "Done." },
+    });
+  });
+
+  test("missing role in graph → error", () => {
+    const result = evaluate(solveIssueGraph, "unknown-role", { $status: "_" });
+    expect(result.ok).toBe(false);
+    if (!result.ok) {
+      expect(result.error.message).toBe('no transitions defined for role "unknown-role"');
+    }
+  });
+
+  test("missing status in graph → error", () => {
+    const result = evaluate(solveIssueGraph, "reviewer", { $status: "pending" });
+    expect(result.ok).toBe(false);
+    if (!result.ok) {
+      expect(result.error.message).toBe('no transition for role "reviewer" with status "pending"');
+    }
+  });
+
+  test("mustache template rendering with simple fields", () => {
+    const result = evaluate(solveIssueGraph, "planner", {
+      $status: "_",
+      plan: "Add auth middleware",
+    });
+    expect(result).toEqual({
+      ok: true,
+      value: { role: "developer", prompt: "Implement the plan: Add auth middleware" },
+    });
+  });
+
+  test("mustache does not HTML-escape prompt content", () => {
+    const result = evaluate(solveIssueGraph, "reviewer", {
+      $status: "rejected",
+      comments: 'use <T> & "Result<T, E>" types',
+    });
+    expect(result).toEqual({
+      ok: true,
+      value: { role: "developer", prompt: 'Fix: use <T> & "Result<T, E>" types' },
+    });
+  });
+
+  test("triple mustache also works for unescaped output", () => {
+    const graph: Record<string, Record<string, Target>> = {
+      reviewer: {
+        _: { role: "developer", prompt: "Fix: {{{comments}}}" },
+      },
+    };
+    const result = evaluate(graph, "reviewer", {
+      $status: "_",
+      comments: "<script>alert(1)</script>",
+    });
+    expect(result).toEqual({
+      ok: true,
+      value: { role: "developer", prompt: "Fix: <script>alert(1)</script>" },
+    });
+  });
+
+  test("missing $status defaults to _ (unit routing)", () => {
+    const result = evaluate(solveIssueGraph, "planner", {
+      plan: "Add auth middleware",
+    });
+    expect(result).toEqual({
+      ok: true,
+      value: { role: "developer", prompt: "Implement the plan: Add auth middleware" },
+    });
+  });
+
+  test("mustache template with nested object paths", () => {
+    const graph: Record<string, Record<string, Target>> = {
+      reviewer: {
+        _: {
+          role: "developer",
+          prompt: "Address: {{review.comments}}",
+        },
+      },
+    };
+    const result = evaluate(graph, "reviewer", {
+      $status: "_",
+      review: { comments: "refactor the handler" },
+    });
+    expect(result).toEqual({
+      ok: true,
+      value: { role: "developer", prompt: "Address: refactor the handler" },
+    });
+  });
+});
@@ -15,16 +15,28 @@
    }
  },
  "scripts": {
-    "test": "bun test"
+    "test": "bun test",
+    "test:ci": "bun test"
  },
  "dependencies": {
    "@uncaged/workflow-protocol": "workspace:^",
-    "jsonata": "^1.8.7"
+    "mustache": "^4.2.0"
  },
  "devDependencies": {
+    "@types/mustache": "^4.2.6",
    "typescript": "^5.8.3"
  },
  "publishConfig": {
    "access": "public"
-  }
+  },
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/shazhou-ww/uncaged-workflow.git",
+    "directory": "legacy-packages/workflow-moderator"
+  },
+  "homepage": "https://github.com/shazhou-ww/uncaged-workflow#readme",
+  "bugs": {
+    "url": "https://github.com/shazhou-ww/uncaged-workflow/issues"
+  },
+  "license": "MIT"
 }
@@ -0,0 +1,53 @@
+import type { Target } from "@uncaged/workflow-protocol";
+import mustache from "mustache";
+
+import type { EvaluateResult, Result } from "./types.js";
+
+// Disable HTML escaping — prompts are plain text, not HTML.
+mustache.escape = (text: string) => text;
+
+const START_ROLE = "$START";
+const UNIT_STATUS = "_";
+
+type LastOutput = Record<string, unknown>;
+
+const STATUS_KEY = "$status";
+
+export function evaluate(
+  graph: Record<string, Record<string, Target>>,
+  lastRole: string,
+  lastOutput: LastOutput,
+): Result<EvaluateResult, Error> {
+  const status =
+    lastRole === START_ROLE
+      ? UNIT_STATUS
+      : typeof lastOutput[STATUS_KEY] === "string"
+        ? (lastOutput[STATUS_KEY] as string)
+        : UNIT_STATUS;
+
+  const roleTargets = graph[lastRole];
+  if (roleTargets === undefined) {
+    return {
+      ok: false,
+      error: new Error(`no transitions defined for role "${lastRole}"`),
+    };
+  }
+
+  const target = roleTargets[status];
+  if (target === undefined) {
+    return {
+      ok: false,
+      error: new Error(`no transition for role "${lastRole}" with status "${status}"`),
+    };
+  }
+
+  try {
+    const prompt = mustache.render(target.prompt, lastOutput);
+    return { ok: true, value: { role: target.role, prompt } };
+  } catch (error) {
+    return {
+      ok: false,
+      error: error instanceof Error ? error : new Error(String(error)),
+    };
+  }
+}
@@ -0,0 +1,2 @@
+export { evaluate } from "./evaluate.js";
+export type { EvaluateResult } from "./types.js";
@@ -0,0 +1,7 @@
+export type Result<T, E> = { ok: true; value: T } | { ok: false; error: E };
+
+/** The result of moderator evaluation — which role to go to, and the edge prompt. */
+export type EvaluateResult = {
+  role: string;
+  prompt: string;
+};
@@ -71,6 +71,7 @@ export type RoleStep<M extends RoleMeta> = {
    role: K;
    meta: M[K];
    contentHash: string;
+    content: string | null;
    refs: string[];
    timestamp: number;
  };
@@ -71,7 +71,8 @@ async function buildRoleStepsFromStates<M extends RoleMeta>(
  cas: CasStore,
 ): Promise<RoleStep<M>[]> {
  const steps: RoleStep<M>[] = [];
-  for (const st of chronologicalStates) {
+  for (let idx = 0; idx < chronologicalStates.length; idx++) {
+    const st = chronologicalStates[idx];
    if (st.payload.role === END) {
      continue;
    }
@@ -79,10 +80,13 @@ async function buildRoleStepsFromStates<M extends RoleMeta>(
    if (contentParsed === null || contentParsed.kind !== "content") {
      throw new Error(`buildThreadContext: expected content node at ${st.payload.content}`);
    }
+    // Resolve full text content for the last step only
+    const isLast = idx === chronologicalStates.length - 1;
    steps.push({
      role: st.payload.role,
      meta: st.payload.meta,
      contentHash: st.payload.content,
+      content: isLast ? contentParsed.node.payload : null,
      refs: [...contentParsed.node.refs],
      timestamp: st.payload.timestamp,
    } as RoleStep<M>);
@@ -88,6 +88,7 @@ async function advanceOneRound<M extends RoleMeta>(
  const step = {
    role: next,
    contentHash,
+    content: contentPayload,
    meta,
    refs,
    timestamp: Date.now(),
@@ -9,9 +9,7 @@ import type { DocumentMeta } from "../src/roles.js";

 const documentModerator = tableToModerator(documentTable);

-function makeCtx(
-  steps: ModeratorContext<DocumentMeta>["steps"],
-): ModeratorContext<DocumentMeta> {
+function makeCtx(steps: ModeratorContext<DocumentMeta>["steps"]): ModeratorContext<DocumentMeta> {
  return {
    threadId: "01TEST000000000000000000TR",
    depth: 0,
@@ -25,7 +23,11 @@ function writerGenerateStep(): RoleStep<DocumentMeta> {
  return {
    role: "writer",
    contentHash: "STUBHASHWRITER001",
-    meta: { mode: "generate", outputDocx: "/out/output.docx", sourceDocx: null } satisfies WriterMeta,
+    meta: {
+      mode: "generate",
+      outputDocx: "/out/output.docx",
+      sourceDocx: null,
+    } satisfies WriterMeta,
    refs: [],
    timestamp: 1,
  };
@@ -35,7 +37,11 @@ function writerEditStep(): RoleStep<DocumentMeta> {
  return {
    role: "writer",
    contentHash: "STUBHASHWRITER002",
-    meta: { mode: "edit", outputDocx: "/out/modified.docx", sourceDocx: "/out/original.docx" } satisfies WriterMeta,
+    meta: {
+      mode: "edit",
+      outputDocx: "/out/modified.docx",
+      sourceDocx: "/out/original.docx",
+    } satisfies WriterMeta,
    refs: [],
    timestamp: 1,
  };
@@ -1,7 +1,11 @@
 {
  "name": "@uncaged/workflow-template-document",
  "version": "0.1.0",
-  "files": ["src", "dist", "package.json"],
+  "files": [
+    "src",
+    "dist",
+    "package.json"
+  ],
  "type": "module",
  "types": "src/index.ts",
  "exports": {
@@ -30,7 +30,7 @@ describe("buildAgentPrompt", () => {
    expect(text).not.toContain("## Tools");
  });

-  test("single step shows hash and meta, and includes tools", async () => {
+  test("single step shows meta and content, and includes tools", async () => {
    const onlyHash = "01HASHSINGLESTEP0000000001";
    const ctx: AgentContext = {
      start: startTask("user task"),
@@ -42,6 +42,7 @@ describe("buildAgentPrompt", () => {
        {
          role: "coder",
          contentHash: onlyHash,
+          content: "Here is my implementation of the feature.",
          meta: { files: ["a.ts"] },
          refs: [onlyHash],
          timestamp: 2,
@@ -52,13 +53,39 @@ describe("buildAgentPrompt", () => {
    expect(text).toContain("## Task");
    expect(text).toContain("user task");
    expect(text).toContain("## Step: coder");
-    expect(text).toContain(`ContentHash: ${onlyHash}`);
    expect(text).toContain('Meta: {"files":["a.ts"]}');
+    expect(text).toContain("<output>");
+    expect(text).toContain("Here is my implementation of the feature.");
+    expect(text).toContain("</output>");
    expect(text).toContain("## Tools");
    expect(text).toContain("uncaged-workflow thread 01TEST000000000000000000TR");
  });

-  test("two or more steps: previous steps are meta-only; latest step includes hash", async () => {
+  test("single step with null content omits output tag", async () => {
+    const onlyHash = "01HASHSINGLESTEP0000000001";
+    const ctx: AgentContext = {
+      start: startTask("user task"),
+      depth: 0,
+      bundleHash: "TESTHASH00001",
+      threadId: "01TEST000000000000000000TR",
+      currentRole: { name: "coder", systemPrompt: "Be helpful." },
+      steps: [
+        {
+          role: "coder",
+          contentHash: onlyHash,
+          content: null,
+          meta: { files: ["a.ts"] },
+          refs: [onlyHash],
+          timestamp: 2,
+        },
+      ],
+    };
+    const text = await buildAgentPrompt(ctx);
+    expect(text).not.toContain("<output>");
+    expect(text).toContain('Meta: {"files":["a.ts"]}');
+  });
+
+  test("two or more steps: previous steps are meta-only; latest step includes content", async () => {
    const plannerHash = "01HASHPLANNER0000000000001";
    const coderHash = "01HASHCODER0000000000000001";
    const ctx: AgentContext = {
@@ -71,6 +98,7 @@ describe("buildAgentPrompt", () => {
        {
          role: "planner",
          contentHash: plannerHash,
+          content: null,
          meta: { plan: "short" },
          refs: [plannerHash],
          timestamp: 2,
@@ -78,6 +106,7 @@ describe("buildAgentPrompt", () => {
        {
          role: "coder",
          contentHash: coderHash,
+          content: "I reviewed the code and found 4 lint issues:\n1. Missing semicolon on line 42\n2. Unused import on line 3",
          meta: { done: true },
          refs: [coderHash],
          timestamp: 3,
@@ -90,10 +119,11 @@ describe("buildAgentPrompt", () => {
    expect(text).toContain("### Step 1: planner");
    expect(text).toContain('Summary: {"plan":"short"}');
    expect(text).toContain("## Latest Step: coder");
-    expect(text).toContain(`ContentHash: ${coderHash}`);
    expect(text).toContain('Meta: {"done":true}');
+    expect(text).toContain("<output>");
+    expect(text).toContain("I reviewed the code and found 4 lint issues:");
+    expect(text).toContain("</output>");
    expect(text).toContain("## Tools");
-    expect(text).toContain("uncaged-workflow thread 01TEST000000000000000000TR");
  });

  test("parentState null omits Parent Context section", async () => {
@@ -125,7 +155,7 @@ describe("buildAgentPrompt", () => {
    expect(text).toContain(`uncaged-workflow cas get ${parentHash}`);
  });

-  test("middle steps show meta summary only and latest shows hash", async () => {
+  test("middle steps show meta summary only and latest shows content", async () => {
    const ha = "01HASHA00000000000000000001";
    const hb = "01HASHB00000000000000000001";
    const hc = "01HASHC00000000000000000001";
@@ -139,6 +169,7 @@ describe("buildAgentPrompt", () => {
        {
          role: "a",
          contentHash: ha,
+          content: null,
          meta: { n: 1 },
          refs: [ha],
          timestamp: 2,
@@ -146,6 +177,7 @@ describe("buildAgentPrompt", () => {
        {
          role: "b",
          contentHash: hb,
+          content: null,
          meta: { n: 2 },
          refs: [hb],
          timestamp: 3,
@@ -153,6 +185,7 @@ describe("buildAgentPrompt", () => {
        {
          role: "c",
          contentHash: hc,
+          content: "Final output from role c",
          meta: { n: 3 },
          refs: [hc],
          timestamp: 4,
@@ -162,7 +195,35 @@ describe("buildAgentPrompt", () => {
    const text = await buildAgentPrompt(ctx);
    expect(text).toContain('Summary: {"n":1}');
    expect(text).toContain('Summary: {"n":2}');
-    expect(text).toContain(`ContentHash: ${hc}`);
    expect(text).toContain("## Latest Step: c");
+    expect(text).toContain("<output>");
+    expect(text).toContain("Final output from role c");
+    expect(text).toContain("</output>");
+  });
+
+  test("content is truncated when exceeding quota", async () => {
+    const longContent = "x".repeat(20_000);
+    const hash = "01HASHLONG000000000000000001";
+    const ctx: AgentContext = {
+      start: startTask("task"),
+      depth: 0,
+      bundleHash: "TESTHASH00001",
+      threadId: "01TEST000000000000000000TR",
+      currentRole: { name: "r", systemPrompt: "S" },
+      steps: [
+        {
+          role: "r",
+          contentHash: hash,
+          content: longContent,
+          meta: {},
+          refs: [],
+          timestamp: 2,
+        },
+      ],
+    };
+    const text = await buildAgentPrompt(ctx);
+    expect(text).toContain("<output>");
+    expect(text).toContain("... (truncated)");
+    expect(text.length).toBeLessThan(20_000);
  });
 });
@@ -5,20 +5,33 @@
    "packages/*"
  ],
  "scripts": {
+    "uwf": "bun packages/cli-workflow/src/cli.ts",
    "build": "bunx tsc --build",
    "check": "bunx tsc --build && biome check . && bash scripts/lint-log-tags.sh",
    "typecheck": "bunx tsc --build",
    "format": "biome format --write .",
-    "test": "bun run --filter '*' test",
+    "test": "bun run --filter './packages/*' test",
+    "test:ci": "bun run --filter './packages/*' test:ci",
    "changeset": "bunx changeset",
    "version": "bunx changeset version",
    "release": "bun run build && bun test && node scripts/publish-all.mjs"
  },
  "devDependencies": {
+    "@agentclientprotocol/sdk": "^0.22.1",
    "@biomejs/biome": "^2.4.14",
    "@changesets/cli": "^2.31.0",
    "@types/node": "^25.7.0",
    "@types/xxhashjs": "^0.2.4",
+    "@uncaged/workflow-agent-hermes": "workspace:*",
    "bun-types": "^1.3.13"
-  }
+  },
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/shazhou-ww/uncaged-workflow.git"
+  },
+  "homepage": "https://github.com/shazhou-ww/uncaged-workflow#readme",
+  "bugs": {
+    "url": "https://github.com/shazhou-ww/uncaged-workflow/issues"
+  },
+  "license": "MIT"
 }
@@ -0,0 +1,212 @@
+# @uncaged/cli-workflow
+
+`uwf` CLI — thread lifecycle, workflow registry, CAS inspection, and setup.
+
+## Overview
+
+Layer 4 entry point for the workflow engine. The `uwf` binary orchestrates one step per invocation: load thread head from `threads.yaml`, run the moderator, spawn the configured agent CLI, run extract, append a CAS step node, and update the head pointer (or archive when `$END`).
+
+### Four-Layer Architecture
+
+```
+workflow → thread → step → turn
+模板定义   执行实例   单步结果   agent内部交互
+```
+
+- **Workflow** (layer 1): YAML template with roles and routing graph
+- **Thread** (layer 2): Single workflow execution instance
+- **Step** (layer 3): One moderator→agent→extract cycle
+- **Turn** (layer 4): Agent-internal interactions (use `step show` or CAS to inspect)
+
+This package has no library `src/index.ts` — it is consumed as a CLI binary only.
+
+**Dependencies:** `@uncaged/json-cas`, `@uncaged/json-cas-fs`, `@uncaged/workflow-util-agent`, `@uncaged/workflow-protocol`, `@uncaged/workflow-util`, `commander`, `dotenv`, `mustache`, `yaml`
+
+## Installation
+
+Included as the `uwf` binary when you install `@uncaged/cli-workflow`:
+
+```bash
+bun add -g @uncaged/cli-workflow
+# or from the monorepo:
+bun link packages/cli-workflow
+```
+
+## CLI Usage
+
+### Global options
+
+```
+-V, --version          Show version
+--format <json|yaml>   Output format (default: json)
+-h, --help             Show help
+```
+
+### Thread (Layer 2: Execution Instances)
+
+| Command | Description |
+|---------|-------------|
+| `uwf thread start <workflow> -p <prompt>` | Create a thread without executing |
+| `uwf thread exec <thread-id> [--agent <cmd>] [-c <count>] [--background]` | Execute one or more moderator→agent→extract cycles |
+| `uwf thread show <thread-id>` | Show thread head pointer |
+| `uwf thread list [--status <status>] [--after <date>] [--before <date>] [--skip <n>] [--take <n>]` | List threads filtered by status (idle, running, completed, active, or comma-separated), time range (ISO or relative like '7d'), with pagination |
+| `uwf thread read <thread-id> [--quota N] [--before <hash>] [--start]` | Render thread as readable markdown |
+| `uwf thread stop <thread-id>` | Stop background execution (keep thread active) |
+| `uwf thread cancel <thread-id>` | Cancel thread (stop + archive to history) |
+
+`thread read`, `step list`, and `step show` work on both active and completed threads.
+
+Examples:
+
+```bash
+uwf thread start solve-issue -p "Fix the login redirect bug"
+uwf thread exec 01ARZ3NDEKTSV4RRFFQ69G5FAV
+uwf thread exec 01ARZ3NDEKTSV4RRFFQ69G5FAV -c 3 --agent uwf-builtin
+uwf thread exec 01ARZ3NDEKTSV4RRFFQ69G5FAV --background
+uwf thread list --status running
+uwf thread list --status active
+uwf thread list --status idle,completed
+uwf thread list --after 7d --take 10
+uwf thread read 01ARZ3NDEKTSV4RRFFQ69G5FAV --quota 8000
+uwf thread stop 01ARZ3NDEKTSV4RRFFQ69G5FAV
+```
+
+### Step (Layer 3: Single Cycle Results)
+
+| Command | Description |
+|---------|-------------|
+| `uwf step list <thread-id>` | List all steps in a thread chronologically |
+| `uwf step show <step-hash>` | Show step metadata and frontmatter |
+| `uwf step read <step-hash> [--quota <chars>]` | Read a step's turns as human-readable markdown |
+| `uwf step fork <step-hash>` | Fork a thread from a specific step |
+
+Examples:
+
+```bash
+uwf step list 01ARZ3NDEKTSV4RRFFQ69G5FAV
+uwf step show 32GCDE899RRQ3
+uwf step read 32GCDE899RRQ3 --quota 2000
+uwf step fork 32GCDE899RRQ3
+```
+
+### Workflow (Layer 1: Templates)
+
+| Command | Description |
+|---------|-------------|
+| `uwf workflow add <file.yaml>` | Register a workflow from YAML |
+| `uwf workflow show <name-or-hash>` | Show workflow definition |
+| `uwf workflow list` | List registered workflows |
+
+### CAS
+
+| Command | Description |
+|---------|-------------|
+| `uwf cas get <hash> [--timestamp]` | Read a CAS node |
+| `uwf cas put <type-hash> <data>` | Store a node, print hash |
+| `uwf cas put-text <text>` | Store plain text, print hash |
+| `uwf cas has <hash>` | Check existence |
+| `uwf cas refs <hash>` | List direct references |
+| `uwf cas walk <hash>` | Recursive traversal |
+| `uwf cas reindex` | Rebuild type index |
+| `uwf cas schema list` | List registered schemas |
+| `uwf cas schema get <hash>` | Show a schema |
+
+### Setup
+
+```bash
+uwf setup
+uwf setup --provider openai --base-url https://api.openai.com/v1 \
+  --api-key sk-... --model gpt-4o --agent hermes
+```
+
+Config: `~/.uncaged/workflow/config.yaml`. API keys: `~/.uncaged/workflow/.env`.
+
+### Skill
+
+| Command | Description |
+|---------|-------------|
+| `uwf skill cli` | Print markdown reference of all uwf commands (for agent skills) |
+
+### Log
+
+| Command | Description |
+|---------|-------------|
+| `uwf log list` | List log files with sizes |
+| `uwf log show [--thread <id>] [--process <pid>] [--date YYYY-MM-DD]` | Show filtered log entries |
+| `uwf log clean [--before YYYY-MM-DD]` | Delete old log files |
+
+## Migration Guide
+
+### Breaking Changes (v0.x → v1.x)
+
+The CLI was reorganized to clarify the four-layer architecture. **No backward compatibility** — old commands have been removed.
+
+#### Renamed Commands
+
+| Old Command | New Command | Notes |
+|------------|-------------|-------|
+| `workflow put` | `workflow add` | More intuitive verb |
+| `thread step` | `thread exec` | Eliminates ambiguity with "step" noun |
+| `thread list --all` | `thread list --status completed` | Unified status filtering |
+
+#### Removed Commands (Merged)
+
+| Old Command | New Command | Notes |
+|------------|-------------|-------|
+| `thread running` | `thread list --status running` | Merged into unified list |
+
+#### Removed Commands (Split)
+
+| Old Command | New Commands | Notes |
+|------------|-------------|-------|
+| `thread kill` | `thread stop` or `thread cancel` | `stop` keeps thread active, `cancel` archives it |
+
+#### Moved Commands
+
+| Old Command | New Command | Notes |
+|------------|-------------|-------|
+| `thread steps` | `step list` | Moved to step layer |
+| `thread step-details` | `step show` | Moved to step layer |
+| `thread fork` | `step fork` | Moved to step layer (forks are step-based) |
+
+#### Deprecation Errors
+
+Old commands now show helpful error messages:
+
+```bash
+$ uwf thread step 01ARZ3NDEKTSV4RRFFQ69G5FAV
+Error: Command 'thread step' has been removed.
+Use 'thread exec' instead.
+
+For more information, see: uwf help thread exec
+```
+
+## Internal Structure
+
+```
+src/
+├── cli.ts              Commander entrypoint, command registration
+├── format.ts           JSON/YAML output formatting
+├── store.ts            CAS store + registry initialization
+├── validate.ts         Workflow YAML validation
+├── schemas.ts          CLI-local schema registration
+├── moderator/          Status-based graph evaluator (next role or $END)
+└── commands/
+    ├── thread.ts       Thread lifecycle and exec
+    ├── step.ts         Step operations (list/show/read/fork)
+    ├── workflow.ts     Workflow registry (add/show/list)
+    ├── cas.ts          CAS inspection and schema ops
+    ├── setup.ts        Interactive/non-interactive setup
+    ├── skill.ts        Built-in skill references
+    └── log.ts          Process debug log management
+```
+
+## Configuration
+
+| File | Purpose |
+|------|---------|
+| `~/.uncaged/workflow/config.yaml` | Providers, models, default agent |
+| `~/.uncaged/workflow/.env` | API keys (referenced by `apiKeyEnv` in config) |
+| `~/.uncaged/workflow/registry.yaml` | Workflow name → CAS hash |
+| `~/.uncaged/workflow/threads.yaml` | Active thread head pointers |
+| `~/.uncaged/workflow/cas/` | Content-addressed node storage |
@@ -11,23 +11,35 @@
    "uwf": "./src/cli.ts"
  },
  "dependencies": {
-    "@uncaged/json-cas": "^0.4.0",
-    "@uncaged/json-cas-fs": "^0.4.0",
-    "@uncaged/workflow-agent-kit": "workspace:^",
-    "@uncaged/workflow-moderator": "workspace:^",
+    "@uncaged/json-cas": "^0.5.3",
+    "@uncaged/json-cas-fs": "^0.5.3",
    "@uncaged/workflow-protocol": "workspace:^",
    "@uncaged/workflow-util": "workspace:^",
+    "@uncaged/workflow-util-agent": "workspace:^",
    "commander": "^14.0.3",
    "dotenv": "^16.6.1",
+    "mustache": "^4.2.0",
    "yaml": "^2.8.4"
  },
  "scripts": {
-    "test": "vitest run"
+    "test": "vitest run",
+    "test:ci": "vitest run"
  },
  "publishConfig": {
    "access": "public"
  },
  "devDependencies": {
+    "@types/mustache": "^4.2.6",
    "vitest": "^4.1.6"
-  }
+  },
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/shazhou-ww/uncaged-workflow.git",
+    "directory": "packages/cli-workflow"
+  },
+  "homepage": "https://github.com/shazhou-ww/uncaged-workflow#readme",
+  "bugs": {
+    "url": "https://github.com/shazhou-ww/uncaged-workflow/issues"
+  },
+  "license": "MIT"
 }
@@ -0,0 +1,152 @@
+import { execFileSync, execSync } from "node:child_process";
+import { mkdir, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, beforeEach, describe, expect, test } from "vitest";
+import { cmdCasPutText } from "../commands/cas.js";
+
+// Per-test state: a throwaway storage root and the path of the CLI entry point under test.
+let storageRoot: string;
+let uwfPath: string;
+
+beforeEach(async () => {
+  // The CLI entry point is resolved relative to this test file
+  uwfPath = join(__dirname, "../../src/cli.ts");
+
+  // Timestamp plus random suffix keeps each test's directory distinct
+  const uniqueSuffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
+  storageRoot = join(tmpdir(), `uwf-cas-exit-test-${uniqueSuffix}`);
+  await mkdir(storageRoot, { recursive: true });
+});
+
+afterEach(async () => {
+  // force: true makes cleanup a no-op when the directory is already gone
+  await rm(storageRoot, { force: true, recursive: true });
+});
+
+/** Captured outcome of one CLI invocation. */
+type ExecResult = {
+  /** Everything the child wrote to stdout. */
+  stdout: string;
+  /** Everything the child wrote to stderr ("" when the child exited with status 0). */
+  stderr: string;
+  /** Exit status of the child process. */
+  exitCode: number;
+};
+
+/**
+ * Runs the uwf CLI under bun with WORKFLOW_STORAGE_ROOT pointing at the per-test
+ * storage root and returns its output and exit status instead of throwing on a
+ * non-zero exit.
+ *
+ * @param args - CLI arguments, passed to the child verbatim as separate argv entries.
+ * @returns stdout, stderr and exit code of the child process.
+ * @throws The original error when the child could not be spawned or did not
+ *   terminate with a numeric exit status (for example, it was killed by a signal).
+ */
+function execUwf(args: string[]): ExecResult {
+  try {
+    // execFileSync hands argv straight to the child: no shell is involved, so a
+    // checkout path or argument containing spaces/metacharacters is not re-parsed.
+    const stdout = execFileSync("bun", [uwfPath, ...args], {
+      env: { ...process.env, WORKFLOW_STORAGE_ROOT: storageRoot },
+      encoding: "utf-8",
+      stdio: ["pipe", "pipe", "pipe"],
+    });
+    return { stdout, stderr: "", exitCode: 0 };
+  } catch (error: unknown) {
+    if (
+      error &&
+      typeof error === "object" &&
+      "stdout" in error &&
+      "stderr" in error &&
+      "status" in error
+    ) {
+      const { stdout, stderr, status } = error;
+      // status is null when the child was terminated by a signal; that is not an
+      // exit code, so fall through and surface the underlying error instead.
+      if (typeof status === "number") {
+        return { stdout: String(stdout), stderr: String(stderr), exitCode: status };
+      }
+    }
+    throw error;
+  }
+}
+
+describe("uwf cas has CLI exit codes", () => {
+  test("exits 0 when hash exists", async () => {
+    // Seed the store through the command layer so there is a real hash to look up
+    const { hash } = await cmdCasPutText(storageRoot, "test content");
+
+    const { stdout, exitCode } = execUwf(["cas", "has", hash]);
+
+    expect(stdout).toContain('"exists":true');
+    expect(exitCode).toBe(0);
+  });
+
+  test("exits 1 when hash does not exist", () => {
+    // Nothing was stored in this test's storage root, so the lookup must miss
+    const { stdout, exitCode } = execUwf(["cas", "has", "NOSUCHHASH123"]);
+
+    expect(stdout).toContain('"exists":false');
+    expect(exitCode).toBe(1);
+  });
+
+  test("JSON output format unchanged for exists=true", async () => {
+    const { hash } = await cmdCasPutText(storageRoot, "test");
+
+    const { stdout } = execUwf(["cas", "has", hash]);
+
+    // The whole of stdout must parse as a single JSON document
+    const payload = JSON.parse(stdout.trim());
+    expect(payload).toEqual({ exists: true });
+  });
+
+  test("JSON output format unchanged for exists=false", () => {
+    const { stdout } = execUwf(["cas", "has", "INVALID"]);
+
+    // The payload is still emitted on stdout even though the lookup misses
+    const payload = JSON.parse(stdout.trim());
+    expect(payload).toEqual({ exists: false });
+  });
+
+  test("YAML output format preserves exit code behavior for exists=true", async () => {
+    const { hash } = await cmdCasPutText(storageRoot, "test");
+
+    const { stdout, exitCode } = execUwf(["--format", "yaml", "cas", "has", hash]);
+
+    // Exit status is checked first; the output is only loosely matched as YAML
+    expect(exitCode).toBe(0);
+    expect(stdout).toContain("exists:");
+    expect(stdout).toContain("true");
+  });
+
+  test("YAML output format preserves exit code behavior for exists=false", () => {
+    const { stdout, exitCode } = execUwf(["--format", "yaml", "cas", "has", "INVALID"]);
+
+    expect(exitCode).toBe(1);
+    expect(stdout).toContain("exists:");
+    expect(stdout).toContain("false");
+  });
+});
+
+describe("regression: other cas commands unaffected", () => {
+  test("uwf cas get still exits 1 on not-found with error message", () => {
+    const { exitCode, stderr } = execUwf(["cas", "get", "NOSUCHHASH"]);
+
+    // For `get`, a missing node is an error: non-zero exit plus a message on stderr
+    expect(exitCode).toBe(1);
+    expect(stderr).toContain("Node not found");
+  });
+
+  test("uwf cas put-text behavior unchanged", () => {
+    const { exitCode, stdout } = execUwf(["cas", "put-text", "hello"]);
+
+    expect(exitCode).toBe(0);
+
+    // stdout carries a JSON object exposing the hash of the stored text
+    const payload = JSON.parse(stdout.trim());
+    expect(payload).toHaveProperty("hash");
+    expect(typeof payload.hash).toBe("string");
+    expect(payload.hash.length).toBe(13); // Crockford Base32 XXH64 hash length
+  });
+});
@@ -0,0 +1,74 @@
+import { mkdir, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, beforeEach, describe, expect, test } from "vitest";
+import { cmdCasHas, cmdCasPutText } from "../commands/cas.js";
+
+// Throwaway storage root, recreated for every test.
+let storageRoot: string;
+
+beforeEach(async () => {
+  // Timestamp plus random suffix keeps each test's directory distinct
+  const uniqueSuffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
+  storageRoot = join(tmpdir(), `uwf-cas-test-${uniqueSuffix}`);
+  await mkdir(storageRoot, { recursive: true });
+});
+
+afterEach(async () => {
+  await rm(storageRoot, { force: true, recursive: true });
+});
+
+describe("cmdCasHas", () => {
+  test("returns {exists: true} for existing hash", async () => {
+    // Store a node first so the lookup has something to find
+    const { hash } = await cmdCasPutText(storageRoot, "test content");
+
+    const lookup = await cmdCasHas(storageRoot, hash);
+
+    expect(lookup).toEqual({ exists: true });
+  });
+
+  test("returns {exists: false} for non-existent hash", async () => {
+    // beforeEach leaves the storage root empty, so nothing can match
+    const lookup = await cmdCasHas(storageRoot, "INVALIDHASH12");
+
+    expect(lookup).toEqual({ exists: false });
+  });
+
+  test("does not throw for non-existent hash", async () => {
+    // A miss must resolve with a result rather than reject
+    const pending = cmdCasHas(storageRoot, "NOSUCHHASH123");
+
+    await expect(pending).resolves.toEqual({ exists: false });
+  });
+
+  test("handles malformed hash gracefully", async () => {
+    // A hash that is too short is reported as absent, not as an error
+    const lookup = await cmdCasHas(storageRoot, "xyz");
+
+    expect(lookup).toEqual({ exists: false });
+  });
+
+  test("handles empty hash string", async () => {
+    const lookup = await cmdCasHas(storageRoot, "");
+
+    expect(lookup).toEqual({ exists: false });
+  });
+
+  test("handles hash with special characters", async () => {
+    const lookup = await cmdCasHas(storageRoot, "HASH!@#");
+
+    expect(lookup).toEqual({ exists: false });
+  });
+});
@@ -0,0 +1,181 @@
+import { mkdir, readdir, rm, writeFile } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, beforeEach, describe, expect, test } from "vitest";
+import { cmdLogClean, cmdLogList, cmdLogShow } from "../commands/log.js";
+
+// Throwaway storage root, recreated for every test.
+let storageRoot: string;
+
+beforeEach(async () => {
+  const uniqueSuffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
+  storageRoot = join(tmpdir(), `uwf-log-test-${uniqueSuffix}`);
+  // Creating logs/ recursively also creates the storage root itself
+  const logsDir = join(storageRoot, "logs");
+  await mkdir(logsDir, { recursive: true });
+});
+
+afterEach(async () => {
+  await rm(storageRoot, { force: true, recursive: true });
+});
+
+/** Fields carried by every fixture log record. */
+type LogFixture = {
+  ts: string;
+  pid: string;
+  tag: string;
+  msg: string;
+  thread: string;
+  workflow: string;
+};
+
+/** Serializes one fixture record to a single JSONL line (no trailing newline). */
+const toLine = (record: LogFixture): string => JSON.stringify(record);
+
+// 2026-05-20, process 1716200000000-1234, thread 01J1234ABCDEF (first of two entries)
+const entry1 = toLine({
+  ts: "2026-05-20T10:00:00.000Z",
+  pid: "1716200000000-1234",
+  tag: "W9F3RK2M",
+  msg: "process start",
+  thread: "01J1234ABCDEF",
+  workflow: "solve-issue",
+});
+
+// Same day, process and thread as entry1, one second later
+const entry2 = toLine({
+  ts: "2026-05-20T10:00:01.000Z",
+  pid: "1716200000000-1234",
+  tag: "ABC12345",
+  msg: "step executed",
+  thread: "01J1234ABCDEF",
+  workflow: "solve-issue",
+});
+
+// Same day, but a different process, thread and workflow
+const entry3 = toLine({
+  ts: "2026-05-20T10:00:02.000Z",
+  pid: "1716200000000-5678",
+  tag: "XYZ98765",
+  msg: "different process",
+  thread: "01JOTHER000000",
+  workflow: "review-code",
+});
+
+// Only entry dated 2026-05-19
+const oldEntry = toLine({
+  ts: "2026-05-19T08:00:00.000Z",
+  pid: "1716200000000-9999",
+  tag: "OLD1TAG1",
+  msg: "old entry",
+  thread: "01JOLD0000000",
+  workflow: "solve-issue",
+});
+
+// Only entry dated 2026-05-18 (the earliest timestamp among the fixtures)
+const olderEntry = toLine({
+  ts: "2026-05-18T08:00:00.000Z",
+  pid: "1716200000000-0001",
+  tag: "OLD2TAG2",
+  msg: "older entry",
+  thread: "01JOLDER00000",
+  workflow: "review-code",
+});
+
+/** Writes the three dated fixture files into `<storageRoot>/logs`, one JSON record per line. */
+async function writeLogFiles(): Promise<void> {
+  const logsDir = join(storageRoot, "logs");
+  const fixtureFiles: Array<[string, string[]]> = [
+    ["2026-05-20.jsonl", [entry1, entry2, entry3]],
+    ["2026-05-19.jsonl", [oldEntry]],
+    ["2026-05-18.jsonl", [olderEntry]],
+  ];
+
+  for (const [fileName, records] of fixtureFiles) {
+    // Every record, including the last one, is newline-terminated
+    const content = records.map((record) => `${record}\n`).join("");
+    await writeFile(join(logsDir, fileName), content);
+  }
+}
+
+describe("cmdLogList", () => {
+  test("lists log files with sizes sorted by date descending", async () => {
+    await writeLogFiles();
+
+    const files = await cmdLogList(storageRoot);
+
+    expect(files).toHaveLength(3);
+    // Newest file comes first
+    const [newest, middle, oldest] = files;
+    expect(newest.name).toBe("2026-05-20.jsonl");
+    expect(newest.date).toBe("2026-05-20");
+    expect(newest.size).toBeGreaterThan(0);
+    expect(middle.name).toBe("2026-05-19.jsonl");
+    expect(oldest.name).toBe("2026-05-18.jsonl");
+  });
+
+  test("returns empty array when no log files exist", async () => {
+    // logs/ exists (created in beforeEach) but holds no files
+    const files = await cmdLogList(storageRoot);
+
+    expect(files).toEqual([]);
+  });
+
+  test("returns empty array when logs directory does not exist", async () => {
+    // A storage root that exists but has no logs/ subdirectory
+    const rootWithoutLogs = join(storageRoot, "nonexistent");
+    await mkdir(rootWithoutLogs, { recursive: true });
+
+    const files = await cmdLogList(rootWithoutLogs);
+
+    expect(files).toEqual([]);
+  });
+});
+
+describe("cmdLogShow", () => {
+  // Filter with every criterion disabled; each test overrides only the field it exercises
+  const unfiltered = { thread: null, process: null, date: null };
+
+  test("filters by thread ID", async () => {
+    await writeLogFiles();
+
+    const entries = await cmdLogShow(storageRoot, { ...unfiltered, thread: "01J1234ABCDEF" });
+
+    expect(entries).toHaveLength(2);
+    expect(entries.every((entry) => entry.thread === "01J1234ABCDEF")).toBe(true);
+  });
+
+  test("filters by process ID", async () => {
+    await writeLogFiles();
+
+    const entries = await cmdLogShow(storageRoot, {
+      ...unfiltered,
+      process: "1716200000000-1234",
+    });
+
+    expect(entries).toHaveLength(2);
+    expect(entries.every((entry) => entry.pid === "1716200000000-1234")).toBe(true);
+  });
+
+  test("filters by date", async () => {
+    await writeLogFiles();
+
+    const entries = await cmdLogShow(storageRoot, { ...unfiltered, date: "2026-05-19" });
+
+    expect(entries).toHaveLength(1);
+    expect(entries[0].msg).toBe("old entry");
+  });
+
+  test("reads all files when no date filter", async () => {
+    await writeLogFiles();
+
+    const entries = await cmdLogShow(storageRoot, { ...unfiltered });
+
+    expect(entries).toHaveLength(5);
+    // Results come back ordered by timestamp, oldest first
+    expect(entries[0].ts).toBe("2026-05-18T08:00:00.000Z");
+    expect(entries[4].ts).toBe("2026-05-20T10:00:02.000Z");
+  });
+
+  test("returns empty when no matches", async () => {
+    await writeLogFiles();
+
+    const entries = await cmdLogShow(storageRoot, { ...unfiltered, thread: "NONEXISTENT" });
+
+    expect(entries).toEqual([]);
+  });
+
+  test("combined thread + date filter", async () => {
+    await writeLogFiles();
+
+    const entries = await cmdLogShow(storageRoot, {
+      ...unfiltered,
+      thread: "01J1234ABCDEF",
+      date: "2026-05-20",
+    });
+
+    expect(entries).toHaveLength(2);
+    expect(entries.every((entry) => entry.thread === "01J1234ABCDEF")).toBe(true);
+  });
+});
+
+describe("cmdLogClean", () => {
+  test("deletes files before given date", async () => {
+    await writeLogFiles();
+
+    const { deleted } = await cmdLogClean(storageRoot, "2026-05-20");
+
+    expect(deleted).toBe(2);
+    // Only the file dated on the cutoff itself is left behind
+    const survivors = await readdir(join(storageRoot, "logs"));
+    expect(survivors).toEqual(["2026-05-20.jsonl"]);
+  });
+
+  test("deletes nothing when all files are newer", async () => {
+    await writeLogFiles();
+
+    // The cutoff equals the oldest fixture date, so no file is dated before it
+    const { deleted } = await cmdLogClean(storageRoot, "2026-05-18");
+
+    expect(deleted).toBe(0);
+  });
+
+  test("handles missing logs directory gracefully", async () => {
+    // A storage root that exists but has no logs/ subdirectory
+    const rootWithoutLogs = join(storageRoot, "nonexistent");
+    await mkdir(rootWithoutLogs, { recursive: true });
+
+    const outcome = await cmdLogClean(rootWithoutLogs, "2026-05-20");
+
+    expect(outcome).toEqual({ deleted: 0 });
+  });
+});
@@ -0,0 +1,132 @@
+import type { Target, WorkflowPayload } from "@uncaged/workflow-protocol";
+import { describe, expect, test } from "vitest";
+
+import { evaluate } from "../moderator/evaluate.js";
+
+// Fixture graph: outer key = current role, inner key = reported status ("_" is the
+// unit status used when a role has a single outgoing edge), value = next role + prompt.
+const solveIssueGraph: WorkflowPayload["graph"] = {
+  // Linear part of the flow: $START → planner → developer → reviewer
+  $START: { _: { role: "planner", prompt: "Start planning from the issue in the task." } },
+  planner: { _: { role: "developer", prompt: "Implement the plan: {{plan}}" } },
+  developer: { _: { role: "reviewer", prompt: "Review the changes: {{summary}}" } },
+  // The reviewer is the only branching role: finish, or loop back to the developer
+  reviewer: {
+    approved: { role: "$END", prompt: "Done." },
+    rejected: { role: "developer", prompt: "Fix: {{comments}}" },
+  },
+};
+
+describe("evaluate", () => {
+  // Success envelope these tests expect evaluate() to return for a resolved transition
+  const routedTo = (role: string, prompt: string) => ({ ok: true, value: { role, prompt } });
+
+  test("$START → first role (unit status _)", () => {
+    const outcome = evaluate(solveIssueGraph, "$START", { $status: "_" });
+
+    expect(outcome).toEqual(routedTo("planner", "Start planning from the issue in the task."));
+  });
+
+  test("status-based routing (reviewer rejected → developer)", () => {
+    const output = { $status: "rejected", comments: "missing tests" };
+
+    const outcome = evaluate(solveIssueGraph, "reviewer", output);
+
+    expect(outcome).toEqual(routedTo("developer", "Fix: missing tests"));
+  });
+
+  test("status-based routing (reviewer approved → $END)", () => {
+    const outcome = evaluate(solveIssueGraph, "reviewer", { $status: "approved" });
+
+    expect(outcome).toEqual(routedTo("$END", "Done."));
+  });
+
+  test("missing role in graph → error", () => {
+    const outcome = evaluate(solveIssueGraph, "unknown-role", { $status: "_" });
+
+    expect(outcome.ok).toBe(false);
+    // Narrow to the failure branch before reading the error
+    if (outcome.ok) return;
+    expect(outcome.error.message).toBe('no transitions defined for role "unknown-role"');
+  });
+
+  test("missing status in graph → error", () => {
+    const outcome = evaluate(solveIssueGraph, "reviewer", { $status: "pending" });
+
+    expect(outcome.ok).toBe(false);
+    if (outcome.ok) return;
+    expect(outcome.error.message).toBe('no transition for role "reviewer" with status "pending"');
+  });
+
+  test("mustache template rendering with simple fields", () => {
+    const output = { $status: "_", plan: "Add auth middleware" };
+
+    const outcome = evaluate(solveIssueGraph, "planner", output);
+
+    expect(outcome).toEqual(routedTo("developer", "Implement the plan: Add auth middleware"));
+  });
+
+  test("mustache does not HTML-escape prompt content", () => {
+    // <, >, & and quotes must reach the prompt verbatim
+    const output = { $status: "rejected", comments: 'use <T> & "Result<T, E>" types' };
+
+    const outcome = evaluate(solveIssueGraph, "reviewer", output);
+
+    expect(outcome).toEqual(routedTo("developer", 'Fix: use <T> & "Result<T, E>" types'));
+  });
+
+  test("triple mustache also works for unescaped output", () => {
+    const tripleBraceGraph: Record<string, Record<string, Target>> = {
+      reviewer: {
+        _: { role: "developer", prompt: "Fix: {{{comments}}}" },
+      },
+    };
+    const output = { $status: "_", comments: "<script>alert(1)</script>" };
+
+    const outcome = evaluate(tripleBraceGraph, "reviewer", output);
+
+    expect(outcome).toEqual(routedTo("developer", "Fix: <script>alert(1)</script>"));
+  });
+
+  test("missing $status defaults to _ (unit routing)", () => {
+    // No $status key at all: the "_" edge is expected to be taken
+    const outcome = evaluate(solveIssueGraph, "planner", { plan: "Add auth middleware" });
+
+    expect(outcome).toEqual(routedTo("developer", "Implement the plan: Add auth middleware"));
+  });
+
+  test("mustache template with nested object paths", () => {
+    const nestedPathGraph: Record<string, Record<string, Target>> = {
+      reviewer: {
+        _: {
+          role: "developer",
+          prompt: "Address: {{review.comments}}",
+        },
+      },
+    };
+    const output = { $status: "_", review: { comments: "refactor the handler" } };
+
+    const outcome = evaluate(nestedPathGraph, "reviewer", output);
+
+    expect(outcome).toEqual(routedTo("developer", "Address: refactor the handler"));
+  });
+});
@@ -0,0 +1,108 @@
+import { mkdtemp, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import type { CasRef, ThreadId } from "@uncaged/workflow-protocol";
+import { afterEach, beforeEach, describe, expect, test } from "vitest";
+import { resolveHeadHash } from "../commands/shared.js";
+import { appendThreadHistory, saveThreadsIndex } from "../store.js";
+
+// Throwaway storage directory, recreated for every test.
+let tmpDir: string;
+
+beforeEach(async () => {
+  // mkdtemp appends a unique suffix to this prefix
+  const prefix = join(tmpdir(), "cli-uwf-resolve-head-");
+  tmpDir = await mkdtemp(prefix);
+});
+
+afterEach(async () => {
+  await rm(tmpDir, { force: true, recursive: true });
+});
+
+describe("resolveHeadHash", () => {
+  test("returns head hash from threads.yaml for active thread", async () => {
+    const threadId = "01JTEST0000000000000000001" as ThreadId;
+    const headHash = "active_hash_123" as CasRef;
+
+    await saveThreadsIndex(tmpDir, { [threadId]: headHash });
+
+    const result = await resolveHeadHash(tmpDir, threadId);
+
+    expect(result).toBe(headHash);
+  });
+
+  test("falls back to history.jsonl when thread not in threads.yaml", async () => {
+    const threadId = "01JTEST0000000000000000002" as ThreadId;
+    const headHash = "completed_hash_456" as CasRef;
+    const workflowHash = "workflow_hash_789" as CasRef;
+
+    // The thread index is saved empty; the thread is recorded only in the history
+    await saveThreadsIndex(tmpDir, {});
+    await appendThreadHistory(tmpDir, {
+      thread: threadId,
+      workflow: workflowHash,
+      head: headHash,
+      completedAt: Date.now(),
+    });
+
+    const result = await resolveHeadHash(tmpDir, threadId);
+
+    expect(result).toBe(headHash);
+  });
+
+  // The "thread not found" path is deliberately not exercised in this file:
+  // resolveHeadHash calls fail(), which does process.exit(1) and would terminate
+  // the test runner. Cover that path with a CLI-level test that runs in a subprocess.
+
+  test("prioritizes active thread over history when thread exists in both", async () => {
+    const threadId = "01JTEST0000000000000000004" as ThreadId;
+    const activeHash = "active_hash_v2" as CasRef;
+    const historicalHash = "historical_hash_v1" as CasRef;
+    const workflowHash = "workflow_hash_xyz" as CasRef;
+
+    // Thread exists in both locations (should not happen normally, but test the precedence)
+    await saveThreadsIndex(tmpDir, { [threadId]: activeHash });
+    await appendThreadHistory(tmpDir, {
+      thread: threadId,
+      workflow: workflowHash,
+      head: historicalHash,
+      completedAt: Date.now(),
+    });
+
+    const result = await resolveHeadHash(tmpDir, threadId);
+
+    // Should return the active head, not the historical one
+    expect(result).toBe(activeHash);
+  });
+
+  test("finds thread from multiple history entries", async () => {
+    const threadId1 = "01JTEST0000000000000000005" as ThreadId;
+    const threadId2 = "01JTEST0000000000000000006" as ThreadId;
+    const threadId3 = "01JTEST0000000000000000007" as ThreadId;
+    const hash1 = "hash_thread1" as CasRef;
+    const hash2 = "hash_thread2" as CasRef;
+    const hash3 = "hash_thread3" as CasRef;
+    const workflowHash = "workflow_hash_abc" as CasRef;
+    // One clock reading so the three completion times are exactly one second apart
+    const now = Date.now();
+
+    await saveThreadsIndex(tmpDir, {});
+    await appendThreadHistory(tmpDir, {
+      thread: threadId1,
+      workflow: workflowHash,
+      head: hash1,
+      completedAt: now - 2000,
+    });
+    await appendThreadHistory(tmpDir, {
+      thread: threadId2,
+      workflow: workflowHash,
+      head: hash2,
+      completedAt: now - 1000,
+    });
+    await appendThreadHistory(tmpDir, {
+      thread: threadId3,
+      workflow: workflowHash,
+      head: hash3,
+      completedAt: now,
+    });
+
+    // Look up the middle entry so neither "first" nor "last" wins by accident
+    const result = await resolveHeadHash(tmpDir, threadId2);
+
+    expect(result).toBe(hash2);
+  });
+});
@@ -0,0 +1,137 @@
+import { readFileSync } from "node:fs";
+import { mkdtemp, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, beforeEach, describe, expect, test, vi } from "vitest";
+import { parse } from "yaml";
+import { _agentNameFromBinary, _printAgentMenu, cmdSetup } from "../commands/setup.js";
+
+// ─── _agentNameFromBinary ────────────────────────────────────────────────────
+
+describe("_agentNameFromBinary", () => {
+  // Each row: [test title, binary name passed in, expected agent name]
+  const cases: Array<[string, string, string]> = [
+    ["strips uwf- prefix", "uwf-hermes", "hermes"],
+    ["strips uwf- prefix for compound names", "uwf-claude-code", "claude-code"],
+    ["returns as-is when no uwf- prefix", "hermes", "hermes"],
+    ["handles uwf-builtin", "uwf-builtin", "builtin"],
+  ];
+
+  for (const [title, binary, expected] of cases) {
+    test(title, () => {
+      expect(_agentNameFromBinary(binary)).toBe(expected);
+    });
+  }
+});
+
+// ─── _printAgentMenu ─────────────────────────────────────────────────────────
+
+describe("_printAgentMenu", () => {
+  // Restore in a hook rather than at the end of each test body: a failing expect()
+  // throws before trailing statements run, which would leave console.log stubbed
+  // for every test that follows.
+  afterEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  /** Stubs console.log and returns the array that collects each printed line. */
+  function captureConsoleLog(): string[] {
+    const lines: string[] = [];
+    vi.spyOn(console, "log").mockImplementation((...args: unknown[]) => {
+      lines.push(args.join(" "));
+    });
+    return lines;
+  }
+
+  test("prints known agents with labels", () => {
+    const logs = captureConsoleLog();
+
+    _printAgentMenu(["uwf-hermes", "uwf-claude-code"]);
+
+    // Known binaries are expected to appear under their display labels
+    expect(logs.some((l) => l.includes("Hermes"))).toBe(true);
+    expect(logs.some((l) => l.includes("Claude Code"))).toBe(true);
+  });
+
+  test("prints unknown agents with binary name as label", () => {
+    const logs = captureConsoleLog();
+
+    _printAgentMenu(["uwf-custom-agent"]);
+
+    // An unrecognized binary is expected to be printed under its own name
+    expect(logs.some((l) => l.includes("uwf-custom-agent"))).toBe(true);
+  });
+});
+
+// ─── cmdSetup agent config ───────────────────────────────────────────────────
+
+describe("cmdSetup agent configuration", () => {
+  let storageRoot: string;
+
+  beforeEach(async () => {
+    storageRoot = await mkdtemp(join(tmpdir(), "uwf-setup-agent-"));
+    // Every test runs against a fetch stub that answers with an empty 200 JSON body
+    vi.spyOn(globalThis, "fetch").mockResolvedValue(
+      new Response(JSON.stringify({}), { status: 200 }),
+    );
+  });
+
+  afterEach(async () => {
+    vi.restoreAllMocks();
+    await rm(storageRoot, { recursive: true, force: true });
+  });
+
+  // Arguments shared by every test; built lazily because storageRoot changes per test
+  const setupArgs = () => ({
+    provider: "testprovider",
+    baseUrl: "https://api.test.com/v1",
+    apiKey: "sk-test",
+    model: "test-model",
+    storageRoot,
+  });
+
+  // Re-reads config.yaml from the current storage root on every call
+  const loadConfig = () => parse(readFileSync(join(storageRoot, "config.yaml"), "utf8"));
+
+  test("defaults to hermes agent when no agent specified", async () => {
+    const { defaultAgent } = await cmdSetup(setupArgs());
+
+    expect(defaultAgent).toBe("hermes");
+    const written = loadConfig();
+    expect(written.agents.hermes).toEqual({ command: "uwf-hermes", args: [] });
+    expect(written.defaultAgent).toBe("hermes");
+  });
+
+  test("writes specified agent as default", async () => {
+    const { defaultAgent } = await cmdSetup({ ...setupArgs(), agent: "claude-code" });
+
+    expect(defaultAgent).toBe("claude-code");
+    const written = loadConfig();
+    expect(written.agents["claude-code"]).toEqual({ command: "uwf-claude-code", args: [] });
+    expect(written.defaultAgent).toBe("claude-code");
+  });
+
+  test("preserves existing agents when adding new one", async () => {
+    // Two consecutive runs against the same storage root: hermes first, then claude-code
+    await cmdSetup(setupArgs());
+    await cmdSetup({ ...setupArgs(), agent: "claude-code" });
+
+    const written = loadConfig();
+    expect(written.agents.hermes).toBeDefined();
+    expect(written.agents["claude-code"]).toBeDefined();
+    expect(written.defaultAgent).toBe("claude-code");
+  });
+
+  test("updates defaultAgent on re-run with different agent", async () => {
+    await cmdSetup(setupArgs());
+    const afterFirstRun = loadConfig();
+    expect(afterFirstRun.defaultAgent).toBe("hermes");
+
+    await cmdSetup({ ...setupArgs(), agent: "builtin" });
+    const afterSecondRun = loadConfig();
+    expect(afterSecondRun.defaultAgent).toBe("builtin");
+  });
+});
@@ -0,0 +1,381 @@
+import { mkdirSync, writeFileSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, describe, expect, test, vi } from "vitest";
+import {
+  _discoverAgents,
+  _isBackspace,
+  _isTerminator,
+  _parseWhichOutput,
+  _printModelMenu,
+  _printProviderMenu,
+  _printValidationResult,
+  _resolveModelChoice,
+  _resolveProviderChoice,
+  _searchPathDirs,
+} from "../commands/setup.js";
+
+// ──────────────────────────────────────────────────────────────────────────────
+// 1a. _searchPathDirs
+// ──────────────────────────────────────────────────────────────────────────────
+
+describe("_searchPathDirs", () => {
+  // Each test builds its own directory under the OS temp dir, fills it with
+  // fake binaries, and passes the directory (or a ":"-joined list of them) as
+  // the PATH string to scan. The suffix after the timestamp keeps directory
+  // names distinct when several tests run within the same millisecond.
+  // NOTE(review): the directories are not removed afterwards — consider cleanup.
+
+  test("returns empty array for empty PATH", async () => {
+    const result = await _searchPathDirs("");
+    expect(result).toEqual([]);
+  });
+
+  test("finds uwf-hermes in a single dir", async () => {
+    // Compute the path once and reuse it. mkdirSync's return value is not a
+    // reliable source for it: with `recursive: true` it is the first directory
+    // actually created, or undefined when nothing had to be created.
+    const dir = join(tmpdir(), `uwf-test-${Date.now()}-1`);
+    mkdirSync(dir, { recursive: true });
+    writeFileSync(join(dir, "uwf-hermes"), "#!/bin/sh\n", { mode: 0o755 });
+    const result = await _searchPathDirs(dir);
+    expect(result).toContain("uwf-hermes");
+  });
+
+  test("skips non-uwf- prefixed binaries", async () => {
+    const dir = join(tmpdir(), `uwf-test-${Date.now()}-2`);
+    mkdirSync(dir, { recursive: true });
+    writeFileSync(join(dir, "hermes"), "#!/bin/sh\n", { mode: 0o755 });
+    writeFileSync(join(dir, "uwf-hermes"), "#!/bin/sh\n", { mode: 0o755 });
+    const result = await _searchPathDirs(dir);
+    expect(result).toEqual(["uwf-hermes"]);
+  });
+
+  test("skips entry named exactly 'uwf'", async () => {
+    const dir = join(tmpdir(), `uwf-test-${Date.now()}-3`);
+    mkdirSync(dir, { recursive: true });
+    writeFileSync(join(dir, "uwf"), "#!/bin/sh\n", { mode: 0o755 });
+    writeFileSync(join(dir, "uwf-hermes"), "#!/bin/sh\n", { mode: 0o755 });
+    const result = await _searchPathDirs(dir);
+    expect(result).toEqual(["uwf-hermes"]);
+  });
+
+  test("skips non-executable files", async () => {
+    const dir = join(tmpdir(), `uwf-test-${Date.now()}-4`);
+    mkdirSync(dir, { recursive: true });
+    // Mode 0o644 has no execute bit, unlike the 0o755 files used elsewhere.
+    writeFileSync(join(dir, "uwf-foo"), "#!/bin/sh\n", { mode: 0o644 });
+    const result = await _searchPathDirs(dir);
+    expect(result).toEqual([]);
+  });
+
+  test("deduplicates across PATH dirs", async () => {
+    const dir1 = join(tmpdir(), `uwf-test-${Date.now()}-5a`);
+    const dir2 = join(tmpdir(), `uwf-test-${Date.now()}-5b`);
+    mkdirSync(dir1, { recursive: true });
+    mkdirSync(dir2, { recursive: true });
+    writeFileSync(join(dir1, "uwf-hermes"), "#!/bin/sh\n", { mode: 0o755 });
+    writeFileSync(join(dir2, "uwf-hermes"), "#!/bin/sh\n", { mode: 0o755 });
+    // The same binary name lives in both directories but must be reported once.
+    const result = await _searchPathDirs(`${dir1}:${dir2}`);
+    expect(result).toEqual(["uwf-hermes"]);
+  });
+
+  test("returns sorted array", async () => {
+    const dir = join(tmpdir(), `uwf-test-${Date.now()}-6`);
+    mkdirSync(dir, { recursive: true });
+    // Files are created out of alphabetical order on purpose.
+    writeFileSync(join(dir, "uwf-zoo"), "#!/bin/sh\n", { mode: 0o755 });
+    writeFileSync(join(dir, "uwf-alpha"), "#!/bin/sh\n", { mode: 0o755 });
+    writeFileSync(join(dir, "uwf-mid"), "#!/bin/sh\n", { mode: 0o755 });
+    const result = await _searchPathDirs(dir);
+    expect(result).toEqual(["uwf-alpha", "uwf-mid", "uwf-zoo"]);
+  });
+
+  test("skips inaccessible/nonexistent directories silently", async () => {
+    const result = await _searchPathDirs("/nonexistent-dir-xyz-abc-12345");
+    expect(result).toEqual([]);
+  });
+});
+
+// ──────────────────────────────────────────────────────────────────────────────
+// 1b. _parseWhichOutput
+// ──────────────────────────────────────────────────────────────────────────────
+
+describe("_parseWhichOutput", () => {
+  // Inputs are newline-separated absolute paths; expectations are the
+  // basenames the parser is supposed to keep.
+
+  test("returns empty array for empty string", () => {
+    const parsed = _parseWhichOutput("");
+    expect(parsed).toEqual([]);
+  });
+
+  test("parses single path", () => {
+    const parsed = _parseWhichOutput("/usr/local/bin/uwf-hermes");
+    expect(parsed).toEqual(["uwf-hermes"]);
+  });
+
+  test("parses multiple paths", () => {
+    const raw = "/usr/local/bin/uwf-hermes\n/usr/bin/uwf-claude-code";
+    // Expected order is alphabetical, not input order.
+    expect(_parseWhichOutput(raw)).toEqual(["uwf-claude-code", "uwf-hermes"]);
+  });
+
+  test("deduplicates identical basenames from different dirs", () => {
+    const raw = "/a/uwf-hermes\n/b/uwf-hermes";
+    expect(_parseWhichOutput(raw)).toEqual(["uwf-hermes"]);
+  });
+
+  test("skips blank lines", () => {
+    const raw = "/a/uwf-hermes\n\n/b/uwf-cursor";
+    expect(_parseWhichOutput(raw)).toEqual(["uwf-cursor", "uwf-hermes"]);
+  });
+
+  test("skips entry named exactly 'uwf'", () => {
+    const parsed = _parseWhichOutput("/usr/bin/uwf");
+    expect(parsed).toEqual([]);
+  });
+
+  test("skips basenames not starting with uwf-", () => {
+    const parsed = _parseWhichOutput("/usr/bin/node");
+    expect(parsed).toEqual([]);
+  });
+
+  test("returns sorted array", () => {
+    const raw = "/a/uwf-zoo\n/a/uwf-alpha";
+    expect(_parseWhichOutput(raw)).toEqual(["uwf-alpha", "uwf-zoo"]);
+  });
+});
+
+// ──────────────────────────────────────────────────────────────────────────────
+// 2a. _isTerminator
+// ──────────────────────────────────────────────────────────────────────────────
+
+describe("_isTerminator", () => {
+  // Control characters are written as escape sequences so the inputs stay
+  // visible in the source and survive editors or tools that drop raw control
+  // bytes. Without the escape, the EOT case is indistinguishable from the
+  // empty-string case below, which expects the opposite result.
+  test("\\n is a terminator", () => {
+    expect(_isTerminator("\n")).toBe(true);
+  });
+  test("\\r is a terminator", () => {
+    expect(_isTerminator("\r")).toBe(true);
+  });
+  test("\\u0004 (EOT) is a terminator", () => {
+    expect(_isTerminator("\u0004")).toBe(true);
+  });
+  test("regular char is not a terminator", () => {
+    expect(_isTerminator("a")).toBe(false);
+  });
+  test("empty string is not a terminator", () => {
+    expect(_isTerminator("")).toBe(false);
+  });
+});
+
+// ──────────────────────────────────────────────────────────────────────────────
+// 2b. _isBackspace
+// ──────────────────────────────────────────────────────────────────────────────
+
+describe("_isBackspace", () => {
+  // DEL (U+007F) is spelled as an escape sequence so the input is explicit in
+  // the source rather than an invisible raw control byte.
+  test("\\u007F is a backspace", () => {
+    expect(_isBackspace("\u007F")).toBe(true);
+  });
+  test("\\b is a backspace", () => {
+    expect(_isBackspace("\b")).toBe(true);
+  });
+  test("regular char is not a backspace", () => {
+    expect(_isBackspace("x")).toBe(false);
+  });
+});
+
+// ──────────────────────────────────────────────────────────────────────────────
+// 3a. _printProviderMenu
+// ──────────────────────────────────────────────────────────────────────────────
+
+describe("_printProviderMenu", () => {
+  afterEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  // Two preset providers; the tests below expect one extra "custom" entry after them.
+  const providers = [
+    { name: "openai", label: "OpenAI", baseUrl: "https://api.openai.com/v1" },
+    { name: "xai", label: "xAI", baseUrl: "https://api.x.ai/v1" },
+  ] as const;
+
+  /** Replaces console.log with a recorder and returns the array it appends to. */
+  const captureLog = (): string[] => {
+    const captured: string[] = [];
+    vi.spyOn(console, "log").mockImplementation((msg: string) => {
+      captured.push(msg);
+    });
+    return captured;
+  };
+
+  test("prints correct number of lines (one per provider + custom)", () => {
+    const printed = captureLog();
+    _printProviderMenu(providers);
+    // 2 providers + 1 custom = 3 lines
+    expect(printed.length).toBe(3);
+  });
+
+  test("custom option number = providers.length + 1", () => {
+    const printed = captureLog();
+    _printProviderMenu(providers);
+    const customLine = printed[printed.length - 1] ?? "";
+    expect(customLine).toMatch(/3\)/);
+  });
+
+  test("each provider line contains its label and baseUrl", () => {
+    const printed = captureLog();
+    _printProviderMenu(providers);
+    const [openaiLine, xaiLine] = printed;
+    expect(openaiLine).toContain("OpenAI");
+    expect(openaiLine).toContain("https://api.openai.com/v1");
+    expect(xaiLine).toContain("xAI");
+    expect(xaiLine).toContain("https://api.x.ai/v1");
+  });
+});
+
+// ──────────────────────────────────────────────────────────────────────────────
+// 3b. _resolveProviderChoice
+// ──────────────────────────────────────────────────────────────────────────────
+
+describe("_resolveProviderChoice", () => {
+  // Three presets, so menu numbers 1-3 select a preset and 4 is the "custom" slot.
+  const providers = [
+    { name: "openai", label: "OpenAI", baseUrl: "https://api.openai.com/v1" },
+    { name: "xai", label: "xAI", baseUrl: "https://api.x.ai/v1" },
+    { name: "deepseek", label: "DeepSeek", baseUrl: "https://api.deepseek.com/v1" },
+  ] as const;
+
+  test("valid index 1 returns first provider", () => {
+    const expected = { providerName: "openai", baseUrl: "https://api.openai.com/v1" };
+    expect(_resolveProviderChoice("1", providers)).toEqual(expected);
+  });
+
+  test("valid index N (last preset) returns last provider", () => {
+    const expected = { providerName: "deepseek", baseUrl: "https://api.deepseek.com/v1" };
+    expect(_resolveProviderChoice("3", providers)).toEqual(expected);
+  });
+
+  test("index providers.length+1 (custom) returns null", () => {
+    expect(_resolveProviderChoice("4", providers)).toBeNull();
+  });
+
+  test("non-numeric string returns null", () => {
+    const choice = _resolveProviderChoice("abc", providers);
+    expect(choice).toBeNull();
+  });
+
+  test("0 returns null (out of range)", () => {
+    const choice = _resolveProviderChoice("0", providers);
+    expect(choice).toBeNull();
+  });
+
+  test("N+2 returns null (out of range)", () => {
+    const choice = _resolveProviderChoice("5", providers);
+    expect(choice).toBeNull();
+  });
+
+  test("negative number returns null", () => {
+    const choice = _resolveProviderChoice("-1", providers);
+    expect(choice).toBeNull();
+  });
+});
+
+// ──────────────────────────────────────────────────────────────────────────────
+// 3c. _resolveModelChoice
+// ──────────────────────────────────────────────────────────────────────────────
+
+describe("_resolveModelChoice", () => {
+  // A 1-based number inside the list picks that model; anything else is
+  // expected back unchanged.
+
+  test("numeric input within range returns model at that index", () => {
+    const models = ["a", "b", "c"];
+    expect(_resolveModelChoice("2", models)).toBe("b");
+  });
+
+  test("numeric input out of range returns input as-is", () => {
+    const models = ["a"];
+    expect(_resolveModelChoice("5", models)).toBe("5");
+  });
+
+  test("non-numeric input returns input as-is", () => {
+    const models = ["a", "b"];
+    expect(_resolveModelChoice("gpt-4o", models)).toBe("gpt-4o");
+  });
+
+  test("numeric input 1 returns first model", () => {
+    const models = ["alpha", "beta"];
+    expect(_resolveModelChoice("1", models)).toBe("alpha");
+  });
+
+  test("empty models list with numeric input returns input as-is", () => {
+    const models: string[] = [];
+    expect(_resolveModelChoice("1", models)).toBe("1");
+  });
+});
+
+// ──────────────────────────────────────────────────────────────────────────────
+// 3d. _printModelMenu
+// ──────────────────────────────────────────────────────────────────────────────
+
+describe("_printModelMenu", () => {
+  afterEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  /** Replaces console.log with a recorder and returns the array it appends to. */
+  const captureLog = (): string[] => {
+    const captured: string[] = [];
+    vi.spyOn(console, "log").mockImplementation((msg: string) => {
+      captured.push(msg);
+    });
+    return captured;
+  };
+
+  test("prints all models — each model name appears in output", () => {
+    const rows = captureLog();
+    const models = ["model-a", "model-b", "model-c"];
+    _printModelMenu(models, 100);
+    const combined = rows.join("\n");
+    for (const modelName of models) {
+      expect(combined).toContain(modelName);
+    }
+  });
+
+  test("single column when termCols is very small", () => {
+    const rows = captureLog();
+    _printModelMenu(["a", "b", "c"], 1);
+    // Each model on its own row → 3 lines
+    expect(rows.length).toBe(3);
+  });
+
+  test("wide terminal fits multiple columns", () => {
+    const rows = captureLog();
+    const models = Array.from({ length: 6 }, (_, i) => `m${i}`);
+    _printModelMenu(models, 200);
+    // With wide terminal and short names, should fit in fewer than 6 rows
+    expect(rows.length).toBeLessThan(6);
+  });
+});
+
+// ──────────────────────────────────────────────────────────────────────────────
+// 3e. _printValidationResult
+// ──────────────────────────────────────────────────────────────────────────────
+
+describe("_printValidationResult", () => {
+  afterEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  /** Spies on console.log and returns a getter for everything logged so far, joined by newlines. */
+  const recordLog = (): (() => string) => {
+    const logged: string[] = [];
+    vi.spyOn(console, "log").mockImplementation((msg: string) => {
+      logged.push(msg);
+    });
+    return () => logged.join("\n");
+  };
+
+  test("ok=true prints success message containing '✓'", () => {
+    const loggedText = recordLog();
+    _printValidationResult({ ok: true, error: null });
+    expect(loggedText()).toContain("✓");
+  });
+
+  test("ok=false prints warning message containing '⚠'", () => {
+    const loggedText = recordLog();
+    _printValidationResult({ ok: false, error: "HTTP 401" });
+    expect(loggedText()).toContain("⚠");
+  });
+
+  test("ok=false includes the error string in output", () => {
+    const loggedText = recordLog();
+    _printValidationResult({ ok: false, error: "HTTP 401" });
+    expect(loggedText()).toContain("HTTP 401");
+  });
+});
+
+// ──────────────────────────────────────────────────────────────────────────────
+// 4. Regression
+// ──────────────────────────────────────────────────────────────────────────────
+
+describe("_discoverAgents regression", () => {
+  // Smoke test: the call must resolve to an array (possibly empty) instead of rejecting.
+  test("returns an array (may be empty) — never throws", async () => {
+    const discovered = await _discoverAgents();
+    const isArray = Array.isArray(discovered);
+    expect(isArray).toBe(true);
+  });
+});
@@ -0,0 +1,150 @@
+import { mkdtemp, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, beforeEach, describe, expect, test, vi } from "vitest";
+import { cmdSetup, validateModel } from "../commands/setup.js";
+
+describe("validateModel", () => {
+  // Shared inputs for every case. Global fetch is stubbed in each test and
+  // restored afterwards.
+  const BASE_URL = "https://api.example.com/v1";
+  const API_KEY = "sk-test-key";
+  const MODEL = "test-model";
+
+  afterEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  test("success path — returns ok on 200", async () => {
+    const okResponse = new Response(JSON.stringify({}), { status: 200 });
+    const mockFetch = vi.spyOn(globalThis, "fetch").mockResolvedValue(okResponse);
+
+    const outcome = await validateModel(BASE_URL, API_KEY, MODEL);
+
+    expect(outcome).toEqual({ ok: true, value: undefined });
+    expect(mockFetch).toHaveBeenCalledOnce();
+
+    // Inspect the single recorded call: endpoint, auth header, then JSON payload.
+    const [url, opts] = mockFetch.mock.calls[0]!;
+    const init = opts as RequestInit;
+    expect(url).toBe(`${BASE_URL}/chat/completions`);
+    expect(init.headers).toEqual(
+      expect.objectContaining({ Authorization: `Bearer ${API_KEY}` }),
+    );
+    const payload = JSON.parse(init.body as string);
+    expect(payload).toEqual({
+      model: MODEL,
+      messages: [{ role: "user", content: "hi" }],
+      max_tokens: 1,
+    });
+  });
+
+  test("HTTP 401 — returns error containing 401", async () => {
+    const unauthorized = new Response("Unauthorized", { status: 401, statusText: "Unauthorized" });
+    vi.spyOn(globalThis, "fetch").mockResolvedValue(unauthorized);
+
+    const outcome = await validateModel(BASE_URL, API_KEY, MODEL);
+
+    expect(outcome.ok).toBe(false);
+    // The guard narrows the result type so `error` can be read.
+    if (!outcome.ok) {
+      expect(outcome.error).toContain("401");
+    }
+  });
+
+  test("HTTP 404 — returns error containing 404", async () => {
+    const notFound = new Response("Not Found", { status: 404, statusText: "Not Found" });
+    vi.spyOn(globalThis, "fetch").mockResolvedValue(notFound);
+
+    const outcome = await validateModel(BASE_URL, API_KEY, MODEL);
+
+    expect(outcome.ok).toBe(false);
+    if (!outcome.ok) {
+      expect(outcome.error).toContain("404");
+    }
+  });
+
+  test("network timeout — returns error mentioning timeout", async () => {
+    // fetch rejects with an AbortError-named DOMException carrying a timeout message.
+    const abortError = new DOMException("signal timed out", "AbortError");
+    vi.spyOn(globalThis, "fetch").mockRejectedValue(abortError);
+
+    const outcome = await validateModel(BASE_URL, API_KEY, MODEL);
+
+    expect(outcome.ok).toBe(false);
+    if (!outcome.ok) {
+      const message = outcome.error.toLowerCase();
+      expect(message).toMatch(/timeout|timed out/);
+    }
+  });
+
+  test("network error (DNS/connection) — returns error mentioning connectivity", async () => {
+    const fetchFailure = new TypeError("fetch failed");
+    vi.spyOn(globalThis, "fetch").mockRejectedValue(fetchFailure);
+
+    const outcome = await validateModel(BASE_URL, API_KEY, MODEL);
+
+    expect(outcome.ok).toBe(false);
+    if (!outcome.ok) {
+      const message = outcome.error.toLowerCase();
+      expect(message).toMatch(/connect|reach|network/);
+    }
+  });
+
+  test("request body correctness", async () => {
+    const okResponse = new Response(JSON.stringify({}), { status: 200 });
+    const mockFetch = vi.spyOn(globalThis, "fetch").mockResolvedValue(okResponse);
+
+    await validateModel(BASE_URL, API_KEY, "my-special-model");
+
+    // Second element of the first recorded call is the RequestInit passed to fetch.
+    const init = mockFetch.mock.calls[0]![1] as RequestInit;
+    const payload = JSON.parse(init.body as string);
+    expect(payload).toEqual({
+      model: "my-special-model",
+      messages: [{ role: "user", content: "hi" }],
+      max_tokens: 1,
+    });
+  });
+});
+
+describe("cmdSetup with validation", () => {
+  // Every test gets a fresh temp directory as its storage root; the directory
+  // is removed again in afterEach.
+  let storageRoot: string;
+
+  beforeEach(async () => {
+    storageRoot = await mkdtemp(join(tmpdir(), "uwf-setup-validate-"));
+  });
+
+  afterEach(async () => {
+    vi.restoreAllMocks();
+    await rm(storageRoot, { recursive: true, force: true });
+  });
+
+  // Built lazily so each call picks up the storageRoot of the current test.
+  const setupArgs = () => ({
+    provider: "testprovider",
+    baseUrl: "https://api.test.com/v1",
+    apiKey: "sk-test",
+    model: "test-model",
+    storageRoot,
+  });
+
+  test("includes validation result on success", async () => {
+    vi.spyOn(globalThis, "fetch").mockResolvedValue(
+      new Response(JSON.stringify({}), { status: 200 }),
+    );
+
+    const result = await cmdSetup(setupArgs());
+
+    expect(result.validation).toEqual({ ok: true, value: undefined });
+    // Config files should still be written
+    expect(result.configPath).toBeTruthy();
+    expect(result.envPath).toBeTruthy();
+  });
+
+  test("includes validation failure — config still saved", async () => {
+    vi.spyOn(globalThis, "fetch").mockResolvedValue(
+      new Response("Unauthorized", { status: 401, statusText: "Unauthorized" }),
+    );
+
+    const result = await cmdSetup(setupArgs());
+
+    // toMatchObject checks the `ok` flag structurally, so no type assertion is
+    // needed to read it; it also fails if validation is missing.
+    expect(result.validation).toBeDefined();
+    expect(result.validation).toMatchObject({ ok: false });
+    // Config files should still be written despite validation failure
+    expect(result.configPath).toBeTruthy();
+    expect(result.envPath).toBeTruthy();
+  });
+});
@@ -0,0 +1,78 @@
+import { execFileSync } from "node:child_process";
+import { dirname, join } from "node:path";
+import { fileURLToPath } from "node:url";
+import { describe, expect, test } from "vitest";
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+
+import {
+  cmdSkillArchitecture,
+  cmdSkillCli,
+  cmdSkillList,
+  cmdSkillModerator,
+  cmdSkillYaml,
+} from "../commands/skill.js";
+
+describe("skill commands", () => {
+  test("skill list returns all skill names", () => {
+    const names = cmdSkillList();
+    expect(names).toBeInstanceOf(Array);
+    for (const expected of ["cli", "architecture", "yaml", "moderator"]) {
+      expect(names).toContain(expected);
+    }
+    // Every entry must be a non-empty string without whitespace.
+    for (const entry of names) {
+      expect(typeof entry).toBe("string");
+      expect(entry).toMatch(/^\S+$/);
+    }
+  });
+
+  test("skill architecture returns non-empty markdown string", () => {
+    const doc = cmdSkillArchitecture();
+    expect(typeof doc).toBe("string");
+    for (const keyword of ["CAS", "Thread", "Workflow", "Step"]) {
+      expect(doc).toContain(keyword);
+    }
+    expect(doc.length).toBeGreaterThan(200);
+  });
+
+  test("skill yaml returns non-empty markdown string", () => {
+    const doc = cmdSkillYaml();
+    expect(typeof doc).toBe("string");
+    for (const keyword of ["roles", "graph", "frontmatter"]) {
+      expect(doc).toContain(keyword);
+    }
+    expect(doc.length).toBeGreaterThan(200);
+  });
+
+  test("skill moderator returns non-empty markdown string", () => {
+    const doc = cmdSkillModerator();
+    expect(typeof doc).toBe("string");
+    for (const keyword of ["routing", "status"]) {
+      expect(doc).toContain(keyword);
+    }
+    expect(doc.length).toBeGreaterThan(200);
+    // Check for edge or graph
+    expect(doc).toMatch(/edge|graph/i);
+  });
+
+  test("skill cli returns CLI reference markdown", () => {
+    const doc = cmdSkillCli();
+    expect(typeof doc).toBe("string");
+    expect(doc).toContain("uwf");
+  });
+
+  test("skill help subcommand is suppressed", () => {
+    // Run the CLI entry point through bun from two directories above this test
+    // file's directory, with /opt/homebrew/bin prepended to PATH.
+    const cliCwd = join(__dirname, "..", "..");
+    const env = { ...process.env, PATH: `/opt/homebrew/bin:${process.env.PATH}` };
+    const output = execFileSync("bun", ["src/cli.ts", "skill", "--help"], {
+      cwd: cliCwd,
+      encoding: "utf-8",
+      env,
+    });
+    expect(output).not.toMatch(/help\s+\[command\]/i);
+    for (const subcommand of ["cli", "architecture", "yaml", "moderator", "list"]) {
+      expect(output).toContain(subcommand);
+    }
+  });
+});
@@ -0,0 +1,99 @@
+import { readFile } from "node:fs/promises";
+import { join } from "node:path";
+import type { WorkflowPayload } from "@uncaged/workflow-protocol";
+import { describe, expect, test } from "vitest";
+import { parse } from "yaml";
+
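+/**
+ * Test: Issue #474 - `tea pr create` fails in git worktree directories
+ *
+ * These tests verify that the solve-issue workflow's committer procedure
+ * tells the agent to run `tea pr create` from the main repo directory (not a
+ * worktree), mentions `git remote`, and includes error-handling guidance.
+ * The failure being guarded against is the "path segment [0] is empty" error
+ * in worktree directories. The original fix was described in terms of the
+ * `--repo` flag; note that no assertion below checks for that flag itself.
+ */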
+
+describe("solve-issue workflow: tea pr create worktree fix", () => {
+  // Navigate up from packages/cli-workflow/src/__tests__ to repo root
+  const workflowPath = join(
+    import.meta.dirname,
+    "..",
+    "..",
+    "..",
+    "..",
+    ".workflows",
+    "solve-issue.yaml",
+  );
+
+  // Each test re-reads and re-parses the YAML file so the tests stay independent.
+
+  test("committer procedure should require running tea pr create from main repo directory", async () => {
+    const yamlContent = await readFile(workflowPath, "utf-8");
+    const workflow = parse(yamlContent) as WorkflowPayload;
+
+    expect(workflow.roles.committer).toBeDefined();
+    const committerProcedure = workflow.roles.committer?.procedure;
+    expect(committerProcedure).toBeDefined();
+
+    // Verify the procedure includes tea pr create
+    expect(committerProcedure).toContain("tea pr create");
+
+    // Verify the procedure warns about running from main repo dir (not worktree)
+    expect(committerProcedure).toMatch(/main repo directory/i);
+    expect(committerProcedure).toMatch(/not a worktree/i);
+  });
+
+  test("committer procedure should mention repo extraction from git remote", async () => {
+    const yamlContent = await readFile(workflowPath, "utf-8");
+    const workflow = parse(yamlContent) as WorkflowPayload;
+
+    const committerProcedure = workflow.roles.committer?.procedure;
+    expect(committerProcedure).toBeDefined();
+
+    // Verify the procedure mentions extracting repo info from git remote
+    // This ensures fallback logic is documented
+    expect(committerProcedure).toMatch(/git remote/i);
+  });
+
+  test("committer procedure should include error handling for tea failures", async () => {
+    const yamlContent = await readFile(workflowPath, "utf-8");
+    const workflow = parse(yamlContent) as WorkflowPayload;
+
+    const committerProcedure = workflow.roles.committer?.procedure;
+    expect(committerProcedure).toBeDefined();
+
+    // Verify the procedure includes error handling guidance
+    // This ensures we capture failures and provide actionable output
+    expect(committerProcedure).toMatch(/error|fail/i);
+  });
+
+  test("workflow should be parseable as valid WorkflowPayload", async () => {
+    const yamlContent = await readFile(workflowPath, "utf-8");
+    const workflow = parse(yamlContent) as WorkflowPayload;
+
+    // Basic structure validation
+    expect(workflow.name).toBe("solve-issue");
+    expect(workflow.roles).toBeDefined();
+    expect(workflow.graph).toBeDefined();
+
+    // Verify committer role exists with required fields
+    expect(workflow.roles.committer).toBeDefined();
+    expect(workflow.roles.committer?.description).toBeDefined();
+    expect(workflow.roles.committer?.goal).toBeDefined();
+    expect(workflow.roles.committer?.procedure).toBeDefined();
+    expect(workflow.roles.committer?.output).toBeDefined();
+    expect(workflow.roles.committer?.frontmatter).toBeDefined();
+  });
+
+  test("committer frontmatter schema should be oneOf with $status discriminant", async () => {
+    // Minimal structural view of the raw YAML, limited to the fields this test
+    // reads. The frontmatter is inline JSON Schema in the YAML file, so it is
+    // described locally instead of falling back to `any`.
+    type FrontmatterVariant = {
+      properties?: Record<string, { const?: unknown } | undefined>;
+      required?: string[];
+    };
+    type RawWorkflow = {
+      roles: { committer?: { frontmatter?: { oneOf?: FrontmatterVariant[] } } };
+    };
+
+    const yamlContent = await readFile(workflowPath, "utf-8");
+    const workflow = parse(yamlContent) as RawWorkflow;
+    const frontmatter = workflow.roles.committer?.frontmatter;
+    expect(frontmatter).toBeDefined();
+    expect(frontmatter?.oneOf).toBeDefined();
+    // Find the schema variant whose $status is pinned to "committed".
+    const committedVariant = frontmatter?.oneOf?.find(
+      (variant) => variant.properties?.["$status"]?.const === "committed",
+    );
+    expect(committedVariant).toBeDefined();
+    expect(committedVariant?.required).toContain("$status");
+  });
+});
@@ -0,0 +1,602 @@
+import { mkdir, mkdtemp, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { bootstrap, putSchema } from "@uncaged/json-cas";
+import { createFsStore } from "@uncaged/json-cas-fs";
+import type { CasRef } from "@uncaged/workflow-protocol";
+import { afterEach, beforeEach, describe, expect, test } from "vitest";
+import { cmdStepRead } from "../commands/step.js";
+import { registerUwfSchemas } from "../schemas.js";
+
+// ── schemas used in tests ────────────────────────────────────────────────────
+
+// JSON Schema for a single turn object as stored by the tests in this file.
+// `index`, `role` and `content` are required; `toolCalls` and `reasoning` are
+// optional and may be null. Properties not listed here are rejected.
+const TURN_SCHEMA = {
+  title: "hermes-turn",
+  type: "object" as const,
+  required: ["index", "role", "content"],
+  properties: {
+    index: { type: "integer" as const },
+    role: { type: "string" as const },
+    content: { type: "string" as const },
+    // Either an array of objects or null.
+    toolCalls: {
+      anyOf: [
+        { type: "array" as const, items: { type: "object" as const } },
+        { type: "null" as const },
+      ],
+    },
+    // Either a string or null.
+    reasoning: { anyOf: [{ type: "string" as const }, { type: "null" as const }] },
+  },
+  additionalProperties: false,
+};
+
+// JSON Schema for the per-step detail object: session metadata plus the list
+// of turns. All five properties are required and no others are allowed.
+const DETAIL_SCHEMA = {
+  title: "hermes-detail",
+  type: "object" as const,
+  required: ["sessionId", "model", "duration", "turnCount", "turns"],
+  properties: {
+    sessionId: { type: "string" as const },
+    model: { type: "string" as const },
+    duration: { type: "integer" as const },
+    turnCount: { type: "integer" as const },
+    // Strings tagged with the `cas_ref` format; the tests fill this with the
+    // hashes returned when storing turn objects.
+    turns: {
+      type: "array" as const,
+      items: { type: "string" as const, format: "cas_ref" },
+    },
+  },
+  additionalProperties: false,
+};
+
+// ── helpers ───────────────────────────────────────────────────────────────────
+
+/**
+ * Bootstraps the store, then registers the turn and detail schemas in it.
+ *
+ * @param store - Filesystem-backed store created with `createFsStore`.
+ * @returns The values `putSchema` resolved to, keyed as `turn` and `detail`.
+ */
+async function registerDetailSchemas(store: ReturnType<typeof createFsStore>) {
+  await bootstrap(store);
+  // Both registrations are started together and awaited as a pair.
+  const registered = await Promise.all([
+    putSchema(store, TURN_SCHEMA),
+    putSchema(store, DETAIL_SCHEMA),
+  ]);
+  return { turn: registered[0], detail: registered[1] };
+}
+
+/**
+ * Builds filler text of exactly `size` characters by repeating `prefix`
+ * followed by a space and cutting the result to length.
+ *
+ * @param size - Number of characters to produce.
+ * @param prefix - Word to repeat; defaults to "Content".
+ * @returns The repeated text truncated to `size` characters.
+ */
+function generateContent(size: number, prefix = "Content"): string {
+  const unit = `${prefix} `;
+  // Round up so the repeated text is at least `size` long before trimming.
+  const copies = Math.ceil(size / unit.length);
+  const padded = unit.repeat(copies);
+  return padded.slice(0, size);
+}
+
+// ── fixture ───────────────────────────────────────────────────────────────────
+
+// Scratch directory used by the tests in this file; recreated before every test.
+let tmpDir: string;
+
+beforeEach(async () => {
+  const prefix = join(tmpdir(), "cli-uwf-step-read-test-");
+  tmpDir = await mkdtemp(prefix);
+});
+
+afterEach(async () => {
+  // Remove the whole tree; `force` avoids an error if it is already gone.
+  const cleanupOptions = { recursive: true, force: true };
+  await rm(tmpDir, cleanupOptions);
+});
+
+// ── step read tests ───────────────────────────────────────────────────────────
+
+describe("step read", () => {
+  // Builds a complete chain in a fresh store (workflow → start node → three
+  // turns → detail → step node), then renders the step with a quota large
+  // enough for everything and checks the markdown contains every part.
+  test("test 1: basic single-step read with 3 turns", async () => {
+    // The store lives in a "cas" subdirectory of the per-test temp directory.
+    const casDir = join(tmpDir, "cas");
+    await mkdir(casDir, { recursive: true });
+    const store = createFsStore(casDir);
+    const schemas = await registerUwfSchemas(store);
+    const detailSchemas = await registerDetailSchemas(store);
+
+    const workflowHash = await store.put(schemas.workflow, {
+      name: "test-wf",
+      description: "desc",
+      roles: {
+        worker: {
+          description: "Worker",
+          goal: "You are a worker agent.",
+          capabilities: [],
+          procedure: "Do the work.",
+          output: "Summarize the work.",
+          // NOTE(review): placeholder ref — presumably never resolved by step read; confirm.
+          meta: "placeholder00" as CasRef,
+        },
+      },
+      conditions: {},
+      graph: {},
+    });
+
+    const startHash = await store.put(schemas.startNode, {
+      workflow: workflowHash,
+      prompt: "Test task",
+    });
+
+    // NOTE(review): the step's output is stored with the workflow schema and
+    // empty fields — presumably just a convenient valid object to reference.
+    const outputHash = await store.put(schemas.workflow, {
+      name: "out",
+      description: "",
+      roles: {},
+      conditions: {},
+      graph: {},
+    });
+
+    // Create 3 turns
+    const turnHashes: CasRef[] = [];
+    for (let i = 1; i <= 3; i++) {
+      const content = `Turn ${i} content with some text to make it readable.`;
+      // The stored index is zero-based; the headings asserted below are one-based.
+      const turnHash = await store.put(detailSchemas.turn, {
+        index: i - 1,
+        role: "assistant",
+        content,
+        toolCalls: null,
+        reasoning: null,
+      });
+      turnHashes.push(turnHash);
+    }
+
+    const detailHash = await store.put(detailSchemas.detail, {
+      sessionId: "session-1",
+      model: "test-model",
+      duration: 1000,
+      turnCount: 3,
+      turns: turnHashes,
+    });
+
+    // A first step (prev: null) that ties together start, output and detail.
+    const stepHash = await store.put(schemas.stepNode, {
+      start: startHash,
+      prev: null,
+      role: "worker",
+      output: outputHash,
+      detail: detailHash,
+      agent: "uwf-test",
+      startedAtMs: 1000000000000,
+      completedAtMs: 1000000005000,
+    });
+
+    // Read step with large quota
+    const markdown = await cmdStepRead(tmpDir, stepHash, 10000);
+
+    // Assert structure
+    expect(markdown).toContain(`# Step ${stepHash}`);
+    expect(markdown).toContain("**Role:** worker");
+    expect(markdown).toContain("**Agent:** uwf-test");
+    expect(markdown).toContain("## Turn 1");
+    expect(markdown).toContain("## Turn 2");
+    expect(markdown).toContain("## Turn 3");
+    expect(markdown).toContain("Turn 1 content with some text to make it readable.");
+    expect(markdown).toContain("Turn 2 content with some text to make it readable.");
+    expect(markdown).toContain("Turn 3 content with some text to make it readable.");
+  });
+
+  // Same chain as the basic test but with four 300-character turns and a
+  // 700-character quota, so not every turn can fit. The output must flag the
+  // omission, keep the newest turn, and stay within quota plus some slack.
+  test("test 2: quota enforcement - multiple turns", async () => {
+    const casDir = join(tmpDir, "cas");
+    await mkdir(casDir, { recursive: true });
+    const store = createFsStore(casDir);
+    const schemas = await registerUwfSchemas(store);
+    const detailSchemas = await registerDetailSchemas(store);
+
+    const workflowHash = await store.put(schemas.workflow, {
+      name: "test-wf",
+      description: "desc",
+      roles: {
+        worker: {
+          description: "Worker",
+          goal: "You are a worker agent.",
+          capabilities: [],
+          procedure: "Do the work.",
+          output: "Summarize the work.",
+          // NOTE(review): placeholder ref — presumably never resolved by step read; confirm.
+          meta: "placeholder00" as CasRef,
+        },
+      },
+      conditions: {},
+      graph: {},
+    });
+
+    const startHash = await store.put(schemas.startNode, {
+      workflow: workflowHash,
+      prompt: "Test task",
+    });
+
+    // NOTE(review): the step's output is stored with the workflow schema and
+    // empty fields — presumably just a convenient valid object to reference.
+    const outputHash = await store.put(schemas.workflow, {
+      name: "out",
+      description: "",
+      roles: {},
+      conditions: {},
+      graph: {},
+    });
+
+    // Create 4 turns of ~300 chars each
+    const turnHashes: CasRef[] = [];
+    for (let i = 1; i <= 4; i++) {
+      // The "TurnN" prefix makes each turn's text identifiable in the output.
+      const content = generateContent(300, `Turn${i}`);
+      const turnHash = await store.put(detailSchemas.turn, {
+        index: i - 1,
+        role: "assistant",
+        content,
+        toolCalls: null,
+        reasoning: null,
+      });
+      turnHashes.push(turnHash);
+    }
+
+    const detailHash = await store.put(detailSchemas.detail, {
+      sessionId: "session-1",
+      model: "test-model",
+      duration: 1000,
+      turnCount: 4,
+      turns: turnHashes,
+    });
+
+    const stepHash = await store.put(schemas.stepNode, {
+      start: startHash,
+      prev: null,
+      role: "worker",
+      output: outputHash,
+      detail: detailHash,
+      agent: "uwf-test",
+      startedAtMs: 1000000000000,
+      completedAtMs: 1000000005000,
+    });
+
+    // Read step with limited quota (700 chars)
+    const markdown = await cmdStepRead(tmpDir, stepHash, 700);
+
+    // Assert only most recent turns fit
+    expect(markdown).toContain(`# Step ${stepHash}`);
+    // Should have skip hint
+    expect(markdown).toContain("Earlier turns omitted");
+    // Should include at least Turn 4 (most recent)
+    expect(markdown).toContain("Turn4");
+    // Total length should respect quota (with tolerance for structural overhead)
+    expect(markdown.length).toBeLessThanOrEqual(900); // 700 quota + 200 buffer tolerance
+  });
+
+  // Edge case: even with a quota of one character, the single stored turn
+  // must still appear in the rendered markdown.
+  test("test 3: minimal quota edge case - always show at least one turn", async () => {
+    const casRoot = join(tmpDir, "cas");
+    await mkdir(casRoot, { recursive: true });
+    const cas = createFsStore(casRoot);
+    const uwfSchemas = await registerUwfSchemas(cas);
+    const detailTypes = await registerDetailSchemas(cas);
+
+    // Workflow with a single "worker" role
+    const workflowRef = await cas.put(uwfSchemas.workflow, {
+      name: "test-wf",
+      description: "desc",
+      roles: {
+        worker: {
+          description: "Worker",
+          goal: "You are a worker agent.",
+          capabilities: [],
+          procedure: "Do the work.",
+          output: "Summarize the work.",
+          meta: "placeholder00" as CasRef,
+        },
+      },
+      conditions: {},
+      graph: {},
+    });
+
+    const startRef = await cas.put(uwfSchemas.startNode, {
+      workflow: workflowRef,
+      prompt: "Test task",
+    });
+
+    const outputRef = await cas.put(uwfSchemas.workflow, {
+      name: "out",
+      description: "",
+      roles: {},
+      conditions: {},
+      graph: {},
+    });
+
+    // One assistant turn whose content is generateContent(500, "LongTurn")
+    const turnRef = await cas.put(detailTypes.turn, {
+      index: 0,
+      role: "assistant",
+      content: generateContent(500, "LongTurn"),
+      toolCalls: null,
+      reasoning: null,
+    });
+
+    const detailRef = await cas.put(detailTypes.detail, {
+      sessionId: "session-1",
+      model: "test-model",
+      duration: 1000,
+      turnCount: 1,
+      turns: [turnRef],
+    });
+
+    const stepRef = await cas.put(uwfSchemas.stepNode, {
+      start: startRef,
+      prev: null,
+      role: "worker",
+      output: outputRef,
+      detail: detailRef,
+      agent: "uwf-test",
+      startedAtMs: 1000000000000,
+      completedAtMs: 1000000005000,
+    });
+
+    // Render with the smallest meaningful quota (1 char)
+    const rendered = await cmdStepRead(tmpDir, stepRef, 1);
+
+    // The turn is shown regardless of the quota
+    expect(rendered).toContain("LongTurn");
+    expect(rendered.length).toBeGreaterThan(1);
+  });
+
+  // A step stored with `detail: null` should render its metadata header and
+  // no turn sections, without raising an error.
+  test("test 4: step with no detail field", async () => {
+    const casRoot = join(tmpDir, "cas");
+    await mkdir(casRoot, { recursive: true });
+    const cas = createFsStore(casRoot);
+    const uwfSchemas = await registerUwfSchemas(cas);
+
+    // Workflow with a single "worker" role
+    const workflowRef = await cas.put(uwfSchemas.workflow, {
+      name: "test-wf",
+      description: "desc",
+      roles: {
+        worker: {
+          description: "Worker",
+          goal: "You are a worker agent.",
+          capabilities: [],
+          procedure: "Do the work.",
+          output: "Summarize the work.",
+          meta: "placeholder00" as CasRef,
+        },
+      },
+      conditions: {},
+      graph: {},
+    });
+
+    const startRef = await cas.put(uwfSchemas.startNode, {
+      workflow: workflowRef,
+      prompt: "Test task",
+    });
+
+    const outputRef = await cas.put(uwfSchemas.workflow, {
+      name: "out",
+      description: "",
+      roles: {},
+      conditions: {},
+      graph: {},
+    });
+
+    // Step node that deliberately carries no detail reference
+    const stepRef = await cas.put(uwfSchemas.stepNode, {
+      start: startRef,
+      prev: null,
+      role: "worker",
+      output: outputRef,
+      detail: null,
+      agent: "uwf-test",
+      startedAtMs: 1000000000000,
+      completedAtMs: 1000000005000,
+    });
+
+    // Rendering must succeed and produce metadata only
+    const rendered = await cmdStepRead(tmpDir, stepRef, 4000);
+
+    // Metadata header lines
+    expect(rendered).toContain(`# Step ${stepRef}`);
+    expect(rendered).toContain("**Role:** worker");
+    expect(rendered).toContain("**Agent:** uwf-test");
+    // No per-turn headings
+    expect(rendered).not.toContain("## Turn");
+  });
+
+  // Covers a step whose `detail` node exists but was stored under a schema
+  // that has no `turns` array: the rendering is expected to contain the
+  // metadata header and no "## Turn" sections.
+  test("test 5: step with detail but no turns array", async () => {
+    const casDir = join(tmpDir, "cas");
+    await mkdir(casDir, { recursive: true });
+    const store = createFsStore(casDir);
+    const schemas = await registerUwfSchemas(store);
+    // Return value is intentionally ignored; this test stores its detail node
+    // under SIMPLE_DETAIL_SCHEMA below instead of the registered detail schema.
+    await registerDetailSchemas(store);
+
+    const workflowHash = await store.put(schemas.workflow, {
+      name: "test-wf",
+      description: "desc",
+      roles: {
+        worker: {
+          description: "Worker",
+          goal: "You are a worker agent.",
+          capabilities: [],
+          procedure: "Do the work.",
+          output: "Summarize the work.",
+          meta: "placeholder00" as CasRef,
+        },
+      },
+      conditions: {},
+      graph: {},
+    });
+
+    const startHash = await store.put(schemas.startNode, {
+      workflow: workflowHash,
+      prompt: "Test task",
+    });
+
+    const outputHash = await store.put(schemas.workflow, {
+      name: "out",
+      description: "",
+      roles: {},
+      conditions: {},
+      graph: {},
+    });
+
+    // Create detail with different schema (no turns)
+    const SIMPLE_DETAIL_SCHEMA = {
+      title: "simple-detail",
+      type: "object" as const,
+      required: ["sessionId"],
+      properties: {
+        sessionId: { type: "string" as const },
+      },
+      additionalProperties: false,
+    };
+
+    // NOTE(review): registerDetailSchemas above may already bootstrap the
+    // store (its definition is not visible from here) — confirm whether this
+    // second bootstrap call is required.
+    await bootstrap(store);
+    const simpleDetailType = await putSchema(store, SIMPLE_DETAIL_SCHEMA);
+    const detailHash = await store.put(simpleDetailType, {
+      sessionId: "session-1",
+    });
+
+    const stepHash = await store.put(schemas.stepNode, {
+      start: startHash,
+      prev: null,
+      role: "worker",
+      output: outputHash,
+      detail: detailHash,
+      agent: "uwf-test",
+      startedAtMs: 1000000000000,
+      completedAtMs: 1000000005000,
+    });
+
+    // Read step - should return metadata only (no error)
+    const markdown = await cmdStepRead(tmpDir, stepHash, 4000);
+
+    // Assert metadata is present
+    expect(markdown).toContain(`# Step ${stepHash}`);
+    expect(markdown).toContain("**Role:** worker");
+    // Should not have turn sections
+    expect(markdown).not.toContain("## Turn");
+  });
+
+  // A turn with empty content but one tool call should still render the
+  // turn's role plus the tool name and its raw argument string.
+  test("test 6: displays role and tool calls in turn body", async () => {
+    const casRoot = join(tmpDir, "cas");
+    await mkdir(casRoot, { recursive: true });
+    const cas = createFsStore(casRoot);
+    const uwfSchemas = await registerUwfSchemas(cas);
+    const detailTypes = await registerDetailSchemas(cas);
+
+    // Workflow with a single "worker" role
+    const workflowRef = await cas.put(uwfSchemas.workflow, {
+      name: "test-wf",
+      description: "desc",
+      roles: {
+        worker: {
+          description: "Worker",
+          goal: "You are a worker agent.",
+          capabilities: [],
+          procedure: "Do the work.",
+          output: "Summarize the work.",
+          meta: "placeholder00" as CasRef,
+        },
+      },
+      conditions: {},
+      graph: {},
+    });
+
+    const startRef = await cas.put(uwfSchemas.startNode, {
+      workflow: workflowRef,
+      prompt: "Test task",
+    });
+
+    const outputRef = await cas.put(uwfSchemas.workflow, {
+      name: "out",
+      description: "",
+      roles: {},
+      conditions: {},
+      graph: {},
+    });
+
+    // Assistant turn: no text, a single "terminal" tool call
+    const turnRef = await cas.put(detailTypes.turn, {
+      index: 0,
+      role: "assistant",
+      content: "",
+      toolCalls: [{ name: "terminal", args: '{"command":"echo hi"}' }],
+      reasoning: null,
+    });
+
+    const detailRef = await cas.put(detailTypes.detail, {
+      sessionId: "session-1",
+      model: "test-model",
+      duration: 1000,
+      turnCount: 1,
+      turns: [turnRef],
+    });
+
+    const stepRef = await cas.put(uwfSchemas.stepNode, {
+      start: startRef,
+      prev: null,
+      role: "worker",
+      output: outputRef,
+      detail: detailRef,
+      agent: "uwf-hermes",
+      startedAtMs: 1000000000000,
+      completedAtMs: 1000000005000,
+    });
+
+    const rendered = await cmdStepRead(tmpDir, stepRef, 4000);
+
+    // Role line, bolded tool name and verbatim arguments
+    expect(rendered).toContain("**Turn role:** assistant");
+    expect(rendered).toContain("**terminal**");
+    expect(rendered).toContain('{"command":"echo hi"}');
+  });
+
+  // Markdown-significant characters inside turn content must come through the
+  // rendering verbatim.
+  test("test 7: turn content with special characters", async () => {
+    const casRoot = join(tmpDir, "cas");
+    await mkdir(casRoot, { recursive: true });
+    const cas = createFsStore(casRoot);
+    const uwfSchemas = await registerUwfSchemas(cas);
+    const detailTypes = await registerDetailSchemas(cas);
+
+    // Workflow with a single "worker" role
+    const workflowRef = await cas.put(uwfSchemas.workflow, {
+      name: "test-wf",
+      description: "desc",
+      roles: {
+        worker: {
+          description: "Worker",
+          goal: "You are a worker agent.",
+          capabilities: [],
+          procedure: "Do the work.",
+          output: "Summarize the work.",
+          meta: "placeholder00" as CasRef,
+        },
+      },
+      conditions: {},
+      graph: {},
+    });
+
+    const startRef = await cas.put(uwfSchemas.startNode, {
+      workflow: workflowRef,
+      prompt: "Test task",
+    });
+
+    const outputRef = await cas.put(uwfSchemas.workflow, {
+      name: "out",
+      description: "",
+      roles: {},
+      conditions: {},
+      graph: {},
+    });
+
+    // Turn text mixing inline code, bold, italic and link syntax
+    const specialText = "This has `backticks`, **bold**, *italic*, and [links](http://example.com)";
+    const turnRef = await cas.put(detailTypes.turn, {
+      index: 0,
+      role: "assistant",
+      content: specialText,
+      toolCalls: null,
+      reasoning: null,
+    });
+
+    const detailRef = await cas.put(detailTypes.detail, {
+      sessionId: "session-1",
+      model: "test-model",
+      duration: 1000,
+      turnCount: 1,
+      turns: [turnRef],
+    });
+
+    const stepRef = await cas.put(uwfSchemas.stepNode, {
+      start: startRef,
+      prev: null,
+      role: "worker",
+      output: outputRef,
+      detail: detailRef,
+      agent: "uwf-test",
+      startedAtMs: 1000000000000,
+      completedAtMs: 1000000005000,
+    });
+
+    const rendered = await cmdStepRead(tmpDir, stepRef, 4000);
+
+    // Each markdown fragment must be present unchanged
+    expect(rendered).toContain("`backticks`");
+    expect(rendered).toContain("**bold**");
+    expect(rendered).toContain("*italic*");
+    expect(rendered).toContain("[links](http://example.com)");
+  });
+});
@@ -0,0 +1,378 @@
+import { mkdir, mkdtemp, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { bootstrap, putSchema } from "@uncaged/json-cas";
+import { createFsStore } from "@uncaged/json-cas-fs";
+import type { CasRef, ThreadId } from "@uncaged/workflow-protocol";
+import { STEP_NODE_SCHEMA } from "@uncaged/workflow-protocol";
+import { afterEach, beforeEach, describe, expect, test } from "vitest";
+import { cmdStepList } from "../commands/step.js";
+import { cmdThreadRead } from "../commands/thread.js";
+import { registerUwfSchemas } from "../schemas.js";
+import { saveThreadsIndex } from "../store.js";
+
+// ── schemas ──────────────────────────────────────────────────────────────────
+
+/**
+ * Schema object for a single conversation turn, registered through
+ * `putSchema` in `registerDetailSchemas`.
+ *
+ * `index`, `role` and `content` are required; `toolCalls` is either an array
+ * of objects or null, and `reasoning` is either a string or null. No other
+ * properties are allowed.
+ */
+const TURN_SCHEMA = {
+  title: "hermes-turn",
+  type: "object" as const,
+  required: ["index", "role", "content"],
+  properties: {
+    index: { type: "integer" as const },
+    role: { type: "string" as const },
+    content: { type: "string" as const },
+    toolCalls: {
+      anyOf: [
+        { type: "array" as const, items: { type: "object" as const } },
+        { type: "null" as const },
+      ],
+    },
+    reasoning: { anyOf: [{ type: "string" as const }, { type: "null" as const }] },
+  },
+  additionalProperties: false,
+};
+
+/**
+ * Schema object for a step's detail node, registered through `putSchema` in
+ * `registerDetailSchemas`.
+ *
+ * All five properties are required. `turns` is an array of strings carrying
+ * the `cas_ref` format; the tests in this file fill it with hashes returned
+ * by storing turn nodes. No other properties are allowed.
+ */
+const DETAIL_SCHEMA = {
+  title: "hermes-detail",
+  type: "object" as const,
+  required: ["sessionId", "model", "duration", "turnCount", "turns"],
+  properties: {
+    sessionId: { type: "string" as const },
+    model: { type: "string" as const },
+    duration: { type: "integer" as const },
+    turnCount: { type: "integer" as const },
+    turns: {
+      type: "array" as const,
+      items: { type: "string" as const, format: "cas_ref" },
+    },
+  },
+  additionalProperties: false,
+};
+
+// ── helpers ──────────────────────────────────────────────────────────────────
+
+/**
+ * Bootstraps the store, then registers the turn and detail schemas.
+ *
+ * Both `putSchema` calls are started together and awaited as a pair.
+ *
+ * @param store - File-system CAS store created by `createFsStore`.
+ * @returns The values `putSchema` resolved with, keyed as `turn` and `detail`.
+ */
+async function registerDetailSchemas(store: ReturnType<typeof createFsStore>) {
+  await bootstrap(store);
+  const registered = await Promise.all([
+    putSchema(store, TURN_SCHEMA),
+    putSchema(store, DETAIL_SCHEMA),
+  ]);
+  return { turn: registered[0], detail: registered[1] };
+}
+
+// ── fixture ──────────────────────────────────────────────────────────────────
+
+// Per-test scratch directory: created before each test, removed afterwards.
+let tmpDir: string;
+
+beforeEach(async () => {
+  const prefix = join(tmpdir(), "cli-uwf-step-timing-test-");
+  tmpDir = await mkdtemp(prefix);
+});
+
+afterEach(async () => {
+  // `force` keeps removal from failing if the directory is already gone
+  await rm(tmpDir, { force: true, recursive: true });
+});
+
+// ── 1. Protocol types (compile-time) ─────────────────────────────────────────
+
+describe("protocol types", () => {
+  test("StepRecord has startedAtMs and completedAtMs as required fields", () => {
+    // Compile-time check: the annotated literal below only type-checks when
+    // StepRecord declares both timing fields as numbers.
+    type StepRecord = import("@uncaged/workflow-protocol").StepRecord;
+    const sample: StepRecord = {
+      role: "test",
+      output: "hash1" as CasRef,
+      detail: "hash2" as CasRef,
+      agent: "uwf-test",
+      edgePrompt: "",
+      startedAtMs: 1000,
+      completedAtMs: 2000,
+    };
+    const { startedAtMs, completedAtMs } = sample;
+    expect(startedAtMs).toBe(1000);
+    expect(completedAtMs).toBe(2000);
+  });
+
+  test("StepEntry has durationMs as required field", () => {
+    // Same idea for StepEntry and its durationMs field
+    type StepEntry = import("@uncaged/workflow-protocol").StepEntry;
+    const sample: StepEntry = {
+      hash: "hash" as CasRef,
+      role: "test",
+      output: {},
+      detail: "hash2" as CasRef,
+      agent: "uwf-test",
+      timestamp: 123,
+      durationMs: 5000,
+    };
+    const { durationMs } = sample;
+    expect(durationMs).toBe(5000);
+  });
+});
+
+// ── 2. JSON Schema ───────────────────────────────────────────────────────────
+
+describe("StepNode JSON schema", () => {
+  // Both timing fields must be listed in the schema's `required` array.
+  test("schema requires startedAtMs and completedAtMs", () => {
+    const required = STEP_NODE_SCHEMA.required as string[];
+    expect(required).toContain("startedAtMs");
+    expect(required).toContain("completedAtMs");
+  });
+
+  // Both timing fields must be declared with type "integer".
+  test("schema defines timing fields as integer", () => {
+    // A string-keyed lookup may be undefined, so the value type says so and
+    // optional chaining is used: a missing property then fails the assertion
+    // instead of throwing a TypeError before `expect` runs.
+    const props = STEP_NODE_SCHEMA.properties as Record<string, { type: string } | undefined>;
+    expect(props.startedAtMs?.type).toBe("integer");
+    expect(props.completedAtMs?.type).toBe("integer");
+  });
+
+  // Storing a step node that carries both timing fields must succeed.
+  test("StepNode with timing fields passes CAS validation", async () => {
+    const casDir = join(tmpDir, "cas");
+    await mkdir(casDir, { recursive: true });
+    const store = createFsStore(casDir);
+    const schemas = await registerUwfSchemas(store);
+
+    // The workflow reference is a placeholder string, not a stored node
+    const startHash = await store.put(schemas.startNode, {
+      workflow: "placeholder0000" as CasRef,
+      prompt: "test",
+    });
+
+    const outputHash = await store.put(schemas.text, "output text");
+
+    // Detail node with zero turns
+    const detailSchemas = await registerDetailSchemas(store);
+    const detailHash = await store.put(detailSchemas.detail, {
+      sessionId: "s1",
+      model: "m1",
+      duration: 100,
+      turnCount: 0,
+      turns: [],
+    });
+
+    // Should succeed — valid timing fields
+    const hash = await store.put(schemas.stepNode, {
+      start: startHash,
+      prev: null,
+      role: "worker",
+      output: outputHash,
+      detail: detailHash,
+      agent: "uwf-test",
+      edgePrompt: "",
+      startedAtMs: 1000000000000,
+      completedAtMs: 1000000005000,
+    });
+    expect(hash).toBeTruthy();
+  });
+});
+
+// ── 3. step list — durationMs computed ───────────────────────────────────────
+
+describe("step list timing", () => {
+  // A single step spanning 3500 ms must be listed with durationMs === 3500.
+  test("step list includes durationMs = completedAtMs - startedAtMs", async () => {
+    type StepEntry = import("@uncaged/workflow-protocol").StepEntry;
+
+    const casRoot = join(tmpDir, "cas");
+    await mkdir(casRoot, { recursive: true });
+    const cas = createFsStore(casRoot);
+    const uwfSchemas = await registerUwfSchemas(cas);
+    const detailTypes = await registerDetailSchemas(cas);
+
+    const workflowRef = await cas.put(uwfSchemas.workflow, {
+      name: "test-wf",
+      description: "desc",
+      roles: {},
+      graph: {},
+    });
+
+    const startRef = await cas.put(uwfSchemas.startNode, {
+      workflow: workflowRef,
+      prompt: "test",
+    });
+
+    const outputRef = await cas.put(uwfSchemas.text, "output");
+    const detailRef = await cas.put(detailTypes.detail, {
+      sessionId: "s1",
+      model: "m1",
+      duration: 100,
+      turnCount: 0,
+      turns: [],
+    });
+
+    // Step timestamps exactly 3500 ms apart
+    const startedAtMs = 1716600000000;
+    const completedAtMs = 1716600003500;
+
+    const stepRef = await cas.put(uwfSchemas.stepNode, {
+      start: startRef,
+      prev: null,
+      role: "worker",
+      output: outputRef,
+      detail: detailRef,
+      agent: "uwf-test",
+      edgePrompt: "",
+      startedAtMs,
+      completedAtMs,
+    });
+
+    // Point a thread at the step so it can be listed
+    const threadId = "01HX2Q3R4S5T6V7W8X9YZ1" as ThreadId;
+    await saveThreadsIndex(tmpDir, { [threadId]: stepRef });
+
+    const listing = await cmdStepList(tmpDir, threadId);
+    // The first listed entry is the start entry; everything after it is a step
+    const stepEntries = listing.steps.slice(1);
+    expect(stepEntries).toHaveLength(1);
+
+    const onlyStep = stepEntries[0] as StepEntry;
+    expect(onlyStep.durationMs).toBe(3500);
+  });
+});
+
+// ── 4. thread read — duration in header ──────────────────────────────────────
+
+describe("thread read timing", () => {
+  /**
+   * Stores a one-step thread (workflow → start node → one-turn detail →
+   * step node), registers it in the threads index and returns what
+   * `cmdThreadRead` renders for it.
+   *
+   * @param spec - Start-node prompt, step timestamps and the thread id to use.
+   */
+  async function renderSingleStepThread(spec: {
+    prompt: string;
+    startedAtMs: number;
+    completedAtMs: number;
+    threadId: ThreadId;
+  }) {
+    const casRoot = join(tmpDir, "cas");
+    await mkdir(casRoot, { recursive: true });
+    const cas = createFsStore(casRoot);
+    const uwfSchemas = await registerUwfSchemas(cas);
+    const detailTypes = await registerDetailSchemas(cas);
+
+    // Linear graph: $START → worker → $END
+    const workflowRef = await cas.put(uwfSchemas.workflow, {
+      name: "test-wf",
+      description: "desc",
+      roles: {
+        worker: {
+          description: "Worker",
+          goal: "Do work",
+          capabilities: [],
+          procedure: "work",
+          output: "result",
+          frontmatter: "placeholder0000" as CasRef,
+        },
+      },
+      graph: {
+        $START: { _: { role: "worker", prompt: "go" } },
+        worker: { _: { role: "$END", prompt: "" } },
+      },
+    });
+
+    const startRef = await cas.put(uwfSchemas.startNode, {
+      workflow: workflowRef,
+      prompt: spec.prompt,
+    });
+
+    const turnRef = await cas.put(detailTypes.turn, {
+      index: 0,
+      role: "assistant",
+      content: "Done.",
+      toolCalls: null,
+      reasoning: null,
+    });
+    const detailRef = await cas.put(detailTypes.detail, {
+      sessionId: "s1",
+      model: "m1",
+      duration: 100,
+      turnCount: 1,
+      turns: [turnRef],
+    });
+    const outputRef = await cas.put(uwfSchemas.text, "output");
+
+    const stepRef = await cas.put(uwfSchemas.stepNode, {
+      start: startRef,
+      prev: null,
+      role: "worker",
+      output: outputRef,
+      detail: detailRef,
+      agent: "uwf-test",
+      edgePrompt: "",
+      startedAtMs: spec.startedAtMs,
+      completedAtMs: spec.completedAtMs,
+    });
+
+    await saveThreadsIndex(tmpDir, { [spec.threadId]: stepRef });
+
+    return cmdThreadRead(tmpDir, spec.threadId, 10000, null, false);
+  }
+
+  // 42 000 ms between start and completion is rendered in seconds
+  test("thread read header includes Duration", async () => {
+    const markdown = await renderSingleStepThread({
+      prompt: "test task",
+      startedAtMs: 1716600000000,
+      completedAtMs: 1716600042000,
+      threadId: "01HX2Q3R4S5T6V7W8X9YZ3" as ThreadId,
+    });
+    expect(markdown).toContain("**Duration:** 42.0s");
+  });
+
+  // 350 ms between start and completion is rendered in milliseconds
+  test("thread read shows sub-second duration as ms", async () => {
+    const markdown = await renderSingleStepThread({
+      prompt: "test",
+      startedAtMs: 1716600000000,
+      completedAtMs: 1716600000350,
+      threadId: "01HX2Q3R4S5T6V7W8X9YZ4" as ThreadId,
+    });
+    expect(markdown).toContain("**Duration:** 350ms");
+  });
+});
+
+// ── 5. Breaking change — old data without timing fails ───────────────────────
+
+describe("breaking change", () => {
+  // NOTE(review): despite its title, this test never runs a validator. It
+  // checks the schema's `required` list and that a hand-written payload lacks
+  // the timing keys — confirm whether an actual rejection should be asserted.
+  test("StepNode schema rejects payload without timing fields", () => {
+    const timingFields = ["startedAtMs", "completedAtMs"];
+
+    // The schema must list both timing fields as required
+    const required = STEP_NODE_SCHEMA.required as string[];
+    for (const field of timingFields) {
+      expect(required).toContain(field);
+    }
+
+    // Shape of a step payload written before the timing fields existed
+    const legacyPayload = {
+      start: "hash1",
+      prev: null,
+      role: "worker",
+      output: "hash2",
+      detail: "hash3",
+      agent: "uwf-test",
+      edgePrompt: "",
+    };
+    // The legacy payload carries neither required timing field
+    for (const field of timingFields) {
+      expect(legacyPayload).not.toHaveProperty(field);
+    }
+  });
+});
@@ -0,0 +1,550 @@
+import { mkdir, mkdtemp, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import type { CasRef, ThreadId } from "@uncaged/workflow-protocol";
+import { extractUlidTimestamp, generateUlid } from "@uncaged/workflow-util";
+import { afterEach, beforeEach, describe, expect, test } from "vitest";
+import { createMarker, deleteMarker } from "../background/index.js";
+import { cmdThreadList } from "../commands/thread.js";
+import { parseTimeInput } from "../commands/thread-time-parser.js";
+import type { UwfStore } from "../store.js";
+import {
+  appendThreadHistory,
+  createUwfStore,
+  loadThreadsIndex,
+  saveThreadsIndex,
+} from "../store.js";
+
+// ── helpers ───────────────────────────────────────────────────────────────────
+
+/**
+ * Ensures `<storageRoot>/cas` exists and returns the result of
+ * `createUwfStore` for that root.
+ */
+async function makeUwfStore(storageRoot: string): Promise<UwfStore> {
+  await mkdir(join(storageRoot, "cas"), { recursive: true });
+  return createUwfStore(storageRoot);
+}
+
+/**
+ * Stores a minimal one-role workflow named "test-workflow" and returns the
+ * reference produced by the store.
+ */
+async function createTestWorkflow(uwf: UwfStore): Promise<CasRef> {
+  const role1 = {
+    goal: "test goal",
+    outputSchema: { type: "object" as const, properties: {} },
+  };
+  const definition = {
+    name: "test-workflow",
+    roles: { role1 },
+    graph: { start: "role1" },
+    conditions: {},
+  };
+  const workflowRef = await uwf.store.put(uwf.schemas.workflow, definition);
+  return workflowRef;
+}
+
+/**
+ * Creates a thread whose id is a ULID generated from `timestamp` and whose
+ * head is a freshly stored start node, then records it in the threads index.
+ *
+ * @param uwf - Store wrapper used to persist the start node.
+ * @param storageRoot - Root directory holding the threads index.
+ * @param workflowHash - Reference of the workflow the start node points at.
+ * @param timestamp - Millisecond timestamp passed to `generateUlid`.
+ * @returns The generated thread id.
+ */
+async function createTestThread(
+  uwf: UwfStore,
+  storageRoot: string,
+  workflowHash: CasRef,
+  timestamp: number,
+): Promise<ThreadId> {
+  const threadId = generateUlid(timestamp) as ThreadId;
+  const startPayload = {
+    workflow: workflowHash,
+    prompt: "test prompt",
+  };
+  const headHash = await uwf.store.put(uwf.schemas.startNode, startPayload);
+  // Read-modify-write of the index through the statically imported helper
+  // (previously a dynamic import of the same module chained with `.then`).
+  const index = await loadThreadsIndex(storageRoot);
+  index[threadId] = headHash;
+  await saveThreadsIndex(storageRoot, index);
+  return threadId;
+}
+
+/**
+ * Writes a background marker for `threadId` via `createMarker`, stamped with
+ * the current time.
+ */
+async function markThreadRunning(storageRoot: string, threadId: ThreadId, workflow: CasRef) {
+  // The marker carries this process's own PID so that isPidAlive returns true
+  const marker = {
+    thread: threadId,
+    workflow,
+    pid: process.pid,
+    startedAt: Date.now(),
+  };
+  await createMarker(storageRoot, marker);
+}
+
+/**
+ * Moves a thread from the threads index into the thread history: the index
+ * entry is deleted and a history record stamped with the current time is
+ * appended.
+ *
+ * @param storageRoot - Root directory holding the index and history.
+ * @param threadId - Thread to complete.
+ * @param workflowHash - Workflow reference recorded in the history entry.
+ * @param headHash - Head reference recorded in the history entry.
+ */
+async function completeThread(
+  storageRoot: string,
+  threadId: ThreadId,
+  workflowHash: CasRef,
+  headHash: CasRef,
+) {
+  // Statically imported helper (previously a dynamic import of the same
+  // module chained with `.then`).
+  const index = await loadThreadsIndex(storageRoot);
+  delete index[threadId];
+  await saveThreadsIndex(storageRoot, index);
+  await appendThreadHistory(storageRoot, {
+    thread: threadId,
+    workflow: workflowHash,
+    head: headHash,
+    completedAt: Date.now(),
+  });
+}
+
+// ── test setup ────────────────────────────────────────────────────────────────
+
+// Per-test scratch directory: created before each test, removed afterwards.
+let tmpDir: string;
+
+beforeEach(async () => {
+  const prefix = join(tmpdir(), "thread-list-filters-test-");
+  tmpDir = await mkdtemp(prefix);
+});
+
+afterEach(async () => {
+  // `force` keeps removal from failing if the directory is already gone
+  await rm(tmpDir, { force: true, recursive: true });
+});
+
+// ── status filter tests ───────────────────────────────────────────────────────
+
+describe("cmdThreadList status filter", () => {
+  /**
+   * Seeds three threads with ULID timestamps 3 s, 2 s and 1 s in the past,
+   * optionally marks thread2 as running, and completes thread3.
+   *
+   * @param markRunning - When true, a background marker is written for
+   *   thread2; the caller is then responsible for deleting it.
+   * @returns The three thread ids in creation order.
+   */
+  async function seedThreads(markRunning: boolean) {
+    const uwf = await makeUwfStore(tmpDir);
+    const workflowHash = await createTestWorkflow(uwf);
+
+    const thread1 = await createTestThread(uwf, tmpDir, workflowHash, Date.now() - 3000);
+    const thread2 = await createTestThread(uwf, tmpDir, workflowHash, Date.now() - 2000);
+    const thread3 = await createTestThread(uwf, tmpDir, workflowHash, Date.now() - 1000);
+
+    if (markRunning) {
+      await markThreadRunning(tmpDir, thread2, workflowHash);
+    }
+
+    // thread3's head is needed for its history record
+    const index = await loadThreadsIndex(tmpDir);
+    const thread3Head = index[thread3];
+    if (thread3Head === undefined) throw new Error("thread3 head not found");
+    await completeThread(tmpDir, thread3, workflowHash, thread3Head);
+
+    return { thread1, thread2, thread3 };
+  }
+
+  test("should return idle and running threads when status=active", async () => {
+    const { thread1, thread2 } = await seedThreads(true);
+
+    try {
+      const result = await cmdThreadList(tmpDir, ["idle", "running"], null, null, null, null);
+
+      expect(result).toHaveLength(2);
+      expect(result.map((r) => r.thread).sort()).toEqual([thread1, thread2].sort());
+    } finally {
+      // Remove the marker even when an assertion above fails
+      await deleteMarker(tmpDir, thread2);
+    }
+  });
+
+  test("should support comma-separated status values", async () => {
+    const { thread1, thread2, thread3 } = await seedThreads(true);
+
+    try {
+      const result = await cmdThreadList(tmpDir, ["idle", "completed"], null, null, null, null);
+
+      // thread2 is running (not idle), so should not be included
+      // Expected: thread1 (idle) and thread3 (completed)
+      expect(result).toHaveLength(2);
+      expect(result.map((r) => r.thread).sort()).toEqual([thread1, thread3].sort());
+    } finally {
+      await deleteMarker(tmpDir, thread2);
+    }
+  });
+
+  test("should support single status filter (backward compat)", async () => {
+    // No thread is marked running here, so there is no marker to clean up
+    const { thread3 } = await seedThreads(false);
+
+    const result = await cmdThreadList(tmpDir, ["completed"], null, null, null, null);
+
+    expect(result).toHaveLength(1);
+    expect(result[0]?.thread).toBe(thread3);
+    expect(result[0]?.status).toBe("completed");
+  });
+
+  test("should return all threads when no status filter provided", async () => {
+    const { thread1, thread2, thread3 } = await seedThreads(true);
+
+    try {
+      const result = await cmdThreadList(tmpDir, null, null, null, null, null);
+
+      expect(result).toHaveLength(3);
+      expect(result.map((r) => r.thread).sort()).toEqual([thread1, thread2, thread3].sort());
+    } finally {
+      // This test previously left its marker behind; clean up like its siblings
+      await deleteMarker(tmpDir, thread2);
+    }
+  });
+});
+
+// ── time range filtering tests ────────────────────────────────────────────────
+
+describe("cmdThreadList time filters", () => {
+  /**
+   * Seeds threads A, B and C whose ULIDs are generated from midnight UTC on
+   * 20, 21 and 22 May 2026 (Date.UTC months are zero-based, so 4 is May).
+   *
+   * @returns The thread ids as the tuple [A, B, C].
+   */
+  async function seedDailyThreads(): Promise<[ThreadId, ThreadId, ThreadId]> {
+    const uwf = await makeUwfStore(tmpDir);
+    const workflowHash = await createTestWorkflow(uwf);
+
+    const ts1 = Date.UTC(2026, 4, 20, 0, 0, 0);
+    const ts2 = Date.UTC(2026, 4, 21, 0, 0, 0);
+    const ts3 = Date.UTC(2026, 4, 22, 0, 0, 0);
+
+    const threadA = await createTestThread(uwf, tmpDir, workflowHash, ts1);
+    const threadB = await createTestThread(uwf, tmpDir, workflowHash, ts2);
+    const threadC = await createTestThread(uwf, tmpDir, workflowHash, ts3);
+
+    return [threadA, threadB, threadC];
+  }
+
+  test("should filter threads created after given timestamp", async () => {
+    const [, threadB, threadC] = await seedDailyThreads();
+
+    // Cutoff is noon on 20 May: after threadA's timestamp, 12 hours before threadB's
+    const afterMs = Date.UTC(2026, 4, 20, 12, 0, 0);
+    const listed = await cmdThreadList(tmpDir, null, afterMs, null, null, null);
+
+    expect(listed).toHaveLength(2);
+    expect(listed.map((r) => r.thread).sort()).toEqual([threadB, threadC].sort());
+  });
+
+  test("should filter threads created before given timestamp", async () => {
+    const [threadA, threadB] = await seedDailyThreads();
+
+    // Cutoff equals threadC's own timestamp; the test expects C to be excluded
+    const beforeMs = Date.UTC(2026, 4, 22, 0, 0, 0);
+    const listed = await cmdThreadList(tmpDir, null, null, beforeMs, null, null);
+
+    expect(listed).toHaveLength(2);
+    expect(listed.map((r) => r.thread).sort()).toEqual([threadA, threadB].sort());
+  });
+
+  test("should support both after and before filters (time range)", async () => {
+    const [, threadB] = await seedDailyThreads();
+
+    // Window from noon on 20 May up to threadC's timestamp: only threadB is expected
+    const afterMs = Date.UTC(2026, 4, 20, 12, 0, 0);
+    const beforeMs = Date.UTC(2026, 4, 22, 0, 0, 0);
+    const listed = await cmdThreadList(tmpDir, null, afterMs, beforeMs, null, null);
+
+    expect(listed).toHaveLength(1);
+    expect(listed[0]?.thread).toBe(threadB);
+  });
+});
+
+// ── pagination tests ──────────────────────────────────────────────────────────
+
+describe("cmdThreadList pagination", () => {
+  test("should limit results with --take", async () => {
+    const uwf = await makeUwfStore(tmpDir);
+    const workflowHash = await createTestWorkflow(uwf);
+
+    const threads: ThreadId[] = [];
+    for (let i = 0; i < 10; i++) {
+      threads.push(await createTestThread(uwf, tmpDir, workflowHash, Date.now() - i * 1000));
+    }
+
+    const result = await cmdThreadList(tmpDir, null, null, null, null, 5);
+
+    expect(result).toHaveLength(5);
+  });
+
+  test("should skip first N threads with --skip", async () => {
+    const uwf = await makeUwfStore(tmpDir);
+    const workflowHash = await createTestWorkflow(uwf);
+
+    const threads: ThreadId[] = [];
+    // One fixed base keeps timestamps strictly increasing: threads[0] oldest, threads[9] newest.
+    const baseMs = Date.now();
+    for (let i = 0; i < 10; i++) {
+      threads.push(await createTestThread(uwf, tmpDir, workflowHash, baseMs + i * 100));
+    }
+
+    const result = await cmdThreadList(tmpDir, null, null, null, 3, null);
+
+    // Listing is newest-first, so skipping 3 must leave exactly the 7 oldest threads.
+    expect(result).toHaveLength(7);
+    expect(result.map((r) => r.thread).sort()).toEqual(threads.slice(0, 7).sort());
+  });
+
+  test("should support skip + take for pagination", async () => {
+    const uwf = await makeUwfStore(tmpDir);
+    const workflowHash = await createTestWorkflow(uwf);
+
+    const threads: ThreadId[] = [];
+    const baseMs = Date.now(); // fixed base: threads[0] is oldest, threads[9] is newest
+    for (let i = 0; i < 10; i++) {
+      threads.push(await createTestThread(uwf, tmpDir, workflowHash, baseMs + i * 100));
+    }
+
+    const result = await cmdThreadList(tmpDir, null, null, null, 5, 3);
+
+    // Newest-first: skip threads[9..5], then take the next 3, i.e. threads[4], [3] and [2].
+    expect(result.map((r) => r.thread).sort()).toEqual(threads.slice(2, 5).sort());
+  });
+
+  test("should handle take > available threads", async () => {
+    const uwf = await makeUwfStore(tmpDir);
+    const workflowHash = await createTestWorkflow(uwf);
+
+    const _thread1 = await createTestThread(uwf, tmpDir, workflowHash, Date.now() - 3000);
+    const _thread2 = await createTestThread(uwf, tmpDir, workflowHash, Date.now() - 2000);
+    const _thread3 = await createTestThread(uwf, tmpDir, workflowHash, Date.now() - 1000);
+
+    const result = await cmdThreadList(tmpDir, null, null, null, null, 10);
+
+    expect(result).toHaveLength(3);
+  });
+
+  test("should return empty array when skip >= thread count", async () => {
+    const uwf = await makeUwfStore(tmpDir);
+    const workflowHash = await createTestWorkflow(uwf);
+
+    await createTestThread(uwf, tmpDir, workflowHash, Date.now() - 3000);
+    await createTestThread(uwf, tmpDir, workflowHash, Date.now() - 2000);
+    await createTestThread(uwf, tmpDir, workflowHash, Date.now() - 1000);
+
+    const result = await cmdThreadList(tmpDir, null, null, null, 5, null);
+
+    expect(result).toHaveLength(0);
+  });
+});
+
+// ── combined filters tests ────────────────────────────────────────────────────
+
+describe("combined filters", () => {
+  test("should combine status and time range filters", async () => {
+    const uwf = await makeUwfStore(tmpDir);
+    const workflowHash = await createTestWorkflow(uwf);
+
+    const ts1 = Date.UTC(2026, 4, 20, 0, 0, 0);
+    const ts2 = Date.UTC(2026, 4, 21, 0, 0, 0);
+    const ts3 = Date.UTC(2026, 4, 22, 0, 0, 0);
+    const ts4 = Date.UTC(2026, 4, 23, 0, 0, 0);
+
+    const _thread1 = await createTestThread(uwf, tmpDir, workflowHash, ts1);
+    const thread2 = await createTestThread(uwf, tmpDir, workflowHash, ts2);
+    const thread3 = await createTestThread(uwf, tmpDir, workflowHash, ts3);
+    const thread4 = await createTestThread(uwf, tmpDir, workflowHash, ts4);
+
+    await markThreadRunning(tmpDir, thread2, workflowHash);
+
+    const index = await import("../store.js").then((m) => m.loadThreadsIndex(tmpDir));
+    const thread3Head = index[thread3];
+    if (thread3Head === undefined) throw new Error("thread3 head not found");
+    await completeThread(tmpDir, thread3, workflowHash, thread3Head);
+
+    const afterMs = Date.UTC(2026, 4, 20, 12, 0, 0);
+    const result = await cmdThreadList(tmpDir, ["idle"], afterMs, null, null, null);
+
+    expect(result).toHaveLength(1);
+    expect(result[0]?.thread).toBe(thread4);
+    expect(result[0]?.status).toBe("idle");
+
+    // Clean up marker
+    await deleteMarker(tmpDir, thread2);
+  });
+
+  test("should combine status filter and pagination", async () => {
+    const uwf = await makeUwfStore(tmpDir);
+    const workflowHash = await createTestWorkflow(uwf);
+
+    const threads: ThreadId[] = [];
+    for (let i = 9; i >= 0; i--) {
+      const thread = await createTestThread(uwf, tmpDir, workflowHash, Date.now() + i * 1000);
+      threads.push(thread);
+      const index = await import("../store.js").then((m) => m.loadThreadsIndex(tmpDir));
+      const headHash = index[thread];
+      if (headHash === undefined) throw new Error("head not found");
+      await completeThread(tmpDir, thread, workflowHash, headHash);
+    }
+
+    const result = await cmdThreadList(tmpDir, ["completed"], null, null, 3, 5);
+
+    expect(result).toHaveLength(5);
+    for (const r of result) {
+      expect(r.status).toBe("completed");
+    }
+  });
+
+  test("should combine time range and pagination", async () => {
+    const uwf = await makeUwfStore(tmpDir);
+    const workflowHash = await createTestWorkflow(uwf);
+
+    const threads: ThreadId[] = [];
+    for (let i = 0; i < 20; i++) {
+      const ts = Date.UTC(2026, 4, 1 + i, 0, 0, 0);
+      threads.push(await createTestThread(uwf, tmpDir, workflowHash, ts));
+    }
+
+    const afterMs = Date.UTC(2026, 4, 10, 0, 0, 0);
+    const result = await cmdThreadList(tmpDir, null, afterMs, null, 2, 5);
+
+    expect(result).toHaveLength(5);
+    for (const r of result) {
+      const ts = extractUlidTimestamp(r.thread);
+      expect(ts).not.toBeNull();
+      if (ts !== null) {
+        expect(ts).toBeGreaterThan(afterMs);
+      }
+    }
+  });
+
+  // Creates `count` daily threads from 2026-05-10 UTC; even index → completeThread, odd → running.
+  async function setupMixedStatusThreads(
+    uwf: UwfStore,
+    workflowHash: string,
+    count: number,
+  ): Promise<ThreadId[]> {
+    const created: ThreadId[] = [];
+    for (let n = 0; n < count; n++) {
+      const createdAtMs = Date.UTC(2026, 4, 10 + n, 0, 0, 0);
+      const id = await createTestThread(uwf, tmpDir, workflowHash, createdAtMs);
+      created.push(id);
+      if (n % 2 !== 0) {
+        await markThreadRunning(tmpDir, id, workflowHash);
+        continue;
+      }
+      const storeModule = await import("../store.js");
+      const head = (await storeModule.loadThreadsIndex(tmpDir))[id];
+      if (head === undefined) throw new Error("head not found");
+      await completeThread(tmpDir, id, workflowHash, head);
+    }
+    return created;
+  }
+
+  // Deletes the marker of every odd-indexed thread, i.e. the ones that were marked running
+  // rather than completed when the fixture was built.
+  async function cleanupRunningMarkers(threads: ThreadId[]): Promise<void> {
+    for (let idx = 1; idx < threads.length; idx += 2) {
+      await deleteMarker(tmpDir, threads[idx] as ThreadId);
+    }
+  }
+
+  test("should combine all filters (status + time + pagination)", async () => {
+    const uwf = await makeUwfStore(tmpDir);
+    const workflowHash = await createTestWorkflow(uwf);
+    const threads = await setupMixedStatusThreads(uwf, workflowHash, 15);
+
+    const afterMs = Date.UTC(2026, 4, 14, 12, 0, 0);
+    const beforeMs = Date.UTC(2026, 4, 20, 0, 0, 0);
+    const result = await cmdThreadList(tmpDir, ["idle", "running"], afterMs, beforeMs, 1, 3);
+
+    expect(result.length).toBeGreaterThan(0); // otherwise the loop below verifies nothing
+    expect(result.length).toBeLessThanOrEqual(3);
+    for (const r of result) {
+      expect(["idle", "running"]).toContain(r.status);
+      const ts = extractUlidTimestamp(r.thread);
+      if (ts === null) throw new Error(`invalid ULID in result: ${r.thread}`);
+      expect(ts).toBeGreaterThan(afterMs);
+      expect(ts).toBeLessThan(beforeMs);
+    }
+
+    await cleanupRunningMarkers(threads);
+  });
+});
+
+// ── edge cases tests ──────────────────────────────────────────────────────────
+
+describe("edge cases", () => {
+  test("should handle empty thread list", async () => {
+    await makeUwfStore(tmpDir);
+    const result = await cmdThreadList(tmpDir, null, null, null, null, null);
+    expect(result).toHaveLength(0);
+  });
+
+  test("should skip threads with invalid ULID when time filtering", async () => {
+    const uwf = await makeUwfStore(tmpDir);
+    const workflowHash = await createTestWorkflow(uwf);
+
+    const thread1 = await createTestThread(uwf, tmpDir, workflowHash, Date.now() - 2000);
+    const thread2 = await createTestThread(uwf, tmpDir, workflowHash, Date.now() - 1000);
+
+    const index = await import("../store.js").then((m) => m.loadThreadsIndex(tmpDir));
+    index["INVALID_ULID_FORMAT_HERE" as ThreadId] = "01J6HMVRNQKJV2";
+    await saveThreadsIndex(tmpDir, index);
+
+    const afterMs = Date.now() - 3000;
+    const result = await cmdThreadList(tmpDir, null, afterMs, null, null, null);
+
+    expect(result).toHaveLength(2);
+    expect(result.map((r) => r.thread).sort()).toEqual([thread1, thread2].sort());
+  });
+});
+
+// ── time parsing tests ────────────────────────────────────────────────────────
+
+describe("relative time parsing", () => {
+  test("should parse '7d' as 7 days ago", () => {
+    const nowMs = Date.UTC(2026, 4, 24, 12, 0, 0);
+    const result = parseTimeInput("7d", nowMs);
+    const expected = Date.UTC(2026, 4, 17, 12, 0, 0);
+    expect(result).toBe(expected);
+  });
+
+  test("should parse '24h' as 24 hours ago", () => {
+    const nowMs = Date.UTC(2026, 4, 24, 12, 0, 0);
+    const result = parseTimeInput("24h", nowMs);
+    const expected = Date.UTC(2026, 4, 23, 12, 0, 0);
+    expect(result).toBe(expected);
+  });
+
+  test("should parse '30m' as 30 minutes ago", () => {
+    const nowMs = Date.UTC(2026, 4, 24, 12, 30, 0);
+    const result = parseTimeInput("30m", nowMs);
+    const expected = Date.UTC(2026, 4, 24, 12, 0, 0);
+    expect(result).toBe(expected);
+  });
+
+  test("should parse '1d' as 1 day ago", () => {
+    const nowMs = Date.UTC(2026, 4, 24, 0, 0, 0);
+    const result = parseTimeInput("1d", nowMs);
+    const expected = Date.UTC(2026, 4, 23, 0, 0, 0);
+    expect(result).toBe(expected);
+  });
+});
+
+describe("ISO date parsing", () => {
+  test("should parse ISO date (YYYY-MM-DD)", () => {
+    const nowMs = Date.now();
+    const result = parseTimeInput("2026-05-20", nowMs);
+    const expected = Date.UTC(2026, 4, 20, 0, 0, 0);
+    expect(result).toBe(expected);
+  });
+
+  test("should parse ISO datetime (YYYY-MM-DDTHH:MM:SS)", () => {
+    const nowMs = Date.now();
+    const result = parseTimeInput("2026-05-20T14:30:00", nowMs);
+    const expected = Date.parse("2026-05-20T14:30:00");
+    expect(result).toBe(expected);
+  });
+
+  test("should parse ISO datetime with Z suffix", () => {
+    const nowMs = Date.now();
+    const result = parseTimeInput("2026-05-20T14:30:00Z", nowMs);
+    const expected = Date.UTC(2026, 4, 20, 14, 30, 0);
+    expect(result).toBe(expected);
+  });
+
+  test("should reject invalid date formats", () => {
+    const nowMs = Date.now();
+    expect(() => parseTimeInput("not-a-date", nowMs)).toThrow();
+    expect(() => parseTimeInput("2026-13-01", nowMs)).toThrow();
+    expect(() => parseTimeInput("invalid", nowMs)).toThrow();
+  });
+});
@@ -0,0 +1,597 @@
+import { mkdir, mkdtemp, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { bootstrap, putSchema } from "@uncaged/json-cas";
+import { createFsStore } from "@uncaged/json-cas-fs";
+import type { CasRef, ThreadId } from "@uncaged/workflow-protocol";
+import { afterEach, beforeEach, describe, expect, test } from "vitest";
+import { cmdThreadRead } from "../commands/thread.js";
+import { registerUwfSchemas } from "../schemas.js";
+import { saveThreadsIndex } from "../store.js";
+
+// ── schemas used in tests ────────────────────────────────────────────────────
+
+const TURN_SCHEMA = {
+  title: "hermes-turn",
+  type: "object" as const,
+  required: ["index", "role", "content"],
+  properties: {
+    index: { type: "integer" as const },
+    role: { type: "string" as const },
+    content: { type: "string" as const },
+    toolCalls: {
+      anyOf: [
+        { type: "array" as const, items: { type: "object" as const } },
+        { type: "null" as const },
+      ],
+    },
+    reasoning: { anyOf: [{ type: "string" as const }, { type: "null" as const }] },
+  },
+  additionalProperties: false,
+};
+
+const DETAIL_SCHEMA = {
+  title: "hermes-detail",
+  type: "object" as const,
+  required: ["sessionId", "model", "duration", "turnCount", "turns"],
+  properties: {
+    sessionId: { type: "string" as const },
+    model: { type: "string" as const },
+    duration: { type: "integer" as const },
+    turnCount: { type: "integer" as const },
+    turns: {
+      type: "array" as const,
+      items: { type: "string" as const, format: "cas_ref" },
+    },
+  },
+  additionalProperties: false,
+};
+
+// ── helpers ───────────────────────────────────────────────────────────────────
+
+// Bootstraps the store, then registers TURN_SCHEMA and DETAIL_SCHEMA concurrently.
+async function registerDetailSchemas(store: ReturnType<typeof createFsStore>) {
+  await bootstrap(store);
+  const turnPending = putSchema(store, TURN_SCHEMA);
+  const detailPending = putSchema(store, DETAIL_SCHEMA);
+  const [turn, detail] = await Promise.all([turnPending, detailPending]);
+  return { turn, detail };
+}
+
+function generateContent(size: number, prefix = "Content"): string {
+  const unit = prefix.concat(" "); // one repetition: the prefix followed by a space
+  const copies = Math.ceil(size / unit.length); // fewest repetitions reaching `size` chars
+  return unit.repeat(copies).slice(0, size); // trim the overshoot
+}
+
+// ── fixture ───────────────────────────────────────────────────────────────────
+
+let tmpDir: string;
+
+beforeEach(async () => {
+  tmpDir = await mkdtemp(join(tmpdir(), "cli-uwf-quota-test-"));
+});
+
+afterEach(async () => {
+  await rm(tmpDir, { recursive: true, force: true });
+});
+
+// ── thread read quota enforcement ─────────────────────────────────────────────
+
+describe("thread read --quota flag", () => {
+  test("test 1: basic quota enforcement with 3 steps", async () => {
+    const casDir = join(tmpDir, "cas");
+    await mkdir(casDir, { recursive: true });
+    const store = createFsStore(casDir);
+    const schemas = await registerUwfSchemas(store);
+    const detailSchemas = await registerDetailSchemas(store);
+
+    const workflowHash = await store.put(schemas.workflow, {
+      name: "test-wf",
+      description: "desc",
+      roles: {
+        worker: {
+          description: "Worker",
+          goal: "You are a worker agent.",
+          capabilities: [],
+          procedure: "Do the work.",
+          output: "Summarize the work.",
+          meta: "placeholder00" as CasRef,
+        },
+      },
+      conditions: {},
+      graph: {},
+    });
+
+    const startHash = await store.put(schemas.startNode, {
+      workflow: workflowHash,
+      prompt: "Test task",
+    });
+
+    const outputHash = await store.put(schemas.workflow, {
+      name: "out",
+      description: "",
+      roles: {},
+      conditions: {},
+      graph: {},
+    });
+
+    // Create 3 steps with ~500 chars each
+    const steps: CasRef[] = [];
+    for (let i = 1; i <= 3; i++) {
+      const content = generateContent(500, `Step${i}`);
+      const turnHash = await store.put(detailSchemas.turn, {
+        index: 0,
+        role: "assistant",
+        content,
+        toolCalls: null,
+        reasoning: null,
+      });
+      const detailHash = await store.put(detailSchemas.detail, {
+        sessionId: `session-${i}`,
+        model: "test-model",
+        duration: 1000,
+        turnCount: 1,
+        turns: [turnHash],
+      });
+      const stepHash = await store.put(schemas.stepNode, {
+        start: startHash,
+        prev: steps[i - 2] ?? null,
+        role: "worker",
+        output: outputHash,
+        detail: detailHash,
+        agent: "uwf-test",
+        startedAtMs: 1000000000000,
+        completedAtMs: 1000000005000,
+      });
+      steps.push(stepHash);
+    }
+
+    const threadId = "01HX2Q3R4S5T6V7W8X9YZ0" as ThreadId;
+    await saveThreadsIndex(tmpDir, { [threadId]: steps[2] as CasRef });
+
+    // Set quota to 800 chars - should only fit most recent steps
+    const markdown = await cmdThreadRead(tmpDir, threadId, 800, null, false);
+
+    // Quota must be reasonably enforced (allow ~200 char tolerance for skip hint)
+    expect(markdown.length).toBeLessThanOrEqual(1000);
+
+    // Should contain skip hint since not all steps fit
+    expect(markdown).toMatch(/earlier step/);
+
+    // Most recent step should be included
+    expect(markdown).toMatch(/Step3/);
+  });
+
+  test("test 2: quota check order - verifies bug is fixed", async () => {
+    const casDir = join(tmpDir, "cas");
+    await mkdir(casDir, { recursive: true });
+    const store = createFsStore(casDir);
+    const schemas = await registerUwfSchemas(store);
+    const detailSchemas = await registerDetailSchemas(store);
+
+    const workflowHash = await store.put(schemas.workflow, {
+      name: "test-wf",
+      description: "desc",
+      roles: {
+        worker: {
+          description: "Worker",
+          goal: "You are a worker agent.",
+          capabilities: [],
+          procedure: "Do the work.",
+          output: "Summarize the work.",
+          meta: "placeholder00" as CasRef,
+        },
+      },
+      conditions: {},
+      graph: {},
+    });
+
+    const startHash = await store.put(schemas.startNode, {
+      workflow: workflowHash,
+      prompt: "Test task",
+    });
+
+    const outputHash = await store.put(schemas.workflow, {
+      name: "out",
+      description: "",
+      roles: {},
+      conditions: {},
+      graph: {},
+    });
+
+    // Create 2 steps: first=300 chars, second=600 chars
+    const step1Content = generateContent(300, "First");
+    const step1TurnHash = await store.put(detailSchemas.turn, {
+      index: 0,
+      role: "assistant",
+      content: step1Content,
+      toolCalls: null,
+      reasoning: null,
+    });
+    const step1DetailHash = await store.put(detailSchemas.detail, {
+      sessionId: "session-1",
+      model: "test-model",
+      duration: 1000,
+      turnCount: 1,
+      turns: [step1TurnHash],
+    });
+    const step1Hash = await store.put(schemas.stepNode, {
+      start: startHash,
+      prev: null,
+      role: "worker",
+      output: outputHash,
+      detail: step1DetailHash,
+      agent: "uwf-test",
+      startedAtMs: 1000000000000,
+      completedAtMs: 1000000005000,
+    });
+
+    const step2Content = generateContent(600, "Second");
+    const step2TurnHash = await store.put(detailSchemas.turn, {
+      index: 0,
+      role: "assistant",
+      content: step2Content,
+      toolCalls: null,
+      reasoning: null,
+    });
+    const step2DetailHash = await store.put(detailSchemas.detail, {
+      sessionId: "session-2",
+      model: "test-model",
+      duration: 1000,
+      turnCount: 1,
+      turns: [step2TurnHash],
+    });
+    const step2Hash = await store.put(schemas.stepNode, {
+      start: startHash,
+      prev: step1Hash,
+      role: "worker",
+      output: outputHash,
+      detail: step2DetailHash,
+      agent: "uwf-test",
+      startedAtMs: 1000000000000,
+      completedAtMs: 1000000005000,
+    });
+
+    const threadId = "01HX2Q3R4S5T6V7W8X9YZ1" as ThreadId;
+    await saveThreadsIndex(tmpDir, { [threadId]: step2Hash });
+
+    // Set quota to 500 chars
+    const markdown = await cmdThreadRead(tmpDir, threadId, 500, null, false);
+
+    // Bug fix verification: output must stay bounded (quota 500 + up to 600 chars of slack)
+    expect(markdown.length).toBeLessThanOrEqual(1100);
+
+    // Should contain "Second" (most recent step)
+    expect(markdown).toMatch(/Second/);
+
+    // Should skip first step
+    expect(markdown).toMatch(/earlier step/);
+
+    // Looser regression guard (implied by the bound above): pre-fix output was ~1264 chars
+    expect(markdown.length).toBeLessThan(1200);
+  });
+
+  test("test 3: quota with --start section", async () => {
+    const casDir = join(tmpDir, "cas");
+    await mkdir(casDir, { recursive: true });
+    const store = createFsStore(casDir);
+    const schemas = await registerUwfSchemas(store);
+    const detailSchemas = await registerDetailSchemas(store);
+
+    const workflowHash = await store.put(schemas.workflow, {
+      name: "test-wf",
+      description: "desc",
+      roles: {
+        worker: {
+          description: "Worker",
+          goal: "You are a worker agent.",
+          capabilities: [],
+          procedure: "Do the work.",
+          output: "Summarize the work.",
+          meta: "placeholder00" as CasRef,
+        },
+      },
+      conditions: {},
+      graph: {},
+    });
+
+    const startHash = await store.put(schemas.startNode, {
+      workflow: workflowHash,
+      prompt: "Test task with a moderately long prompt to test quota accounting",
+    });
+
+    const outputHash = await store.put(schemas.workflow, {
+      name: "out",
+      description: "",
+      roles: {},
+      conditions: {},
+      graph: {},
+    });
+
+    // Create 2 steps
+    const steps: CasRef[] = [];
+    for (let i = 1; i <= 2; i++) {
+      const content = generateContent(400, `Step${i}`);
+      const turnHash = await store.put(detailSchemas.turn, {
+        index: 0,
+        role: "assistant",
+        content,
+        toolCalls: null,
+        reasoning: null,
+      });
+      const detailHash = await store.put(detailSchemas.detail, {
+        sessionId: `session-${i}`,
+        model: "test-model",
+        duration: 1000,
+        turnCount: 1,
+        turns: [turnHash],
+      });
+      const stepHash = await store.put(schemas.stepNode, {
+        start: startHash,
+        prev: steps[i - 2] ?? null,
+        role: "worker",
+        output: outputHash,
+        detail: detailHash,
+        agent: "uwf-test",
+        startedAtMs: 1000000000000,
+        completedAtMs: 1000000005000,
+      });
+      steps.push(stepHash);
+    }
+
+    const threadId = "01HX2Q3R4S5T6V7W8X9YZ2" as ThreadId;
+    await saveThreadsIndex(tmpDir, { [threadId]: steps[1] as CasRef });
+
+    // Set tight quota with --start flag
+    const markdown = await cmdThreadRead(tmpDir, threadId, 600, null, true);
+
+    // Quota must be reasonably enforced (allow ~260 char tolerance for structure)
+    expect(markdown.length).toBeLessThanOrEqual(860);
+
+    // Should contain thread header
+    expect(markdown).toMatch(/# Thread/);
+    expect(markdown).toMatch(/test-wf/);
+  });
+
+  test("test 5a: quota edge case - minimal quota", async () => {
+    const casDir = join(tmpDir, "cas");
+    await mkdir(casDir, { recursive: true });
+    const store = createFsStore(casDir);
+    const schemas = await registerUwfSchemas(store);
+    const detailSchemas = await registerDetailSchemas(store);
+
+    const workflowHash = await store.put(schemas.workflow, {
+      name: "test-wf",
+      description: "desc",
+      roles: {
+        worker: {
+          description: "Worker",
+          goal: "You are a worker agent.",
+          capabilities: [],
+          procedure: "Do the work.",
+          output: "Summarize the work.",
+          meta: "placeholder00" as CasRef,
+        },
+      },
+      conditions: {},
+      graph: {},
+    });
+
+    const startHash = await store.put(schemas.startNode, {
+      workflow: workflowHash,
+      prompt: "Test task",
+    });
+
+    const outputHash = await store.put(schemas.workflow, {
+      name: "out",
+      description: "",
+      roles: {},
+      conditions: {},
+      graph: {},
+    });
+
+    const content = generateContent(500, "Test");
+    const turnHash = await store.put(detailSchemas.turn, {
+      index: 0,
+      role: "assistant",
+      content,
+      toolCalls: null,
+      reasoning: null,
+    });
+    const detailHash = await store.put(detailSchemas.detail, {
+      sessionId: "session-1",
+      model: "test-model",
+      duration: 1000,
+      turnCount: 1,
+      turns: [turnHash],
+    });
+    const stepHash = await store.put(schemas.stepNode, {
+      start: startHash,
+      prev: null,
+      role: "worker",
+      output: outputHash,
+      detail: detailHash,
+      agent: "uwf-test",
+      startedAtMs: 1000000000000,
+      completedAtMs: 1000000005000,
+    });
+
+    const threadId = "01HX2Q3R4S5T6V7W8X9YZ4" as ThreadId;
+    await saveThreadsIndex(tmpDir, { [threadId]: stepHash });
+
+    // Minimal quota
+    const markdown = await cmdThreadRead(tmpDir, threadId, 1, null, false);
+
+    // Should handle gracefully - always shows at least one step
+    expect(markdown.length).toBeGreaterThan(1);
+    expect(markdown).toMatch(/Test/);
+  });
+
+  test("test 5b: quota edge case - very large quota", async () => {
+    const casDir = join(tmpDir, "cas");
+    await mkdir(casDir, { recursive: true });
+    const store = createFsStore(casDir);
+    const schemas = await registerUwfSchemas(store);
+    const detailSchemas = await registerDetailSchemas(store);
+
+    const workflowHash = await store.put(schemas.workflow, {
+      name: "test-wf",
+      description: "desc",
+      roles: {
+        worker: {
+          description: "Worker",
+          goal: "You are a worker agent.",
+          capabilities: [],
+          procedure: "Do the work.",
+          output: "Summarize the work.",
+          meta: "placeholder00" as CasRef,
+        },
+      },
+      conditions: {},
+      graph: {},
+    });
+
+    const startHash = await store.put(schemas.startNode, {
+      workflow: workflowHash,
+      prompt: "Test task",
+    });
+
+    const outputHash = await store.put(schemas.workflow, {
+      name: "out",
+      description: "",
+      roles: {},
+      conditions: {},
+      graph: {},
+    });
+
+    // Create 3 steps
+    const steps: CasRef[] = [];
+    for (let i = 1; i <= 3; i++) {
+      const content = generateContent(300, `Step${i}`);
+      const turnHash = await store.put(detailSchemas.turn, {
+        index: 0,
+        role: "assistant",
+        content,
+        toolCalls: null,
+        reasoning: null,
+      });
+      const detailHash = await store.put(detailSchemas.detail, {
+        sessionId: `session-${i}`,
+        model: "test-model",
+        duration: 1000,
+        turnCount: 1,
+        turns: [turnHash],
+      });
+      const stepHash = await store.put(schemas.stepNode, {
+        start: startHash,
+        prev: steps[i - 2] ?? null,
+        role: "worker",
+        output: outputHash,
+        detail: detailHash,
+        agent: "uwf-test",
+        startedAtMs: 1000000000000,
+        completedAtMs: 1000000005000,
+      });
+      steps.push(stepHash);
+    }
+
+    const threadId = "01HX2Q3R4S5T6V7W8X9YZ5" as ThreadId;
+    await saveThreadsIndex(tmpDir, { [threadId]: steps[2] as CasRef });
+
+    // Very large quota
+    const markdown = await cmdThreadRead(tmpDir, threadId, 1000000, null, false);
+
+    // Should show all steps (no skipping)
+    expect(markdown).not.toMatch(/earlier step/);
+    expect(markdown).toMatch(/Step1/);
+    expect(markdown).toMatch(/Step2/);
+    expect(markdown).toMatch(/Step3/);
+  });
+
+  test("test 6: quota with --before parameter", async () => {
+    const casDir = join(tmpDir, "cas");
+    await mkdir(casDir, { recursive: true });
+    const store = createFsStore(casDir);
+    const schemas = await registerUwfSchemas(store);
+    const detailSchemas = await registerDetailSchemas(store);
+
+    const workflowHash = await store.put(schemas.workflow, {
+      name: "test-wf",
+      description: "desc",
+      roles: {
+        worker: {
+          description: "Worker",
+          goal: "You are a worker agent.",
+          capabilities: [],
+          procedure: "Do the work.",
+          output: "Summarize the work.",
+          meta: "placeholder00" as CasRef,
+        },
+      },
+      conditions: {},
+      graph: {},
+    });
+
+    const startHash = await store.put(schemas.startNode, {
+      workflow: workflowHash,
+      prompt: "Test task",
+    });
+
+    const outputHash = await store.put(schemas.workflow, {
+      name: "out",
+      description: "",
+      roles: {},
+      conditions: {},
+      graph: {},
+    });
+
+    // Create 5 steps
+    const steps: CasRef[] = [];
+    for (let i = 1; i <= 5; i++) {
+      const content = generateContent(300, `Step${i}`);
+      const turnHash = await store.put(detailSchemas.turn, {
+        index: 0,
+        role: "assistant",
+        content,
+        toolCalls: null,
+        reasoning: null,
+      });
+      const detailHash = await store.put(detailSchemas.detail, {
+        sessionId: `session-${i}`,
+        model: "test-model",
+        duration: 1000,
+        turnCount: 1,
+        turns: [turnHash],
+      });
+      const stepHash = await store.put(schemas.stepNode, {
+        start: startHash,
+        prev: steps[i - 2] ?? null,
+        role: "worker",
+        output: outputHash,
+        detail: detailHash,
+        agent: "uwf-test",
+        startedAtMs: 1000000000000,
+        completedAtMs: 1000000005000,
+      });
+      steps.push(stepHash);
+    }
+
+    const threadId = "01HX2Q3R4S5T6V7W8X9YZ6" as ThreadId;
+    await saveThreadsIndex(tmpDir, { [threadId]: steps[4] as CasRef });
+
+    // Use --before to limit to steps 1-2, then set quota that allows only 1
+    const markdown = await cmdThreadRead(tmpDir, threadId, 500, steps[2] as CasRef, false);
+
+    // Should not contain Step3 or later
+    expect(markdown).not.toMatch(/Step3/);
+    expect(markdown).not.toMatch(/Step4/);
+    expect(markdown).not.toMatch(/Step5/);
+
+    // Quota should select most recent of candidates (Step2)
+    expect(markdown).toMatch(/Step2/);
+
+    // Quota enforcement (allow ~200 char tolerance)
+    expect(markdown.length).toBeLessThanOrEqual(700);
+  });
+});
@@ -0,0 +1,707 @@
+import { mkdir, mkdtemp, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { bootstrap, putSchema } from "@uncaged/json-cas";
+import { createFsStore } from "@uncaged/json-cas-fs";
+import type { CasRef, ThreadId } from "@uncaged/workflow-protocol";
+import { afterEach, beforeEach, describe, expect, test } from "vitest";
+import { cmdThreadRead, THREAD_READ_DEFAULT_QUOTA } from "../commands/thread.js";
+import { registerUwfSchemas } from "../schemas.js";
+import type { UwfStore } from "../store.js";
+import { saveThreadsIndex } from "../store.js";
+
+// ── schemas used in tests ────────────────────────────────────────────────────
+
+// Test-local JSON schema (title "hermes-turn") for a single conversation turn.
+// `index`, `role` and `content` are required; `toolCalls` and `reasoning` are
+// optional and may be null. Unknown properties are rejected. Registered on the
+// store by registerDetailSchemas().
+const TURN_SCHEMA = {
+  title: "hermes-turn",
+  type: "object" as const,
+  required: ["index", "role", "content"],
+  properties: {
+    index: { type: "integer" as const },
+    role: { type: "string" as const },
+    content: { type: "string" as const },
+    toolCalls: {
+      anyOf: [
+        { type: "array" as const, items: { type: "object" as const } },
+        { type: "null" as const },
+      ],
+    },
+    reasoning: { anyOf: [{ type: "string" as const }, { type: "null" as const }] },
+  },
+  additionalProperties: false,
+};
+
+// Test-local JSON schema (title "hermes-detail") for the detail node a step can
+// point at. All five properties are required; `turns` is an array of strings
+// tagged with the "cas_ref" format (the tests fill it with turn hashes).
+// Registered on the store by registerDetailSchemas().
+const DETAIL_SCHEMA = {
+  title: "hermes-detail",
+  type: "object" as const,
+  required: ["sessionId", "model", "duration", "turnCount", "turns"],
+  properties: {
+    sessionId: { type: "string" as const },
+    model: { type: "string" as const },
+    duration: { type: "integer" as const },
+    turnCount: { type: "integer" as const },
+    turns: {
+      type: "array" as const,
+      items: { type: "string" as const, format: "cas_ref" },
+    },
+  },
+  additionalProperties: false,
+};
+
+// ── helpers ───────────────────────────────────────────────────────────────────
+
+/**
+ * Creates a file-system CAS store under `<storageRoot>/cas` and registers the UWF
+ * schemas on it.
+ *
+ * @param storageRoot - Directory that will contain the `cas` subdirectory.
+ * @returns The storage root together with the store and the registered schemas.
+ */
+async function makeUwfStore(storageRoot: string): Promise<UwfStore> {
+  const casRoot = join(storageRoot, "cas");
+  // The directory is created before the store is opened on it.
+  await mkdir(casRoot, { recursive: true });
+  const casStore = createFsStore(casRoot);
+  const registered = await registerUwfSchemas(casStore);
+  return { storageRoot, store: casStore, schemas: registered };
+}
+
+/**
+ * Bootstraps the store and registers the test-local turn and detail schemas.
+ *
+ * @param store - Store created by `createFsStore`.
+ * @returns What `putSchema` resolved with for `TURN_SCHEMA` (`turn`) and
+ *   `DETAIL_SCHEMA` (`detail`).
+ */
+async function registerDetailSchemas(store: ReturnType<typeof createFsStore>) {
+  await bootstrap(store);
+  // Both registrations are started before either one is awaited.
+  const pending = [putSchema(store, TURN_SCHEMA), putSchema(store, DETAIL_SCHEMA)] as const;
+  const [turn, detail] = await Promise.all(pending);
+  return { turn, detail };
+}
+
+// ── fixture ───────────────────────────────────────────────────────────────────
+
+// Per-test scratch directory; every test passes it as the storage root.
+let tmpDir: string;
+
+// Create a fresh, uniquely named directory under the OS temp dir before each test.
+beforeEach(async () => {
+  tmpDir = await mkdtemp(join(tmpdir(), "cli-uwf-test-"));
+});
+
+// Remove the directory afterwards; `force` keeps cleanup from failing if it is already gone.
+afterEach(async () => {
+  await rm(tmpDir, { recursive: true, force: true });
+});
+
+// ── thread read XML tag isolation ─────────────────────────────────────────────
+
+/**
+ * Checks that `cmdThreadRead` wraps role prompts and step output in `<prompt>` /
+ * `<output>` XML tags instead of `### Prompt` / `### Content` headings.
+ *
+ * The scenarios share their fixture plumbing through the local helpers below; each
+ * one seeds a store under `tmpDir`, points a thread id at the head step, and inspects
+ * the rendered markdown. No "scenario 8" is defined in this suite.
+ */
+describe("thread read XML tag isolation", () => {
+  // Timestamps stamped on every step node; no assertion depends on their values.
+  const STARTED_AT_MS = 1000000000000;
+  const COMPLETED_AT_MS = 1000000005000;
+
+  type DetailSchemas = Awaited<ReturnType<typeof registerDetailSchemas>>;
+  type Seed = Awaited<ReturnType<typeof seedThread>>;
+
+  /** Builds one role definition for a workflow's `roles` map. */
+  function role(description: string, goal: string, procedure = "Do stuff.", output = "Output.") {
+    return {
+      description,
+      goal,
+      capabilities: [],
+      procedure,
+      output,
+      meta: "placeholder00" as CasRef,
+    };
+  }
+
+  /**
+   * Stores the "test-wf" workflow with the given roles, a start node carrying
+   * `prompt`, and the empty "out" node every step uses as its output.
+   */
+  async function seedThread(uwf: UwfStore, roles: Record<string, unknown>, prompt: string) {
+    const workflow = await uwf.store.put(uwf.schemas.workflow, {
+      name: "test-wf",
+      description: "desc",
+      roles,
+      conditions: {},
+      graph: {},
+    });
+    const start = await uwf.store.put(uwf.schemas.startNode, { workflow, prompt });
+    const output = await uwf.store.put(uwf.schemas.workflow, {
+      name: "out",
+      description: "",
+      roles: {},
+      conditions: {},
+      graph: {},
+    });
+    return { start, output };
+  }
+
+  /** Stores a detail node holding a single assistant turn with the given content. */
+  async function putAssistantDetail(
+    uwf: UwfStore,
+    detailSchemas: DetailSchemas,
+    content: string,
+  ): Promise<CasRef> {
+    const turnHash = await uwf.store.put(detailSchemas.turn, {
+      index: 0,
+      role: "assistant",
+      content,
+      toolCalls: null,
+      reasoning: null,
+    });
+    const detailHash = await uwf.store.put(detailSchemas.detail, {
+      sessionId: "sx",
+      model: "mx",
+      duration: 500,
+      turnCount: 1,
+      turns: [turnHash],
+    });
+    return detailHash as CasRef;
+  }
+
+  /**
+   * Stores one step node. `prev` and `detail` default to null and `agent` to
+   * "uwf-test", which is what most scenarios use.
+   */
+  async function putStep(
+    uwf: UwfStore,
+    seed: Seed,
+    step: { role: string; prev?: CasRef | null; detail?: CasRef | null; agent?: string },
+  ): Promise<CasRef> {
+    const hash = await uwf.store.put(uwf.schemas.stepNode, {
+      start: seed.start,
+      prev: step.prev ?? null,
+      role: step.role,
+      output: seed.output,
+      detail: step.detail ?? null,
+      agent: step.agent ?? "uwf-test",
+      startedAtMs: STARTED_AT_MS,
+      completedAtMs: COMPLETED_AT_MS,
+    });
+    return hash as CasRef;
+  }
+
+  /**
+   * Points `id` at `head` in the threads index and returns what `cmdThreadRead`
+   * renders for it. Options default to the full quota, no `--before` bound and no
+   * `--start` section.
+   */
+  async function readThread(
+    id: string,
+    head: CasRef,
+    options: { quota?: number; before?: CasRef | null; start?: boolean } = {},
+  ): Promise<string> {
+    const threadId = id as ThreadId;
+    await saveThreadsIndex(tmpDir, { [threadId]: head });
+    return cmdThreadRead(
+      tmpDir,
+      threadId,
+      options.quota ?? THREAD_READ_DEFAULT_QUOTA,
+      options.before ?? null,
+      options.start ?? false,
+    );
+  }
+
+  test("scenario 1: wraps output in XML tags instead of heading", async () => {
+    const uwf = await makeUwfStore(tmpDir);
+    const detailSchemas = await registerDetailSchemas(uwf.store);
+    const seed = await seedThread(
+      uwf,
+      {
+        planner: role(
+          "Planner",
+          "You are a planning agent. Your task is to...",
+          "Plan the work.",
+          "Summarize the plan.",
+        ),
+      },
+      "Fix issue #459",
+    );
+    const detail = await putAssistantDetail(
+      uwf,
+      detailSchemas,
+      "---\nstatus: ready\nplan: CMWGHQKT58RY4\n---\n\n# Analysis Complete\n## Issue Summary\nThe issue requires XML tag isolation.",
+    );
+    const head = await putStep(uwf, seed, { role: "planner", detail, agent: "uwf-claude-code" });
+
+    const markdown = await readThread("01JTEST0000000000000001", head);
+
+    // Should wrap output in XML tags
+    expect(markdown).toContain("<output>");
+    expect(markdown).toContain("</output>");
+
+    // Should not have ### Content heading
+    expect(markdown).not.toContain("### Content");
+
+    // Should preserve markdown headings from the assistant content
+    expect(markdown).toContain("# Analysis Complete");
+    expect(markdown).toContain("## Issue Summary");
+  });
+
+  test("scenario 2: wraps prompt in XML tags", async () => {
+    const uwf = await makeUwfStore(tmpDir);
+    const detailSchemas = await registerDetailSchemas(uwf.store);
+    const seed = await seedThread(
+      uwf,
+      {
+        planner: role(
+          "Planner",
+          "You are a planning agent. Your task is to analyze and plan.",
+          "Plan the work.",
+          "Summarize the plan.",
+        ),
+      },
+      "Fix issue",
+    );
+    const detail = await putAssistantDetail(
+      uwf,
+      detailSchemas,
+      "---\nstatus: ready\n---\n\nContent here...",
+    );
+    const head = await putStep(uwf, seed, { role: "planner", detail, agent: "uwf-claude-code" });
+
+    const markdown = await readThread("01JTEST0000000000000002", head);
+
+    // Should wrap prompt in XML tags
+    expect(markdown).toContain("<prompt>");
+    expect(markdown).toContain("</prompt>");
+    expect(markdown).toContain("You are a planning agent. Your task is to analyze and plan.");
+
+    // Should not have ### Prompt heading
+    expect(markdown).not.toContain("### Prompt");
+
+    // Should wrap output in XML tags
+    expect(markdown).toContain("<output>");
+    expect(markdown).toContain("</output>");
+  });
+
+  test("scenario 3: same role repeated does not show prompt twice", async () => {
+    const uwf = await makeUwfStore(tmpDir);
+    const seed = await seedThread(
+      uwf,
+      { writer: role("Writer", "You are a writer agent.", "Write content.", "Summarize writing.") },
+      "Write something",
+    );
+    const step1 = await putStep(uwf, seed, { role: "writer" });
+    const step2 = await putStep(uwf, seed, { role: "writer", prev: step1 });
+
+    const markdown = await readThread("01JTEST0000000000000003", step2);
+
+    // Should only show prompt tags once
+    const promptCount = (markdown.match(/<prompt>/g) ?? []).length;
+    expect(promptCount).toBe(1);
+  });
+
+  test("scenario 4: step with no detail shows no output tags", async () => {
+    const uwf = await makeUwfStore(tmpDir);
+    const seed = await seedThread(
+      uwf,
+      { worker: role("Worker", "You are a worker agent.", "Do work.", "Summarize work.") },
+      "Do stuff",
+    );
+    const head = await putStep(uwf, seed, { role: "worker" });
+
+    const markdown = await readThread("01JTEST0000000000000004", head);
+
+    // Should not have output tags
+    expect(markdown).not.toContain("<output>");
+    expect(markdown).not.toContain("</output>");
+
+    // Step header should still be displayed
+    expect(markdown).toContain("## Step 1: worker");
+
+    // Prompt should still be shown
+    expect(markdown).toContain("<prompt>");
+  });
+
+  test("scenario 5: empty content shows no output tags", async () => {
+    const uwf = await makeUwfStore(tmpDir);
+    const seed = await seedThread(uwf, {}, "Do stuff");
+
+    // A detail ref that doesn't exist → extractLastAssistantContent returns null
+    const missingDetailRef = "missingdetail0" as CasRef;
+    const head = await putStep(uwf, seed, { role: "worker", detail: missingDetailRef });
+
+    const markdown = await readThread("01JTEST0000000000000005", head);
+
+    // Should not have output tags
+    expect(markdown).not.toContain("<output>");
+    expect(markdown).not.toContain("</output>");
+  });
+
+  test("scenario 6: thread read with --start flag shows task section", async () => {
+    const uwf = await makeUwfStore(tmpDir);
+    const seed = await seedThread(
+      uwf,
+      { roleA: role("Role A", "Goal for roleA") },
+      "Initial prompt",
+    );
+    const head = await putStep(uwf, seed, { role: "roleA" });
+
+    const markdown = await readThread("01JTEST0000000000000006", head, { start: true });
+
+    // Should include task section
+    expect(markdown).toContain("# Thread");
+    expect(markdown).toContain("## Task");
+    expect(markdown).toContain("Initial prompt");
+
+    // Prompts should use XML tags
+    expect(markdown).toContain("<prompt>");
+  });
+
+  test("scenario 7: thread read with --before parameter", async () => {
+    const uwf = await makeUwfStore(tmpDir);
+    const seed = await seedThread(
+      uwf,
+      {
+        roleA: role("Role A", "Goal for roleA"),
+        roleB: role("Role B", "Goal for roleB"),
+        roleC: role("Role C", "Goal for roleC"),
+      },
+      "Initial prompt",
+    );
+    const step1 = await putStep(uwf, seed, { role: "roleA" });
+    const step2 = await putStep(uwf, seed, { role: "roleB", prev: step1 });
+    const step3 = await putStep(uwf, seed, { role: "roleC", prev: step2 });
+
+    const markdown = await readThread("01JTEST0000000000000007", step3, { before: step2 });
+
+    // Should only show roleA
+    expect(markdown).toContain("roleA");
+    expect(markdown).not.toContain("roleB");
+    expect(markdown).not.toContain("roleC");
+
+    // Should use XML tags
+    expect(markdown).toContain("<prompt>");
+  });
+
+  test("scenario 9: special characters in content are preserved", async () => {
+    const uwf = await makeUwfStore(tmpDir);
+    const detailSchemas = await registerDetailSchemas(uwf.store);
+    const seed = await seedThread(
+      uwf,
+      { writer: role("Writer", "You are a writer.", "Write content.", "Summarize.") },
+      "Write something",
+    );
+    const detail = await putAssistantDetail(
+      uwf,
+      detailSchemas,
+      "Content with <special> & characters > like <this>",
+    );
+    const head = await putStep(uwf, seed, { role: "writer", detail });
+
+    const markdown = await readThread("01JTEST0000000000000008", head);
+
+    // Special characters should be preserved as-is
+    expect(markdown).toContain("Content with <special> & characters > like <this>");
+  });
+
+  test("scenario 10: quota limit with XML tags", async () => {
+    const uwf = await makeUwfStore(tmpDir);
+    const seed = await seedThread(
+      uwf,
+      { roleA: role("Role A", "Goal for roleA") },
+      "Initial prompt",
+    );
+
+    // Chain of five roleA steps; `head` always refers to the most recent one.
+    let head = await putStep(uwf, seed, { role: "roleA" });
+    for (let i = 1; i < 5; i++) {
+      head = await putStep(uwf, seed, { role: "roleA", prev: head });
+    }
+
+    // Use very small quota
+    const markdown = await readThread("01JTEST0000000000000009", head, { quota: 1 });
+
+    // Should have skip hint
+    expect(markdown).toContain("earlier step");
+
+    // Every rendered <prompt> must be closed. Comparing counts keeps the check
+    // unconditional, so it cannot silently assert nothing.
+    const openTags = (markdown.match(/<prompt>/g) ?? []).length;
+    const closeTags = (markdown.match(/<\/prompt>/g) ?? []).length;
+    expect(closeTags).toBe(openTags);
+  });
+});
@@ -0,0 +1,71 @@
+import { execFileSync } from "node:child_process";
+import { join } from "node:path";
+import { describe, expect, test } from "vitest";
+
+/** Path to the CLI entry point, resolved relative to this test file's directory. */
+const CLI_PATH = join(import.meta.dirname, "..", "cli.js");
+
+/**
+ * Runs the CLI synchronously through `bun run` and captures its result.
+ *
+ * `WORKFLOW_STORAGE_ROOT` is set to a fixed path whose name says it is not meant to
+ * exist. stdin is ignored and stdout/stderr are piped.
+ *
+ * @param args - Arguments appended after the CLI script path.
+ * @returns On success, stdout with `exitCode` 0 (stderr is not captured and is
+ *   reported as ""). On a non-zero exit, the captured stdout/stderr and the exit status.
+ * @throws Re-throws the underlying error when the child reported no numeric exit
+ *   status (e.g. `bun` could not be spawned or the process was killed by a signal),
+ *   so tests that only expect "some failure" cannot pass without the CLI having run.
+ */
+function runCli(args: string[]): { stdout: string; stderr: string; exitCode: number } {
+  try {
+    const stdout = execFileSync("bun", ["run", CLI_PATH, ...args], {
+      encoding: "utf8",
+      env: { ...process.env, WORKFLOW_STORAGE_ROOT: "/tmp/uwf-test-nonexistent" },
+      stdio: ["ignore", "pipe", "pipe"],
+    });
+    return { stdout, stderr: "", exitCode: 0 };
+  } catch (e: unknown) {
+    const err = e as NodeJS.ErrnoException & {
+      stdout?: string | null;
+      stderr?: string | null;
+      status?: number | null;
+    };
+    // Without a numeric status the CLI never produced an exit code; surface that
+    // instead of disguising it as an ordinary "exit 1".
+    if (typeof err.status !== "number") {
+      throw e;
+    }
+    return {
+      stdout: err.stdout ?? "",
+      stderr: err.stderr ?? "",
+      exitCode: err.status,
+    };
+  }
+}
+
+/** Verifies that `thread exec --help` advertises the step-count option. */
+describe("thread exec --count CLI parsing", () => {
+  test("--help shows -c/--count option", () => {
+    const result = runCli(["thread", "exec", "--help"]);
+    expect(result.stdout).toContain("--count");
+    // "--count" itself contains the substring "-c", so toContain("-c") could never
+    // fail on its own. Require a "-c" that is not part of a longer flag or word.
+    expect(result.stdout).toMatch(/(?<![-\w])-c\b/);
+  });
+
+  test("description says 'one or more steps'", () => {
+    const result = runCli(["thread", "exec", "--help"]);
+    expect(result.stdout).toContain("one or more steps");
+  });
+});
+
+/** Exercises validation of the `-c` step count accepted by `thread exec`. */
+describe("cmdThreadExec count logic", () => {
+  // Counts that must be rejected with the "positive integer" validation message.
+  test.each([
+    ["count=0", "0"],
+    ["negative count", "-1"],
+    ["non-integer count", "1.5"],
+  ])("%s fails with validation error", (_label, count) => {
+    const { exitCode, stderr } = runCli(["thread", "exec", "FAKE_THREAD_ID", "-c", count]);
+    expect(exitCode).not.toBe(0);
+    expect(stderr).toContain("positive integer");
+  });
+
+  // Invocations with an acceptable count: they still exit non-zero (the thread id is
+  // fake), but the failure must not be the count validation error.
+  test.each([
+    { name: "count=1 is the default (no -c flag)", countArgs: [] as string[] },
+    { name: "count=3 passes validation (fails on thread lookup)", countArgs: ["-c", "3"] },
+  ])("$name", ({ countArgs }) => {
+    const { exitCode, stderr } = runCli(["thread", "exec", "FAKE_THREAD_ID", ...countArgs]);
+    expect(exitCode).not.toBe(0);
+    // Should NOT contain "positive integer" error — should fail on thread/storage lookup
+    expect(stderr).not.toContain("positive integer");
+  });
+});
@@ -5,15 +5,15 @@ import { bootstrap, putSchema } from "@uncaged/json-cas";
 import { createFsStore } from "@uncaged/json-cas-fs";
 import type { CasRef, ThreadId } from "@uncaged/workflow-protocol";
 import { afterEach, beforeEach, describe, expect, test } from "vitest";
+import { cmdStepList, cmdStepShow } from "../commands/step.js";
 import {
  cmdThreadRead,
-  cmdThreadStepDetails,
  extractLastAssistantContent,
  THREAD_READ_DEFAULT_QUOTA,
 } from "../commands/thread.js";
 import { registerUwfSchemas } from "../schemas.js";
 import type { UwfStore } from "../store.js";
-import { saveThreadsIndex } from "../store.js";
+import { appendThreadHistory, saveThreadsIndex } from "../store.js";

 // ── schemas used in tests ────────────────────────────────────────────────────

@@ -198,10 +198,10 @@ describe("extractLastAssistantContent", () => {
  });
 });

-// ── cmdThreadRead: ### Content section ───────────────────────────────────────
+// ── cmdThreadRead: <output> section ──────────────────────────────────────────

-describe("cmdThreadRead ### Content section", () => {
-  test("includes ### Content before ### Output when detail has assistant turns", async () => {
+describe("cmdThreadRead <output> section", () => {
+  test("includes <output> tags when detail has assistant turns", async () => {
    const uwf = await makeUwfStore(tmpDir);
    const detailSchemas = await registerDetailSchemas(uwf.store);

@@ -211,11 +211,11 @@ describe("cmdThreadRead ### Content section", () => {
      roles: {
        writer: {
          description: "Write",
-          identity: "You are a writer.",
-          prepare: "",
-          execute: "Write content as requested.",
-          report: "Summarize what was written.",
-          outputSchema: "placeholder00" as CasRef,
+          goal: "You are a writer.",
+          capabilities: [],
+          procedure: "Write content as requested.",
+          output: "Summarize what was written.",
+          meta: "placeholder00" as CasRef,
        },
      },
      conditions: {},
@@ -264,17 +264,13 @@ describe("cmdThreadRead ### Content section", () => {

    const markdown = await cmdThreadRead(tmpDir, threadId, THREAD_READ_DEFAULT_QUOTA, null, false);

-    expect(markdown).toContain("### Content");
+    expect(markdown).toContain("<output>");
+    expect(markdown).toContain("</output>");
    expect(markdown).toContain("The assistant response text");
-
-    const contentIdx = markdown.indexOf("### Content");
-    const outputIdx = markdown.indexOf("### Output");
-    expect(contentIdx).toBeGreaterThanOrEqual(0);
-    expect(outputIdx).toBeGreaterThanOrEqual(0);
-    expect(contentIdx).toBeLessThan(outputIdx);
+    expect(markdown).not.toContain("### Content");
  });

-  test("omits ### Content when detail has no matching assistant turns", async () => {
+  test("omits <output> tags when detail has no matching assistant turns", async () => {
    const uwf = await makeUwfStore(tmpDir);

    const workflowHash = await uwf.store.put(uwf.schemas.workflow, {
@@ -313,14 +309,15 @@ describe("cmdThreadRead ### Content section", () => {

    const markdown = await cmdThreadRead(tmpDir, threadId, THREAD_READ_DEFAULT_QUOTA, null, false);

+    expect(markdown).not.toContain("<output>");
+    expect(markdown).not.toContain("</output>");
    expect(markdown).not.toContain("### Content");
-    expect(markdown).toContain("### Output");
  });
 });

-// ── cmdThreadStepDetails ──────────────────────────────────────────────────────
+// ── cmdStepShow ───────────────────────────────────────────────────────────────

-describe("cmdThreadStepDetails", () => {
+describe("cmdStepShow", () => {
  test("returns expanded detail node with turns inlined", async () => {
    const uwf = await makeUwfStore(tmpDir);
    const detailSchemas = await registerDetailSchemas(uwf.store);
@@ -368,7 +365,7 @@ describe("cmdThreadStepDetails", () => {
      agent: "uwf-hermes",
    });

-    const result = await cmdThreadStepDetails(tmpDir, stepHash);
+    const result = await cmdStepShow(tmpDir, stepHash);

    expect(result).toMatchObject({
      sessionId: "sess42",
@@ -387,8 +384,646 @@ describe("cmdThreadStepDetails", () => {
      content: "done",
    });
  });
+});

-  test("throws when step hash does not exist", async () => {
-    await expect(cmdThreadStepDetails(tmpDir, "nonexistenth0" as CasRef)).rejects.toThrow();
+// ── cmdThreadRead: <prompt> deduplication ────────────────────────────────────
+
+describe("cmdThreadRead <prompt> deduplication", () => {
+  /**
+   * Stores a workflow declaring each distinct name in `roles`, a start node, and
+   * one step node per entry of `roles`, chained through `prev` in order.
+   *
+   * @param uwf - store and schemas to write into
+   * @param roles - role name for each step, oldest first; must not be empty
+   * @returns the hash of the last step, suitable as a thread head
+   * @throws Error when `roles` is empty, because no step node would exist
+   */
+  async function makeThreadWithRoles(uwf: UwfStore, roles: string[]): Promise<CasRef> {
+    const roleMap: Record<string, unknown> = {};
+    for (const r of new Set(roles)) {
+      roleMap[r] = {
+        description: r,
+        goal: `Goal for ${r}`,
+        capabilities: [],
+        procedure: "Do stuff.",
+        output: "Output.",
+        meta: "placeholder00" as CasRef,
+      };
+    }
+    const workflowHash = await uwf.store.put(uwf.schemas.workflow, {
+      name: "dedup-wf",
+      description: "desc",
+      roles: roleMap,
+      conditions: {},
+      graph: {},
+    });
+    const startHash = await uwf.store.put(uwf.schemas.startNode, {
+      workflow: workflowHash,
+      prompt: "Start",
+    });
+    const outputHash = await uwf.store.put(uwf.schemas.workflow, {
+      name: "out",
+      description: "",
+      roles: {},
+      conditions: {},
+      graph: {},
+    });
+
+    // Tracked as CasRef from the start so linking steps needs no cast.
+    let prev: CasRef | null = null;
+    for (const role of roles) {
+      const stepHash = (await uwf.store.put(uwf.schemas.stepNode, {
+        start: startHash,
+        prev,
+        role,
+        output: outputHash,
+        detail: null,
+        agent: "uwf-test",
+      })) as CasRef;
+      prev = stepHash;
+    }
+    // An empty role list used to yield "" as the head hash; fail loudly instead.
+    if (prev === null) {
+      throw new Error("makeThreadWithRoles: roles must contain at least one role");
+    }
+    return prev;
+  }
+
+  /**
+   * Builds a thread with the given step roles, registers it under `id`, renders
+   * it with cmdThreadRead and returns how many `<prompt>` tags the output has.
+   */
+  async function countPromptTags(roles: string[], id: string): Promise<number> {
+    const uwf = await makeUwfStore(tmpDir);
+    const headHash = await makeThreadWithRoles(uwf, roles);
+    const threadId = id as ThreadId;
+    await saveThreadsIndex(tmpDir, { [threadId]: headHash });
+
+    const markdown = await cmdThreadRead(tmpDir, threadId, THREAD_READ_DEFAULT_QUOTA, null, false);
+    return (markdown.match(/<prompt>/g) ?? []).length;
+  }
+
+  test("same consecutive role shows <prompt> once", async () => {
+    const count = await countPromptTags(["writer", "writer"], "01JTEST0000000000000003");
+    expect(count).toBe(1);
+  });
+
+  test("different consecutive roles each show <prompt>", async () => {
+    const count = await countPromptTags(["planner", "coder"], "01JTEST0000000000000004");
+    expect(count).toBe(2);
+  });
+
+  test("non-consecutive same role shows <prompt> twice", async () => {
+    const count = await countPromptTags(["roleA", "roleB", "roleA"], "01JTEST0000000000000005");
+    expect(count).toBe(2);
+  });
+});
+
+// ── cmdThreadRead: showStart / before / quota ─────────────────────────────────
+
+describe("cmdThreadRead start section / before / quota", () => {
+  /**
+   * Persists a workflow whose roles are the distinct entries of `roles`, a start
+   * node with the prompt "Initial prompt", and a chain of step nodes (one per
+   * entry of `roles`, in order).
+   *
+   * @returns the start node hash plus every step hash, oldest first
+   */
+  async function makeSimpleThread(
+    uwf: UwfStore,
+    roles: string[],
+  ): Promise<{ startHash: CasRef; stepHashes: CasRef[] }> {
+    const { store, schemas } = uwf;
+
+    const roleTable: Record<string, unknown> = {};
+    for (const name of new Set(roles)) {
+      roleTable[name] = {
+        description: name,
+        goal: `Goal for ${name}`,
+        capabilities: [],
+        procedure: "Do stuff.",
+        output: "Output.",
+        meta: "placeholder00" as CasRef,
+      };
+    }
+
+    const workflowHash = await store.put(schemas.workflow, {
+      name: "simple-wf",
+      description: "desc",
+      roles: roleTable,
+      conditions: {},
+      graph: {},
+    });
+    const startHash = (await store.put(schemas.startNode, {
+      workflow: workflowHash,
+      prompt: "Initial prompt",
+    })) as CasRef;
+    const outputHash = await store.put(schemas.workflow, {
+      name: "out",
+      description: "",
+      roles: {},
+      conditions: {},
+      graph: {},
+    });
+
+    // Each step points at the one stored just before it; the first has no predecessor.
+    const stepHashes: CasRef[] = [];
+    for (const role of roles) {
+      const previous: CasRef | null = stepHashes[stepHashes.length - 1] ?? null;
+      const current = (await store.put(schemas.stepNode, {
+        start: startHash,
+        prev: previous,
+        role,
+        output: outputHash,
+        detail: null,
+        agent: "uwf-test",
+      })) as CasRef;
+      stepHashes.push(current);
+    }
+    return { startHash, stepHashes };
+  }
+
+  /** Points thread `id` at `head` in the threads index and returns the typed id. */
+  async function registerThread(id: string, head: CasRef): Promise<ThreadId> {
+    const threadId = id as ThreadId;
+    await saveThreadsIndex(tmpDir, { [threadId]: head });
+    return threadId;
+  }
+
+  test("showStart=true includes # Thread header and ## Task section", async () => {
+    const store = await makeUwfStore(tmpDir);
+    const thread = await makeSimpleThread(store, ["roleA"]);
+    const head = thread.stepHashes[thread.stepHashes.length - 1]!;
+    const threadId = await registerThread("01JTEST0000000000000006", head);
+
+    const rendered = await cmdThreadRead(tmpDir, threadId, THREAD_READ_DEFAULT_QUOTA, null, true);
+    for (const expected of ["# Thread", "## Task", "Initial prompt"]) {
+      expect(rendered).toContain(expected);
+    }
+  });
+
+  test("showStart=false with before=null still shows # Thread header (default behavior)", async () => {
+    const store = await makeUwfStore(tmpDir);
+    const thread = await makeSimpleThread(store, ["roleA"]);
+    const head = thread.stepHashes[thread.stepHashes.length - 1]!;
+    const threadId = await registerThread("01JTEST0000000000000007", head);
+
+    // With before=null the start section is expected even though showStart is false.
+    const rendered = await cmdThreadRead(tmpDir, threadId, THREAD_READ_DEFAULT_QUOTA, null, false);
+    for (const expected of ["# Thread", "## Task"]) {
+      expect(rendered).toContain(expected);
+    }
+  });
+
+  test("before filter: only steps before the given hash appear", async () => {
+    const store = await makeUwfStore(tmpDir);
+    const thread = await makeSimpleThread(store, ["roleA", "roleB", "roleC"]);
+    const hashB = thread.stepHashes[1]!;
+    const hashC = thread.stepHashes[2]!;
+    const threadId = await registerThread("01JTEST0000000000000008", hashC);
+
+    const rendered = await cmdThreadRead(tmpDir, threadId, THREAD_READ_DEFAULT_QUOTA, hashB, false);
+    expect(rendered).toContain("roleA");
+    for (const hidden of ["roleB", "roleC"]) {
+      expect(rendered).not.toContain(hidden);
+    }
+  });
+
+  test("quota=1 limits output and includes skip hint", async () => {
+    const store = await makeUwfStore(tmpDir);
+    const thread = await makeSimpleThread(store, ["roleA", "roleB", "roleC"]);
+    const head = thread.stepHashes[thread.stepHashes.length - 1]!;
+    const threadId = await registerThread("01JTEST000000000000000A", head);
+
+    const rendered = await cmdThreadRead(tmpDir, threadId, 1, null, false);
+    expect(rendered).toContain("earlier step");
+  });
+
+  test("all steps fit in quota: no skip hint", async () => {
+    const store = await makeUwfStore(tmpDir);
+    const thread = await makeSimpleThread(store, ["roleA"]);
+    const threadId = await registerThread("01JTEST000000000000000B", thread.stepHashes[0]!);
+
+    const rendered = await cmdThreadRead(tmpDir, threadId, THREAD_READ_DEFAULT_QUOTA, null, false);
+    expect(rendered).not.toContain("earlier step");
+  });
+});
+
+// ── Tests that call process.exit must be last ─────────────────────────────────
+// NOTE(review): more describe blocks follow this one, so it is not actually the
+// last suite — confirm whether the ordering constraint above still applies.
+
+describe("cmdStepShow (process.exit tests - must be last)", () => {
+  test("throws when step hash does not exist", async () => {
+    await expect(cmdStepShow(tmpDir, "nonexistenth0" as CasRef)).rejects.toThrow();
+  });
+
+  // Exercises cmdThreadRead: a `before` hash that was never stored must reject.
+  test("before with unknown hash rejects", async () => {
+    // Use the store returned by makeUwfStore directly, like the other suites in
+    // this file, instead of constructing a second store by hand.
+    const uwf = await makeUwfStore(tmpDir);
+    const threadId = "01JTEST000000000000000C" as ThreadId;
+
+    const workflowHash = await uwf.store.put(uwf.schemas.workflow, {
+      name: "wf2",
+      description: "",
+      roles: {
+        roleA: {
+          description: "r",
+          goal: "g",
+          capabilities: [],
+          procedure: "p",
+          output: "o",
+          meta: "placeholder00" as CasRef,
+        },
+      },
+      conditions: {},
+      graph: {},
+    });
+    const startHash = await uwf.store.put(uwf.schemas.startNode, {
+      workflow: workflowHash,
+      prompt: "p",
+    });
+    const outputHash = await uwf.store.put(uwf.schemas.workflow, {
+      name: "out",
+      description: "",
+      roles: {},
+      conditions: {},
+      graph: {},
+    });
+    const stepHash = await uwf.store.put(uwf.schemas.stepNode, {
+      start: startHash,
+      prev: null,
+      role: "roleA",
+      output: outputHash,
+      detail: null,
+      agent: "uwf-test",
+    });
+    await saveThreadsIndex(tmpDir, { [threadId]: stepHash as CasRef });
+
+    await expect(
+      cmdThreadRead(
+        tmpDir,
+        threadId,
+        THREAD_READ_DEFAULT_QUOTA,
+        "unknownhash0" as CasRef,
+        false,
+      ),
+    ).rejects.toThrow();
+  });
+});
+
+// ── cmdStepList / cmdStepShow: completed threads ──────────────────────────────
+
+describe("cmdStepList with completed threads", () => {
+  /**
+   * Creates a store via makeUwfStore and persists a role-less workflow named
+   * `workflowName`, its start node ("Start prompt") and the placeholder "out"
+   * node that every step in these tests uses as its output.
+   */
+  async function seedWorkflow(workflowName: string) {
+    const { store, schemas } = await makeUwfStore(tmpDir);
+    const workflowHash = await store.put(schemas.workflow, {
+      name: workflowName,
+      description: "desc",
+      roles: {},
+      conditions: {},
+      graph: {},
+    });
+    const startHash = await store.put(schemas.startNode, {
+      workflow: workflowHash,
+      prompt: "Start prompt",
+    });
+    const outputHash = await store.put(schemas.workflow, {
+      name: "out",
+      description: "",
+      roles: {},
+      conditions: {},
+      graph: {},
+    });
+    return { store, schemas, workflowHash, startHash, outputHash };
+  }
+
+  test("lists steps from active thread", async () => {
+    const { store, schemas, startHash, outputHash } = await seedWorkflow("test-wf-active");
+    // Fields shared by every step node; spread last to keep the original key order.
+    const shared = { output: outputHash, detail: null, agent: "uwf-test" };
+
+    const first = await store.put(schemas.stepNode, {
+      start: startHash,
+      prev: null,
+      role: "role1",
+      ...shared,
+    });
+    const second = await store.put(schemas.stepNode, {
+      start: startHash,
+      prev: first,
+      role: "role2",
+      ...shared,
+    });
+    const third = await store.put(schemas.stepNode, {
+      start: startHash,
+      prev: second,
+      role: "role3",
+      ...shared,
+    });
+
+    const threadId = "01JTEST0000000000000000A1" as ThreadId;
+    await saveThreadsIndex(tmpDir, { [threadId]: third });
+
+    const listing = await cmdStepList(tmpDir, threadId);
+
+    expect(listing.thread).toBe(threadId);
+    expect(listing.steps).toHaveLength(4); // start + 3 steps
+    expect(listing.steps[1].role).toBe("role1");
+    expect(listing.steps[2].role).toBe("role2");
+    expect(listing.steps[3].role).toBe("role3");
+  });
+
+  test("lists steps from completed thread", async () => {
+    const { store, schemas, workflowHash, startHash, outputHash } =
+      await seedWorkflow("test-wf-completed");
+    const shared = { output: outputHash, detail: null, agent: "uwf-test" };
+
+    const first = await store.put(schemas.stepNode, {
+      start: startHash,
+      prev: null,
+      role: "roleA",
+      ...shared,
+    });
+    const second = await store.put(schemas.stepNode, {
+      start: startHash,
+      prev: first,
+      role: "roleB",
+      ...shared,
+    });
+
+    const threadId = "01JTEST0000000000000000A2" as ThreadId;
+    // Simulate a completed thread: absent from the threads index ...
+    await saveThreadsIndex(tmpDir, {});
+    // ... but recorded in the thread history.
+    await appendThreadHistory(tmpDir, {
+      thread: threadId,
+      workflow: workflowHash,
+      head: second,
+      completedAt: Date.now(),
+    });
+
+    const listing = await cmdStepList(tmpDir, threadId);
+
+    expect(listing.thread).toBe(threadId);
+    expect(listing.steps).toHaveLength(3); // start + 2 steps
+    expect(listing.steps[1].role).toBe("roleA");
+    expect(listing.steps[2].role).toBe("roleB");
+  });
+});
+
+describe("cmdStepShow with completed threads", () => {
+  /** Values that differ between the two scenarios below. */
+  type StepSpec = {
+    workflowName: string;
+    reply: string;
+    sessionId: string;
+    model: string;
+    duration: number;
+    role: string;
+  };
+
+  /**
+   * Persists a workflow, start node, placeholder output, one assistant turn, a
+   * detail node referencing that turn, and a single step node carrying the
+   * detail. Returns the workflow and step hashes.
+   */
+  async function putDetailedStep(spec: StepSpec) {
+    const { store, schemas } = await makeUwfStore(tmpDir);
+    const detailSchemas = await registerDetailSchemas(store);
+
+    const workflowHash = await store.put(schemas.workflow, {
+      name: spec.workflowName,
+      description: "desc",
+      roles: {},
+      conditions: {},
+      graph: {},
+    });
+    const startHash = await store.put(schemas.startNode, {
+      workflow: workflowHash,
+      prompt: "p",
+    });
+    const outputHash = await store.put(schemas.workflow, {
+      name: "out",
+      description: "",
+      roles: {},
+      conditions: {},
+      graph: {},
+    });
+
+    const turnHash = await store.put(detailSchemas.turn, {
+      index: 0,
+      role: "assistant",
+      content: spec.reply,
+      toolCalls: null,
+      reasoning: null,
+    });
+    const detailHash = await store.put(detailSchemas.detail, {
+      sessionId: spec.sessionId,
+      model: spec.model,
+      duration: spec.duration,
+      turnCount: 1,
+      turns: [turnHash],
+    });
+
+    const stepHash = await store.put(schemas.stepNode, {
+      start: startHash,
+      prev: null,
+      role: spec.role,
+      output: outputHash,
+      detail: detailHash,
+      agent: "uwf-hermes",
+    });
+    return { workflowHash, stepHash };
+  }
+
+  test("shows step detail from active thread", async () => {
+    const { stepHash } = await putDetailedStep({
+      workflowName: "test-wf-step-active",
+      reply: "Active thread response",
+      sessionId: "sess-active",
+      model: "model-x",
+      duration: 1234,
+      role: "coder",
+    });
+
+    const threadId = "01JTEST0000000000000000B1" as ThreadId;
+    await saveThreadsIndex(tmpDir, { [threadId]: stepHash });
+
+    const shown = await cmdStepShow(tmpDir, stepHash);
+
+    expect(shown).toMatchObject({
+      sessionId: "sess-active",
+      model: "model-x",
+      duration: 1234,
+      turnCount: 1,
+    });
+  });
+
+  test("shows step detail from completed thread", async () => {
+    const { workflowHash, stepHash } = await putDetailedStep({
+      workflowName: "test-wf-step-completed",
+      reply: "Completed thread response",
+      sessionId: "sess-completed",
+      model: "model-y",
+      duration: 5678,
+      role: "reviewer",
+    });
+
+    const threadId = "01JTEST0000000000000000B2" as ThreadId;
+    // Completed thread: missing from the threads index ...
+    await saveThreadsIndex(tmpDir, {});
+    // ... but present in the thread history.
+    await appendThreadHistory(tmpDir, {
+      thread: threadId,
+      workflow: workflowHash,
+      head: stepHash,
+      completedAt: Date.now(),
+    });
+
+    const shown = await cmdStepShow(tmpDir, stepHash);
+
+    expect(shown).toMatchObject({
+      sessionId: "sess-completed",
+      model: "model-y",
+      duration: 5678,
+      turnCount: 1,
+    });
+  });
+});
+
+describe("cmdThreadRead with completed threads", () => {
+  test("reads completed thread context", async () => {
+    const { store, schemas } = await makeUwfStore(tmpDir);
+
+    const writerRole = {
+      description: "Write",
+      goal: "You are a writer.",
+      capabilities: [],
+      procedure: "Write content.",
+      output: "Summary.",
+      meta: "placeholder00" as CasRef,
+    };
+    const workflowHash = await store.put(schemas.workflow, {
+      name: "test-wf-read-completed",
+      description: "desc",
+      roles: { writer: writerRole },
+      conditions: {},
+      graph: {},
+    });
+    const startHash = await store.put(schemas.startNode, {
+      workflow: workflowHash,
+      prompt: "Write something",
+    });
+    const outputHash = await store.put(schemas.workflow, {
+      name: "out",
+      description: "",
+      roles: {},
+      conditions: {},
+      graph: {},
+    });
+
+    const head = await store.put(schemas.stepNode, {
+      start: startHash,
+      prev: null,
+      role: "writer",
+      output: outputHash,
+      detail: null,
+      agent: "uwf-hermes",
+    });
+
+    const threadId = "01JTEST0000000000000000C1" as ThreadId;
+    // Completed thread: not in the threads index ...
+    await saveThreadsIndex(tmpDir, {});
+    // ... only in the thread history.
+    await appendThreadHistory(tmpDir, {
+      thread: threadId,
+      workflow: workflowHash,
+      head,
+      completedAt: Date.now(),
+    });
+
+    const rendered = await cmdThreadRead(tmpDir, threadId, THREAD_READ_DEFAULT_QUOTA, null, false);
+
+    for (const expected of ["writer", "Write something"]) {
+      expect(rendered).toContain(expected);
+    }
+  });
+
+  test("reads completed thread with before filter", async () => {
+    const { store, schemas } = await makeUwfStore(tmpDir);
+
+    const workflowHash = await store.put(schemas.workflow, {
+      name: "test-wf-read-before",
+      description: "desc",
+      roles: {},
+      conditions: {},
+      graph: {},
+    });
+    const startHash = await store.put(schemas.startNode, {
+      workflow: workflowHash,
+      prompt: "Do task",
+    });
+    const outputHash = await store.put(schemas.workflow, {
+      name: "out",
+      description: "",
+      roles: {},
+      conditions: {},
+      graph: {},
+    });
+
+    // Fields shared by all three steps; spread last to keep the original key order.
+    const shared = { output: outputHash, detail: null, agent: "uwf-test" };
+    const stepX = await store.put(schemas.stepNode, {
+      start: startHash,
+      prev: null,
+      role: "roleX",
+      ...shared,
+    });
+    const stepY = await store.put(schemas.stepNode, {
+      start: startHash,
+      prev: stepX,
+      role: "roleY",
+      ...shared,
+    });
+    const stepZ = await store.put(schemas.stepNode, {
+      start: startHash,
+      prev: stepY,
+      role: "roleZ",
+      ...shared,
+    });
+
+    const threadId = "01JTEST0000000000000000C2" as ThreadId;
+    await saveThreadsIndex(tmpDir, {});
+    await appendThreadHistory(tmpDir, {
+      thread: threadId,
+      workflow: workflowHash,
+      head: stepZ,
+      completedAt: Date.now(),
+    });
+
+    const rendered = await cmdThreadRead(
+      tmpDir,
+      threadId,
+      THREAD_READ_DEFAULT_QUOTA,
+      stepY,
+      false,
+    );
+
+    // Only the step before stepY (roleX) may appear; roleY and roleZ must not.
+    expect(rendered).toContain("roleX");
+    for (const hidden of ["roleY", "roleZ"]) {
+      expect(rendered).not.toContain(hidden);
+    }
  });
 });
@@ -0,0 +1,470 @@
+import type { WorkflowPayload } from "@uncaged/workflow-protocol";
+import { describe, expect, test } from "vitest";
+import { validateWorkflow } from "../validate-semantic.js";
+
+/**
+ * Build a valid two-role workflow (writer → reviewer → $END, with a rejection
+ * loop back to writer) that passes all checks.
+ *
+ * @param overrides - optional top-level fields shallow-merged over the base workflow
+ * @returns a freshly built workflow object, safe for the caller to mutate
+ */
+function makeWorkflow(overrides?: Partial<WorkflowPayload>): WorkflowPayload {
+  // Single-exit schema: the only status is "_" and `plan` is required.
+  const writerFrontmatter = {
+    type: "object",
+    properties: {
+      $status: { enum: ["_"] },
+      plan: { type: "string" },
+    },
+    required: ["$status", "plan"],
+  };
+
+  // Multi-exit schema: one oneOf variant per $status constant.
+  const reviewerFrontmatter = {
+    type: "object",
+    oneOf: [
+      {
+        properties: {
+          $status: { const: "approved" },
+          summary: { type: "string" },
+        },
+        required: ["$status", "summary"],
+      },
+      {
+        properties: {
+          $status: { const: "rejected" },
+          reason: { type: "string" },
+        },
+        required: ["$status", "reason"],
+      },
+    ],
+  };
+
+  const base: WorkflowPayload = {
+    name: "test-workflow",
+    description: "A test workflow",
+    roles: {
+      writer: {
+        description: "Writes content",
+        goal: "Write content",
+        capabilities: ["writing"],
+        procedure: "Write it",
+        output: "The content",
+        frontmatter: writerFrontmatter as unknown as string,
+      },
+      reviewer: {
+        description: "Reviews content",
+        goal: "Review content",
+        capabilities: ["reviewing"],
+        procedure: "Review it",
+        output: "The review",
+        frontmatter: reviewerFrontmatter as unknown as string,
+      },
+    },
+    graph: {
+      $START: { _: { role: "writer", prompt: "Begin writing" } },
+      writer: { _: { role: "reviewer", prompt: "Review this: {{{plan}}}" } },
+      reviewer: {
+        approved: { role: "$END", prompt: "Done: {{{summary}}}" },
+        rejected: { role: "writer", prompt: "Fix: {{{reason}}}" },
+      },
+    },
+  };
+
+  return overrides ? { ...base, ...overrides } : base;
+}
+
+describe("Suite 1: Role Reference Integrity", () => {
+  /** True when at least one reported error contains `fragment`. */
+  const reports = (errors: readonly string[], fragment: string): boolean =>
+    errors.some((message) => message.includes(fragment));
+
+  /** Role body used when registering a role under a reserved name. */
+  const reservedRoleBody = () => ({
+    description: "Bad",
+    goal: "Bad",
+    capabilities: [],
+    procedure: "Bad",
+    output: "Bad",
+    frontmatter: { type: "object", properties: {}, required: [] },
+  });
+
+  test("1.1 graph references unknown role", () => {
+    const workflow = makeWorkflow();
+    workflow.graph.nonexistent = { _: { role: "$END", prompt: "done" } };
+
+    expect(reports(validateWorkflow(workflow), 'unknown role "nonexistent"')).toBe(true);
+  });
+
+  test("1.2 orphan role not in graph", () => {
+    const workflow = makeWorkflow();
+    const orphanFrontmatter = {
+      type: "object",
+      properties: { $status: { enum: ["_"] } },
+      required: ["$status"],
+    };
+    workflow.roles.orphan = {
+      description: "Orphan",
+      goal: "Nothing",
+      capabilities: [],
+      procedure: "None",
+      output: "None",
+      frontmatter: orphanFrontmatter as unknown as string,
+    };
+
+    const found = reports(
+      validateWorkflow(workflow),
+      'role "orphan" is defined but not referenced in graph',
+    );
+    expect(found).toBe(true);
+  });
+
+  test("1.3 $START in roles", () => {
+    const workflow = makeWorkflow();
+    (workflow.roles as Record<string, unknown>).$START = reservedRoleBody();
+
+    expect(reports(validateWorkflow(workflow), 'reserved name "$START"')).toBe(true);
+  });
+
+  test("1.4 $END in roles", () => {
+    const workflow = makeWorkflow();
+    (workflow.roles as Record<string, unknown>).$END = reservedRoleBody();
+
+    expect(reports(validateWorkflow(workflow), 'reserved name "$END"')).toBe(true);
+  });
+
+  test("1.5 valid workflow returns no errors", () => {
+    expect(validateWorkflow(makeWorkflow())).toEqual([]);
+  });
+});
+
+describe("Suite 2: Graph Structure", () => {
+  /** True when at least one reported error contains `fragment`. */
+  const reports = (errors: readonly string[], fragment: string): boolean =>
+    errors.some((message) => message.includes(fragment));
+
+  // Message expected whenever $START does not have exactly one "_" edge.
+  const START_EDGE_ERROR = '$START must have exactly one edge with status "_"';
+
+  test("2.1 $START missing from graph", () => {
+    const workflow = makeWorkflow();
+    delete workflow.graph.$START;
+
+    expect(reports(validateWorkflow(workflow), "$START must be defined in graph")).toBe(true);
+  });
+
+  test("2.2 $START has multiple status keys", () => {
+    const workflow = makeWorkflow();
+    workflow.graph.$START = {
+      _: { role: "writer", prompt: "Begin" },
+      other: { role: "reviewer", prompt: "Also" },
+    };
+
+    expect(reports(validateWorkflow(workflow), START_EDGE_ERROR)).toBe(true);
+  });
+
+  test("2.3 $START edge uses non-_ status", () => {
+    const workflow = makeWorkflow();
+    workflow.graph.$START = { ready: { role: "writer", prompt: "Begin" } };
+
+    expect(reports(validateWorkflow(workflow), START_EDGE_ERROR)).toBe(true);
+  });
+
+  test("2.4 $END has outgoing edges", () => {
+    const workflow = makeWorkflow();
+    workflow.graph.$END = { _: { role: "writer", prompt: "Loop" } };
+
+    expect(reports(validateWorkflow(workflow), "$END must not have outgoing edges")).toBe(true);
+  });
+
+  test("2.5 unreachable role", () => {
+    const workflow = makeWorkflow();
+    const isolatedFrontmatter = {
+      type: "object",
+      properties: { $status: { enum: ["_"] } },
+      required: ["$status"],
+    };
+    workflow.roles.isolated = {
+      description: "Isolated",
+      goal: "Isolated",
+      capabilities: [],
+      procedure: "Isolated",
+      output: "Isolated",
+      frontmatter: isolatedFrontmatter as unknown as string,
+    };
+    // The role has an outgoing edge, but no edge in the graph targets it.
+    workflow.graph.isolated = { _: { role: "$END", prompt: "done" } };
+
+    const found = reports(
+      validateWorkflow(workflow),
+      'role "isolated" is not reachable from $START',
+    );
+    expect(found).toBe(true);
+  });
+
+  test("2.6 edge target references invalid role", () => {
+    const workflow = makeWorkflow();
+    workflow.graph.writer = { _: { role: "ghost", prompt: "Go to ghost" } };
+
+    expect(reports(validateWorkflow(workflow), 'unknown target role "ghost"')).toBe(true);
+  });
+});
+
+describe("Suite 3: Status-Edge Consistency", () => {
+  /** True when at least one reported error contains `fragment`. */
+  const reports = (errors: readonly string[], fragment: string): boolean =>
+    errors.some((message) => message.includes(fragment));
+
+  test("3.1 single-exit role with multiple graph keys", () => {
+    const workflow = makeWorkflow();
+    workflow.graph.writer = {
+      _: { role: "reviewer", prompt: "Review" },
+      extra: { role: "$END", prompt: "Done" },
+    };
+
+    const found = reports(
+      validateWorkflow(workflow),
+      'role "writer" is single-exit but has status keys other than "_"',
+    );
+    expect(found).toBe(true);
+  });
+
+  test("3.2 single-exit role missing _ key", () => {
+    const workflow = makeWorkflow();
+    workflow.graph.writer = { done: { role: "reviewer", prompt: "Review" } };
+
+    const found = reports(
+      validateWorkflow(workflow),
+      'role "writer" is single-exit but graph has no "_" key',
+    );
+    expect(found).toBe(true);
+  });
+
+  test("3.3 multi-exit role with extra statuses", () => {
+    const workflow = makeWorkflow();
+    workflow.graph.reviewer = {
+      approved: { role: "$END", prompt: "Done" },
+      rejected: { role: "writer", prompt: "Fix" },
+      timeout: { role: "$END", prompt: "Timed out" },
+    };
+
+    const found = reports(
+      validateWorkflow(workflow),
+      'role "reviewer" graph has extra status keys: timeout',
+    );
+    expect(found).toBe(true);
+  });
+
+  test("3.4 multi-exit role missing a status", () => {
+    const workflow = makeWorkflow();
+    workflow.graph.reviewer = {
+      approved: { role: "$END", prompt: "Done" },
+    };
+
+    const found = reports(
+      validateWorkflow(workflow),
+      'role "reviewer" graph is missing status keys: rejected',
+    );
+    expect(found).toBe(true);
+  });
+
+  test("3.5 multi-exit role with _ key", () => {
+    const workflow = makeWorkflow();
+    workflow.graph.reviewer = { _: { role: "$END", prompt: "Done" } };
+
+    const found = reports(
+      validateWorkflow(workflow),
+      'role "reviewer" is multi-exit but graph uses "_"',
+    );
+    expect(found).toBe(true);
+  });
+});
+
+describe("Suite 3b: Enum-Based Multi-Exit", () => {
+  /**
+   * Replaces the reviewer's oneOf frontmatter with an enum-based one whose
+   * $status is "approved" | "rejected" and whose only other field is `comments`.
+   */
+  function useEnumReviewer(workflow: WorkflowPayload): void {
+    const enumFrontmatter = {
+      type: "object",
+      properties: {
+        $status: { enum: ["approved", "rejected"] },
+        comments: { type: "string" },
+      },
+      required: ["$status", "comments"],
+    };
+    workflow.roles.reviewer = {
+      ...workflow.roles.reviewer,
+      frontmatter: enumFrontmatter as unknown as string,
+    };
+  }
+
+  test("3b.1 enum multi-exit passes with matching graph keys", () => {
+    const workflow = makeWorkflow();
+    useEnumReviewer(workflow);
+    workflow.graph.reviewer = {
+      approved: { role: "$END", prompt: "Done" },
+      rejected: { role: "writer", prompt: "Fix: {{{comments}}}" },
+    };
+
+    expect(validateWorkflow(workflow)).toEqual([]);
+  });
+
+  test("3b.2 enum multi-exit with extra graph key", () => {
+    const workflow = makeWorkflow();
+    useEnumReviewer(workflow);
+    workflow.graph.reviewer = {
+      approved: { role: "$END", prompt: "Done" },
+      rejected: { role: "writer", prompt: "Fix" },
+      timeout: { role: "$END", prompt: "Timed out" },
+    };
+
+    const problems = validateWorkflow(workflow);
+    const found = problems.some((message) => message.includes("extra status keys: timeout"));
+    expect(found).toBe(true);
+  });
+
+  test("3b.3 enum multi-exit with missing graph key", () => {
+    const workflow = makeWorkflow();
+    useEnumReviewer(workflow);
+    workflow.graph.reviewer = {
+      approved: { role: "$END", prompt: "Done" },
+    };
+
+    const problems = validateWorkflow(workflow);
+    const found = problems.some((message) => message.includes("missing status keys: rejected"));
+    expect(found).toBe(true);
+  });
+
+  test("3b.4 enum with single value (not multi-exit) treated as single-exit", () => {
+    const workflow = makeWorkflow();
+    const singleValueFrontmatter = {
+      type: "object",
+      properties: {
+        $status: { enum: ["_"] },
+        plan: { type: "string" },
+      },
+      required: ["$status", "plan"],
+    };
+    workflow.roles.writer = {
+      ...workflow.roles.writer,
+      frontmatter: singleValueFrontmatter as unknown as string,
+    };
+    workflow.graph.writer = { _: { role: "reviewer", prompt: "Review: {{{plan}}}" } };
+
+    expect(validateWorkflow(workflow)).toEqual([]);
+  });
+
+  test("3b.5 enum multi-exit mustache var not in frontmatter", () => {
+    const workflow = makeWorkflow();
+    useEnumReviewer(workflow);
+    workflow.graph.reviewer = {
+      approved: { role: "$END", prompt: "Done: {{{nonexistent}}}" },
+      rejected: { role: "writer", prompt: "Fix: {{{comments}}}" },
+    };
+
+    const problems = validateWorkflow(workflow);
+    // A single message must both name the variable and say it was not found.
+    const found = problems.some(
+      (message) => message.includes("nonexistent") && message.includes("not found"),
+    );
+    expect(found).toBe(true);
+  });
+});
+
+// Suite 4: mustache variables used in graph prompts must be declared by the emitting role.
+describe("Suite 4: Mustache Template Variable Existence", () => {
+  test("4.1 prompt references nonexistent variable (single-exit)", () => {
+    const workflow = makeWorkflow();
+    workflow.graph.writer = { _: { role: "reviewer", prompt: "Review: {{{branch}}}" } };
+
+    const expectedFragment = 'prompt variable "branch" not found in role "writer" frontmatter';
+    const problems = validateWorkflow(workflow);
+    expect(problems.some((msg) => msg.includes(expectedFragment))).toBe(true);
+  });
+
+  test("4.2 prompt references nonexistent variable (multi-exit)", () => {
+    const workflow = makeWorkflow();
+    workflow.graph.reviewer = {
+      approved: { role: "$END", prompt: "Done: {{{branch}}}" },
+      rejected: { role: "writer", prompt: "Fix: {{{reason}}}" },
+    };
+
+    const expectedFragment =
+      'prompt variable "branch" not found in role "reviewer" variant "approved"';
+    const problems = validateWorkflow(workflow);
+    expect(problems.some((msg) => msg.includes(expectedFragment))).toBe(true);
+  });
+
+  test("4.3 valid mustache variables pass", () => {
+    // The unmodified fixture workflow is expected to be error-free.
+    expect(validateWorkflow(makeWorkflow())).toEqual([]);
+  });
+
+  test("4.4 $status variable is always valid", () => {
+    const workflow = makeWorkflow();
+    workflow.graph.writer = { _: { role: "reviewer", prompt: "Status: {{$status}}" } };
+
+    const problems = validateWorkflow(workflow);
+    expect(problems).toEqual([]);
+  });
+});
+
+// Suite 5: every oneOf variant must carry a const "$status" discriminant.
+describe("Suite 5: oneOf Discriminant Validity", () => {
+  test("5.1 oneOf without $status const", () => {
+    const workflow = makeWorkflow();
+    // Neither variant mentions $status at all.
+    const schemaWithoutStatus = {
+      type: "object",
+      oneOf: [
+        { properties: { summary: { type: "string" } }, required: ["summary"] },
+        { properties: { reason: { type: "string" } }, required: ["reason"] },
+      ],
+    };
+    workflow.roles.reviewer = {
+      ...workflow.roles.reviewer,
+      frontmatter: schemaWithoutStatus as unknown as string,
+    };
+
+    const problems = validateWorkflow(workflow);
+    const flagged = problems.some((msg) =>
+      msg.includes('oneOf variants must have "$status" as const discriminant'),
+    );
+    expect(flagged).toBe(true);
+  });
+
+  test("5.2 oneOf with non-const $status", () => {
+    const workflow = makeWorkflow();
+    // $status is present in both variants, but typed as a free string instead of a const.
+    const schemaWithLooseStatus = {
+      type: "object",
+      oneOf: [
+        {
+          properties: { $status: { type: "string" }, summary: { type: "string" } },
+          required: ["$status", "summary"],
+        },
+        {
+          properties: { $status: { type: "string" }, reason: { type: "string" } },
+          required: ["$status", "reason"],
+        },
+      ],
+    };
+    workflow.roles.reviewer = {
+      ...workflow.roles.reviewer,
+      frontmatter: schemaWithLooseStatus as unknown as string,
+    };
+
+    const problems = validateWorkflow(workflow);
+    const flagged = problems.some((msg) =>
+      msg.includes("oneOf variant $status must be a const value"),
+    );
+    expect(flagged).toBe(true);
+  });
+
+  test("5.3 valid oneOf passes", () => {
+    // The unmodified fixture workflow is expected to be error-free.
+    expect(validateWorkflow(makeWorkflow())).toEqual([]);
+  });
+});
+
+// Suite 6: the validator should report several independent problems in a single pass.
+describe("Suite 6: Multiple Errors Collection", () => {
+  test("6.1 multiple errors collected", () => {
+    const workflow = makeWorkflow();
+
+    // Problem 1: a role that no graph edge refers to.
+    const orphanFrontmatter = {
+      type: "object",
+      properties: { $status: { enum: ["_"] } },
+      required: ["$status"],
+    };
+    workflow.roles.orphan = {
+      description: "Orphan",
+      goal: "Nothing",
+      capabilities: [],
+      procedure: "None",
+      output: "None",
+      frontmatter: orphanFrontmatter as unknown as string,
+    };
+
+    // Problem 2: a graph entry for a role that is not defined.
+    workflow.graph.nonexistent = { _: { role: "$END", prompt: "done" } };
+
+    // Problem 3: a prompt that uses an undeclared mustache variable.
+    workflow.graph.writer = { _: { role: "reviewer", prompt: "{{{badvar}}}" } };
+
+    const problems = validateWorkflow(workflow);
+    expect(problems.length).toBeGreaterThanOrEqual(3);
+  });
+});
@@ -0,0 +1,385 @@
+import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { createFsStore } from "@uncaged/json-cas-fs";
+import type { CasRef, WorkflowPayload } from "@uncaged/workflow-protocol";
+import { afterEach, beforeEach, describe, expect, test } from "vitest";
+import { stringify } from "yaml";
+import { cmdThreadStart } from "../commands/thread.js";
+import { registerUwfSchemas } from "../schemas.js";
+import type { UwfStore } from "../store.js";
+import { loadWorkflowRegistry, saveWorkflowRegistry } from "../store.js";
+
+// ── helpers ───────────────────────────────────────────────────────────────────
+
+/**
+ * Opens a UwfStore under `storageRoot`: ensures `<storageRoot>/cas` exists, creates an
+ * fs-backed store on it and registers the UWF schemas in that store.
+ */
+async function makeUwfStore(storageRoot: string): Promise<UwfStore> {
+  const casPath = join(storageRoot, "cas");
+  await mkdir(casPath, { recursive: true });
+
+  const fsStore = createFsStore(casPath);
+  return {
+    storageRoot,
+    store: fsStore,
+    schemas: await registerUwfSchemas(fsStore),
+  };
+}
+
+/**
+ * Builds the smallest workflow payload used by these tests: a single `worker` role and the
+ * graph `$START -> worker -> $END`.
+ *
+ * Property order is kept stable because the payload is serialized by the callers.
+ */
+function makeMinimalPayload(name: string, description: string): WorkflowPayload {
+  // Inline JSON-schema object standing in for a CAS reference (hence the double cast below).
+  const workerFrontmatter = {
+    type: "object",
+    properties: {
+      $status: { type: "string" },
+    },
+    required: ["$status"],
+  };
+
+  const worker = {
+    description: "worker role",
+    goal: "do work",
+    capabilities: [],
+    procedure: "",
+    output: "",
+    frontmatter: workerFrontmatter as unknown as CasRef,
+  };
+
+  const graph = {
+    $START: { _: { role: "worker", prompt: "start working" } },
+    worker: { _: { role: "$END", prompt: "done" } },
+  };
+
+  return { name, description, roles: { worker }, graph };
+}
+
+/** Puts a minimal workflow named `name` into the store under the workflow schema; returns its CAS ref. */
+async function storeWorkflow(uwf: UwfStore, name: string): Promise<CasRef> {
+  const { store, schemas } = uwf;
+  const ref = await store.put(schemas.workflow, makeMinimalPayload(name, "Test workflow"));
+  return ref;
+}
+
+/**
+ * Serializes a minimal workflow to YAML text.
+ *
+ * When `version` is given it is embedded in the description ("Test workflow (<version>)"),
+ * which lets tests tell apart otherwise identical workflows.
+ */
+async function createWorkflowYaml(name: string, version: string | null = null): Promise<string> {
+  const description = version === null ? "Test workflow" : `Test workflow (${version})`;
+  return stringify(makeMinimalPayload(name, description));
+}
+
+// ── fixture ───────────────────────────────────────────────────────────────────
+
+// Per-test scratch tree: <tmp>/storage holds the UWF store, <tmp>/project is the directory
+// handed to cmdThreadStart as its last argument.
+let tmpDir: string;
+let storageRoot: string;
+let projectRoot: string;
+
+beforeEach(async () => {
+  const scratch = await mkdtemp(join(tmpdir(), "cli-uwf-wf-resolve-test-"));
+  tmpDir = scratch;
+  storageRoot = join(scratch, "storage");
+  projectRoot = join(scratch, "project");
+
+  for (const dir of [storageRoot, projectRoot]) {
+    await mkdir(dir, { recursive: true });
+  }
+});
+
+afterEach(async () => {
+  // force: do not fail when the tree is already gone.
+  await rm(tmpDir, { force: true, recursive: true });
+});
+
+// ── Strategy 1: CAS Hash Resolution ───────────────────────────────────────────
+
+// Strategy 1: the workflow argument is a 13-character Crockford Base32 CAS hash.
+describe("Strategy 1: CAS Hash Resolution", () => {
+  test("should resolve valid 13-char Crockford Base32 hash", async () => {
+    const uwf = await makeUwfStore(storageRoot);
+    const hash = await storeWorkflow(uwf, "test-workflow");
+
+    const result = await cmdThreadStart(storageRoot, hash, "test prompt", projectRoot);
+
+    expect(result.workflow).toBe(hash);
+    // Thread ids are 26 Crockford Base32 characters.
+    expect(result.thread).toMatch(/^[0-9A-HJKMNP-TV-Z]{26}$/);
+  });
+
+  test("should fail on invalid hash format (non-Crockford characters)", async () => {
+    await makeUwfStore(storageRoot);
+    // 13 characters long, but I, L, O and U are not part of the Crockford Base32 alphabet.
+    // (The previous input "123456789ABCD" was a well-formed hash, so this case merely
+    // duplicated the "not present in CAS" test below.)
+    const malformedHash = "ILOU56789ABCD";
+
+    await expect(
+      cmdThreadStart(storageRoot, malformedHash, "prompt", projectRoot),
+    ).rejects.toThrow();
+  });
+
+  test("should fail on valid-format hash not present in CAS", async () => {
+    await makeUwfStore(storageRoot);
+    const fakeHash = "0000000000000"; // valid format, doesn't exist
+
+    await expect(cmdThreadStart(storageRoot, fakeHash, "prompt", projectRoot)).rejects.toThrow();
+  });
+
+  test("should reject 40-char hex hash (legacy format not supported)", async () => {
+    await makeUwfStore(storageRoot);
+    const hexHash = "a".repeat(40);
+
+    await expect(cmdThreadStart(storageRoot, hexHash, "prompt", projectRoot)).rejects.toThrow();
+  });
+});
+
+// ── Strategy 2: File Path Resolution ──────────────────────────────────────────
+
+// Strategy 2: the workflow argument is a path to a YAML file (absolute, or relative to the cwd argument).
+describe("Strategy 2: File Path Resolution", () => {
+  const CAS_HASH = /^[0-9A-HJKMNP-TV-Z]{13}$/;
+
+  test("should load workflow from absolute file path", async () => {
+    await makeUwfStore(storageRoot);
+    const yamlPath = join(tmpDir, "test-workflow.yaml");
+    const yamlText = await createWorkflowYaml("test-workflow");
+    await writeFile(yamlPath, yamlText);
+
+    const started = await cmdThreadStart(storageRoot, yamlPath, "prompt", projectRoot);
+
+    expect(started.workflow).toMatch(CAS_HASH);
+    // Re-open the store and confirm the loaded workflow was persisted under the returned hash.
+    const reopened = await makeUwfStore(storageRoot);
+    const stored = reopened.store.get(started.workflow);
+    expect(stored).not.toBeNull();
+    if (stored === null) {
+      return;
+    }
+    expect((stored.payload as WorkflowPayload).name).toBe("test-workflow");
+  });
+
+  test("should load workflow from relative file path", async () => {
+    await makeUwfStore(storageRoot);
+    const relativePath = "test-workflow.yaml";
+    const yamlText = await createWorkflowYaml("test-workflow");
+    await writeFile(join(projectRoot, relativePath), yamlText);
+
+    const started = await cmdThreadStart(storageRoot, relativePath, "prompt", projectRoot);
+
+    expect(started.workflow).toMatch(CAS_HASH);
+  });
+
+  test("should fail when file path does not exist", async () => {
+    await makeUwfStore(storageRoot);
+
+    const attempt = cmdThreadStart(storageRoot, "./nonexistent.yaml", "prompt", projectRoot);
+    await expect(attempt).rejects.toThrow();
+  });
+
+  test("should fail on invalid YAML syntax in file", async () => {
+    await makeUwfStore(storageRoot);
+    const brokenPath = join(tmpDir, "bad-syntax.yaml");
+    await writeFile(brokenPath, "invalid: yaml: : :");
+
+    const attempt = cmdThreadStart(storageRoot, brokenPath, "prompt", projectRoot);
+    await expect(attempt).rejects.toThrow();
+  });
+
+  test("should fail on valid YAML with invalid WorkflowPayload shape", async () => {
+    await makeUwfStore(storageRoot);
+    const incompletePath = join(tmpDir, "invalid-workflow.yaml");
+    await writeFile(incompletePath, "name: test\n# missing roles and graph");
+
+    const attempt = cmdThreadStart(storageRoot, incompletePath, "prompt", projectRoot);
+    await expect(attempt).rejects.toThrow();
+  });
+
+  test("should enforce filename matches workflow name", async () => {
+    await makeUwfStore(storageRoot);
+    // File is called "solve-issue" while the payload inside declares "wrong-name".
+    const mismatchedPath = join(tmpDir, "solve-issue.yaml");
+    const yamlText = await createWorkflowYaml("wrong-name");
+    await writeFile(mismatchedPath, yamlText);
+
+    const attempt = cmdThreadStart(storageRoot, mismatchedPath, "prompt", projectRoot);
+    await expect(attempt).rejects.toThrow();
+  });
+});
+
+// ── Strategy 3: Local Discovery (Parent Traversal) ────────────────────────────
+
+// Strategy 3: a bare workflow name is looked up in `.workflow/` (or `.workflows/`) directories,
+// starting at the cwd argument and walking up through its parents.
+describe("Strategy 3: Local Discovery", () => {
+  const CAS_HASH = /^[0-9A-HJKMNP-TV-Z]{13}$/;
+
+  /** Creates `<projectRoot>/<dirName>/` and writes `fileName` with `yamlText` into it. */
+  async function seedProjectDir(
+    dirName: string,
+    fileName: string,
+    yamlText: string,
+  ): Promise<void> {
+    const dir = join(projectRoot, dirName);
+    await mkdir(dir, { recursive: true });
+    await writeFile(join(dir, fileName), yamlText);
+  }
+
+  test("should find workflow in current directory .workflow/", async () => {
+    await makeUwfStore(storageRoot);
+    await seedProjectDir(".workflow", "solve-issue.yaml", await createWorkflowYaml("solve-issue"));
+
+    const started = await cmdThreadStart(storageRoot, "solve-issue", "prompt", projectRoot);
+
+    expect(started.workflow).toMatch(CAS_HASH);
+    const reopened = await makeUwfStore(storageRoot);
+    const stored = reopened.store.get(started.workflow);
+    expect(stored).not.toBeNull();
+    if (stored === null) {
+      return;
+    }
+    expect((stored.payload as WorkflowPayload).name).toBe("solve-issue");
+  });
+
+  test("should find workflow in parent directory .workflow/", async () => {
+    await makeUwfStore(storageRoot);
+    await seedProjectDir(".workflow", "solve-issue.yaml", await createWorkflowYaml("solve-issue"));
+
+    // Start three levels below the directory that owns `.workflow/`.
+    const nestedCwd = join(projectRoot, "packages", "cli-workflow", "src");
+    await mkdir(nestedCwd, { recursive: true });
+
+    const started = await cmdThreadStart(storageRoot, "solve-issue", "prompt", nestedCwd);
+
+    expect(started.workflow).toMatch(CAS_HASH);
+  });
+
+  test("should stop at filesystem root when traversing", async () => {
+    await makeUwfStore(storageRoot);
+    const deepCwd = join(tmpDir, "deep", "path", "that", "does", "not", "have", "workflow");
+    await mkdir(deepCwd, { recursive: true });
+
+    const attempt = cmdThreadStart(storageRoot, "nonexistent", "prompt", deepCwd);
+    await expect(attempt).rejects.toThrow();
+  });
+
+  test("should prefer .workflow/ over .workflows/ directory", async () => {
+    await makeUwfStore(storageRoot);
+    // Same workflow name in both directories; the description suffix tells them apart.
+    await seedProjectDir(
+      ".workflow",
+      "solve-issue.yaml",
+      await createWorkflowYaml("solve-issue", "1"),
+    );
+    await seedProjectDir(
+      ".workflows",
+      "solve-issue.yaml",
+      await createWorkflowYaml("solve-issue", "2"),
+    );
+
+    const started = await cmdThreadStart(storageRoot, "solve-issue", "prompt", projectRoot);
+
+    const reopened = await makeUwfStore(storageRoot);
+    const stored = reopened.store.get(started.workflow);
+    expect(stored).not.toBeNull();
+    if (stored === null) {
+      return;
+    }
+    expect((stored.payload as WorkflowPayload).description).toBe("Test workflow (1)");
+  });
+
+  test("should support .yml extension in local discovery", async () => {
+    await makeUwfStore(storageRoot);
+    await seedProjectDir(".workflow", "solve-issue.yml", await createWorkflowYaml("solve-issue"));
+
+    const started = await cmdThreadStart(storageRoot, "solve-issue", "prompt", projectRoot);
+
+    expect(started.workflow).toMatch(CAS_HASH);
+  });
+});
+
+// ── Strategy 4: Global Registry Fallback ──────────────────────────────────────
+
+// Strategy 4: a bare workflow name with no local file is resolved through the saved registry.
+describe("Strategy 4: Global Registry Resolution", () => {
+  test("should resolve workflow from global registry when not found locally", async () => {
+    const uwf = await makeUwfStore(storageRoot);
+    const registeredHash = await storeWorkflow(uwf, "deploy-pipeline");
+
+    const registry = await loadWorkflowRegistry(storageRoot);
+    registry["deploy-pipeline"] = registeredHash;
+    await saveWorkflowRegistry(storageRoot, registry);
+
+    // A cwd without any `.workflow/` directory of its own.
+    const isolatedCwd = join(tmpDir, "isolated");
+    await mkdir(isolatedCwd, { recursive: true });
+
+    const started = await cmdThreadStart(storageRoot, "deploy-pipeline", "prompt", isolatedCwd);
+
+    expect(started.workflow).toBe(registeredHash);
+  });
+
+  test("should fail when workflow not found in any strategy", async () => {
+    await makeUwfStore(storageRoot);
+
+    const attempt = cmdThreadStart(storageRoot, "nonexistent", "prompt", tmpDir);
+    await expect(attempt).rejects.toThrow();
+  });
+});
+
+// ── Strategy Priority Order ───────────────────────────────────────────────────
+
+// When more than one strategy could match, the description suffix reveals which source won.
+describe("Resolution Priority", () => {
+  test("should use explicit file path over local discovery", async () => {
+    await makeUwfStore(storageRoot);
+
+    // Candidate A: discoverable through `.workflow/`.
+    const discoveryDir = join(projectRoot, ".workflow");
+    await mkdir(discoveryDir, { recursive: true });
+    const discoveryYaml = await createWorkflowYaml("solve-issue", "discovery");
+    await writeFile(join(discoveryDir, "solve-issue.yaml"), discoveryYaml);
+
+    // Candidate B: a file addressed by its full path.
+    const explicitPath = join(projectRoot, "custom-solve-issue.yaml");
+    const explicitYaml = await createWorkflowYaml("custom-solve-issue", "explicit");
+    await writeFile(explicitPath, explicitYaml);
+
+    const started = await cmdThreadStart(storageRoot, explicitPath, "prompt", projectRoot);
+
+    const reopened = await makeUwfStore(storageRoot);
+    const stored = reopened.store.get(started.workflow);
+    expect(stored).not.toBeNull();
+    if (stored === null) {
+      return;
+    }
+    expect((stored.payload as WorkflowPayload).description).toBe("Test workflow (explicit)");
+  });
+
+  test("should use local discovery over global registry", async () => {
+    const uwf = await makeUwfStore(storageRoot);
+
+    // Candidate A: registered under the same name in the registry.
+    const registeredHash = await storeWorkflow(uwf, "solve-issue");
+    const registry = await loadWorkflowRegistry(storageRoot);
+    registry["solve-issue"] = registeredHash;
+    await saveWorkflowRegistry(storageRoot, registry);
+
+    // Candidate B: a local `.workflow/` file with the same name.
+    const discoveryDir = join(projectRoot, ".workflow");
+    await mkdir(discoveryDir, { recursive: true });
+    await writeFile(
+      join(discoveryDir, "solve-issue.yaml"),
+      await createWorkflowYaml("solve-issue", "local"),
+    );
+
+    const started = await cmdThreadStart(storageRoot, "solve-issue", "prompt", projectRoot);
+
+    const reopened = await makeUwfStore(storageRoot);
+    const stored = reopened.store.get(started.workflow);
+    expect(stored).not.toBeNull();
+    if (stored === null) {
+      return;
+    }
+    expect((stored.payload as WorkflowPayload).description).toBe("Test workflow (local)");
+  });
+});
+
+// ── Edge Cases ────────────────────────────────────────────────────────────────
+
+// Inputs that sit on the boundary between strategies, plus blank ids.
+describe("Edge Cases", () => {
+  const CAS_HASH = /^[0-9A-HJKMNP-TV-Z]{13}$/;
+
+  test("should treat '13-char-string.yaml' as file path, not CAS hash", async () => {
+    await makeUwfStore(storageRoot);
+    const hashLikeName = "0123456789ABC.yaml"; // 13 chars + .yaml
+    const yamlText = await createWorkflowYaml("0123456789ABC");
+    await writeFile(join(projectRoot, hashLikeName), yamlText);
+
+    const started = await cmdThreadStart(storageRoot, hashLikeName, "prompt", projectRoot);
+
+    expect(started.workflow).toMatch(CAS_HASH);
+  });
+
+  test("should handle workflow names containing slashes as file paths", async () => {
+    await makeUwfStore(storageRoot);
+    const relativePath = "subdir/solve-issue.yaml";
+    await mkdir(join(projectRoot, "subdir"), { recursive: true });
+    const yamlText = await createWorkflowYaml("solve-issue");
+    await writeFile(join(projectRoot, relativePath), yamlText);
+
+    const started = await cmdThreadStart(storageRoot, relativePath, "prompt", projectRoot);
+
+    expect(started.workflow).toMatch(CAS_HASH);
+  });
+
+  test("should handle absolute paths correctly", async () => {
+    await makeUwfStore(storageRoot);
+    const absolutePath = join(tmpDir, "abs-workflow.yaml");
+    const yamlText = await createWorkflowYaml("abs-workflow");
+    await writeFile(absolutePath, yamlText);
+
+    const started = await cmdThreadStart(storageRoot, absolutePath, "prompt", projectRoot);
+
+    expect(started.workflow).toMatch(CAS_HASH);
+  });
+
+  test("should fail on empty workflow ID", async () => {
+    await makeUwfStore(storageRoot);
+
+    const attempt = cmdThreadStart(storageRoot, "", "prompt", projectRoot);
+    await expect(attempt).rejects.toThrow();
+  });
+
+  test("should fail on whitespace-only workflow ID", async () => {
+    await makeUwfStore(storageRoot);
+
+    const attempt = cmdThreadStart(storageRoot, "   ", "prompt", projectRoot);
+    await expect(attempt).rejects.toThrow();
+  });
+});
@@ -0,0 +1,147 @@
+import { mkdir, readdir, readFile, rename, rm, writeFile } from "node:fs/promises";
+import { join } from "node:path";
+import type { RunningThreadItem, ThreadId } from "@uncaged/workflow-protocol";
+
+import type { RunningMarker } from "./types.js";
+
+/** Name of the sub-directory (below the storage root) that holds the running markers. */
+const RUNNING_SUBDIR = "running";
+
+/**
+ * Resolve the directory that holds the running markers.
+ *
+ * @param storageRoot - Root directory of the workflow storage.
+ * @returns `<storageRoot>/running`.
+ */
+export function getRunningDir(storageRoot: string): string {
+  const runningDir = join(storageRoot, RUNNING_SUBDIR);
+  return runningDir;
+}
+
+/**
+ * Resolve the marker file of one thread.
+ *
+ * @param storageRoot - Root directory of the workflow storage.
+ * @param threadId - Thread whose marker path is wanted.
+ * @returns `<running dir>/<threadId>.json`.
+ */
+export function getMarkerPath(storageRoot: string, threadId: ThreadId): string {
+  const fileName = `${threadId}.json`;
+  return join(getRunningDir(storageRoot), fileName);
+}
+
+/**
+ * Check if a PID is still running.
+ *
+ * Probes the process with signal 0, which runs the existence and permission checks
+ * without delivering a signal.
+ *
+ * @param pid - Process id to probe.
+ * @returns true if the process exists (also when it exists but this process is not
+ *   allowed to signal it), false otherwise or when `pid` is not a positive integer.
+ */
+export function isPidAlive(pid: number): boolean {
+  // Zero and negative values address process groups rather than a single process, and
+  // non-integers are never valid pids; a marker carrying one must not count as alive.
+  if (!Number.isInteger(pid) || pid <= 0) {
+    return false;
+  }
+
+  try {
+    // process.kill with signal 0 checks existence without killing
+    process.kill(pid, 0);
+    return true;
+  } catch (err) {
+    // EPERM: the process exists but belongs to someone else, so it is alive.
+    // ESRCH (no such process) and anything else: treat as gone.
+    return (err as { code?: unknown } | null)?.code === "EPERM";
+  }
+}
+
+/**
+ * Create a marker file for a running thread.
+ *
+ * Writes to a temp file in the same directory, then atomically renames it onto the
+ * final marker path. The running directory is created when missing.
+ *
+ * @param storageRoot - Root directory of the workflow storage.
+ * @param marker - Marker content; `marker.thread` determines the file name.
+ * @throws Re-throws any filesystem error from mkdir, writeFile or rename. The temp
+ *   file is removed (best effort) before a write/rename error propagates.
+ */
+export async function createMarker(storageRoot: string, marker: RunningMarker): Promise<void> {
+  const runningDir = getRunningDir(storageRoot);
+  await mkdir(runningDir, { recursive: true });
+
+  const markerPath = getMarkerPath(storageRoot, marker.thread);
+  // The temp name does not end in ".json", so listRunningThreads never reads it as a marker.
+  const tempPath = join(runningDir, `.${marker.thread}-${process.pid}.tmp`);
+
+  const content = JSON.stringify(marker, null, 2);
+  try {
+    await writeFile(tempPath, content, "utf8");
+    await rename(tempPath, markerPath);
+  } catch (err) {
+    // Do not leave a partial temp file behind; the original error is what the caller needs.
+    await rm(tempPath, { force: true }).catch(() => undefined);
+    throw err;
+  }
+}
+
+/**
+ * Remove the marker file of a thread.
+ *
+ * Best effort: any failure of the removal (a missing file included) is swallowed, so
+ * the returned promise resolves either way.
+ *
+ * @param storageRoot - Root directory of the workflow storage.
+ * @param threadId - Thread whose marker should be removed.
+ */
+export async function deleteMarker(storageRoot: string, threadId: ThreadId): Promise<void> {
+  const target = getMarkerPath(storageRoot, threadId);
+  await rm(target).catch(() => undefined);
+}
+
+/**
+ * Read a marker file.
+ *
+ * @param storageRoot - Root directory of the workflow storage.
+ * @param threadId - Thread whose marker should be read.
+ * @returns The parsed marker, or null if the file doesn't exist, can't be read, is not
+ *   valid JSON, or does not have the RunningMarker shape.
+ */
+export async function readMarker(
+  storageRoot: string,
+  threadId: ThreadId,
+): Promise<RunningMarker | null> {
+  const markerPath = getMarkerPath(storageRoot, threadId);
+
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(await readFile(markerPath, "utf8"));
+  } catch {
+    return null;
+  }
+
+  // JSON.parse accepts any JSON value (null, arrays, strings, ...); only hand back objects
+  // that actually carry the marker fields, so callers can rely on the declared type.
+  if (typeof parsed !== "object" || parsed === null) {
+    return null;
+  }
+  const candidate = parsed as Record<string, unknown>;
+  const wellFormed =
+    typeof candidate.thread === "string" &&
+    typeof candidate.workflow === "string" &&
+    typeof candidate.pid === "number" &&
+    typeof candidate.startedAt === "number";
+
+  return wellFormed ? (parsed as RunningMarker) : null;
+}
+
+/**
+ * List all running threads, filtering out stale markers.
+ *
+ * Every `*.json` entry of the running directory is read as a marker. Unreadable markers
+ * are skipped; markers whose pid is no longer alive are deleted and skipped.
+ *
+ * @param storageRoot - Root directory of the workflow storage.
+ * @returns One item per live marker, in directory-listing order; an empty list when the
+ *   running directory doesn't exist or can't be read.
+ */
+export async function listRunningThreads(storageRoot: string): Promise<RunningThreadItem[]> {
+  const entries = await readdir(getRunningDir(storageRoot)).catch(() => null);
+  if (entries === null) {
+    return [];
+  }
+
+  const alive: RunningThreadItem[] = [];
+  const markerFiles = entries.filter((name) => name.endsWith(".json"));
+
+  for (const fileName of markerFiles) {
+    // The file name minus its ".json" suffix is the thread id.
+    const id = fileName.slice(0, -".json".length) as ThreadId;
+    const marker = await readMarker(storageRoot, id);
+    if (marker === null) {
+      continue;
+    }
+
+    if (isPidAlive(marker.pid)) {
+      const { thread, workflow, pid, startedAt } = marker;
+      alive.push({ thread, workflow, pid, startedAt });
+    } else {
+      // Owning process is gone: clean the stale marker up.
+      await deleteMarker(storageRoot, id);
+    }
+  }
+
+  return alive;
+}
+
+/**
+ * Check if a thread is currently executing in the background.
+ *
+ * A marker whose pid is no longer alive is treated as stale and deleted.
+ *
+ * @param storageRoot - Root directory of the workflow storage.
+ * @param threadId - Thread to look up.
+ * @returns The marker if the recorded process is alive, null otherwise.
+ */
+export async function isThreadRunning(
+  storageRoot: string,
+  threadId: ThreadId,
+): Promise<RunningMarker | null> {
+  const marker = await readMarker(storageRoot, threadId);
+
+  if (marker !== null && isPidAlive(marker.pid)) {
+    return marker;
+  }
+  if (marker !== null) {
+    // Marker exists but its process is gone.
+    await deleteMarker(storageRoot, threadId);
+  }
+  return null;
+}
@@ -0,0 +1,11 @@
+export {
+  createMarker,
+  deleteMarker,
+  getMarkerPath,
+  getRunningDir,
+  isPidAlive,
+  isThreadRunning,
+  listRunningThreads,
+  readMarker,
+} from "./background.js";
+export type { RunningMarker } from "./types.js";
@@ -0,0 +1,9 @@
+import type { CasRef, ThreadId } from "@uncaged/workflow-protocol";
+
+/** Marker file stored at ~/.uncaged/workflow/running/<thread-id>.json */
+export type RunningMarker = {
+  /** Id of the thread the marker belongs to. */
+  thread: ThreadId;
+  /** CAS reference of a workflow — presumably the one the thread runs; confirm against the writer. */
+  workflow: CasRef;
+  /** Process id recorded in the marker. */
+  pid: number;
+  /** Start timestamp. NOTE(review): unit is not visible here (presumably epoch milliseconds) — confirm. */
+  startedAt: number;
+};
@@ -1,32 +1,41 @@
 #!/usr/bin/env bun

-import type { ThreadId } from "@uncaged/workflow-protocol";
+import type { CasRef, ThreadId } from "@uncaged/workflow-protocol";
 import { Command } from "commander";
-import { stringify as yamlStringify } from "yaml";
 import {
  cmdCasGet,
  cmdCasHas,
  cmdCasPut,
+  cmdCasPutText,
  cmdCasRefs,
  cmdCasReindex,
  cmdCasSchemaGet,
  cmdCasSchemaList,
  cmdCasWalk,
 } from "./commands/cas.js";
+import { cmdLogClean, cmdLogList, cmdLogShow } from "./commands/log.js";
 import { cmdSetup, cmdSetupInteractive } from "./commands/setup.js";
 import {
-  cmdThreadFork,
-  cmdThreadKill,
+  cmdSkillArchitecture,
+  cmdSkillCli,
+  cmdSkillList,
+  cmdSkillModerator,
+  cmdSkillYaml,
+} from "./commands/skill.js";
+import { cmdStepFork, cmdStepList, cmdStepRead, cmdStepShow } from "./commands/step.js";
+import {
+  cmdThreadCancel,
+  cmdThreadExec,
  cmdThreadList,
  cmdThreadRead,
  cmdThreadShow,
  cmdThreadStart,
-  cmdThreadStep,
-  cmdThreadStepDetails,
-  cmdThreadSteps,
+  cmdThreadStop,
  THREAD_READ_DEFAULT_QUOTA,
+  type ThreadStatus,
 } from "./commands/thread.js";
-import { cmdWorkflowList, cmdWorkflowPut, cmdWorkflowShow } from "./commands/workflow.js";
+import { parseTimeInput } from "./commands/thread-time-parser.js";
+import { cmdWorkflowAdd, cmdWorkflowList, cmdWorkflowShow } from "./commands/workflow.js";
 import { formatOutput, type OutputFormat } from "./format.js";
 import { resolveStorageRoot } from "./store.js";

@@ -47,19 +56,28 @@ const program = new Command();

 // eslint-disable-next-line -- dynamic import for version
 const pkg = await import("../package.json", { with: { type: "json" } });
-program.name("uwf").description("Stateless workflow CLI").version(pkg.default.version, "-V, --version");
+program
+  .name("uwf")
+  .description(
+    "Stateless workflow CLI\n\n" +
+      "Four-layer architecture:\n" +
+      "  workflow → thread → step → turn",
+  )
+  .version(pkg.default.version, "-V, --version");
 program.option("--format <fmt>", "Output format: json or yaml", "json");

-const workflow = program.command("workflow").description("Workflow registry and CAS");
+const workflow = program
+  .command("workflow")
+  .description("Workflow definitions (layer 1: templates)");

 workflow
-  .command("put")
+  .command("add")
  .description("Register a workflow from YAML")
  .argument("<file>", "Workflow YAML file")
  .action((file: string) => {
    const storageRoot = resolveStorageRoot();
    runAction(async () => {
-      const result = await cmdWorkflowPut(storageRoot, file);
+      const result = await cmdWorkflowAdd(storageRoot, file);
      writeOutput(result);
    });
  });
@@ -82,12 +100,12 @@ workflow
  .action(() => {
    const storageRoot = resolveStorageRoot();
    runAction(async () => {
-      const result = await cmdWorkflowList(storageRoot);
+      const result = await cmdWorkflowList(storageRoot, process.cwd());
      writeOutput(result);
    });
  });

-const thread = program.command("thread").description("Thread lifecycle and execution");
+const thread = program.command("thread").description("Thread execution (layer 2: instances)");

 thread
  .command("start")
@@ -97,24 +115,52 @@ thread
  .action((workflow: string, opts: { prompt: string }) => {
    const storageRoot = resolveStorageRoot();
    runAction(async () => {
-      const result = await cmdThreadStart(storageRoot, workflow, opts.prompt);
+      const result = await cmdThreadStart(storageRoot, workflow, opts.prompt, process.cwd());
      writeOutput(result);
    });
  });

 thread
-  .command("step")
-  .description("Execute one step")
+  .command("exec")
+  .description("Execute one or more steps")
  .argument("<thread-id>", "Thread ULID")
  .option("--agent <cmd>", "Override agent command")
-  .action((threadId: string, opts: { agent: string | undefined }) => {
-    const storageRoot = resolveStorageRoot();
-    runAction(async () => {
-      const agentOverride = opts.agent ?? null;
-      const result = await cmdThreadStep(storageRoot, threadId, agentOverride);
-      writeOutput(result);
-    });
-  });
+  .option("-c, --count <number>", "Number of steps to run (default: 1)")
+  .option("--background", "Run in background and return immediately")
+  .option("--_background-worker", "Internal flag for background worker process", false)
+  .action(
+    (
+      threadId: string,
+      opts: {
+        agent: string | undefined;
+        count: string | undefined;
+        background: boolean;
+        _backgroundWorker: boolean;
+      },
+    ) => {
+      const storageRoot = resolveStorageRoot();
+      runAction(async () => {
+        const agentOverride = opts.agent ?? null;
+        const count = opts.count !== undefined ? Number(opts.count) : 1;
+        const background = opts.background ?? false;
+        const backgroundWorker = opts._backgroundWorker ?? false;
+
+        const results = await cmdThreadExec(
+          storageRoot,
+          threadId,
+          agentOverride,
+          count,
+          background,
+          backgroundWorker,
+        );
+        if (results.length === 1) {
+          writeOutput(results[0]);
+        } else {
+          writeOutput(results);
+        }
+      });
+    },
+  );

 thread
  .command("show")
@@ -128,38 +174,124 @@ thread
    });
  });

+// Helper functions for thread list command parsing
+/**
+ * Parse the `--status` option of `thread list`.
+ *
+ * Accepts a comma-separated list of `idle`, `running`, `completed` and the alias
+ * `active` (expanded to `idle` + `running`). The alias may appear anywhere in the list;
+ * duplicates are dropped while first-seen order is kept.
+ *
+ * @param status - Raw option value, or undefined when the option was not given.
+ * @returns The selected statuses, or null when no filter was requested.
+ *   Writes a message to stderr and exits with code 1 on an unknown status.
+ */
+function parseStatusFilter(status: string | undefined): ThreadStatus[] | null {
+  if (status === undefined) return null;
+
+  const validStatuses: ThreadStatus[] = ["idle", "running", "completed"];
+  const selected: ThreadStatus[] = [];
+  const select = (value: ThreadStatus): void => {
+    if (!selected.includes(value)) selected.push(value);
+  };
+
+  for (const part of status.split(",").map((s) => s.trim())) {
+    if (part === "active") {
+      select("idle");
+      select("running");
+      continue;
+    }
+    if (!validStatuses.includes(part as ThreadStatus)) {
+      process.stderr.write(
+        `Invalid status: ${part}. Must be one of: idle, running, completed, active\n`,
+      );
+      process.exit(1);
+    }
+    select(part as ThreadStatus);
+  }
+  return selected;
+}
+
+/**
+ * Parse the `--after` / `--before` options of `thread list`.
+ *
+ * @param after - Raw `--after` value, or undefined when not given.
+ * @param before - Raw `--before` value, or undefined when not given.
+ * @param nowMs - Reference "now" passed through to parseTimeInput.
+ * @returns The parsed bounds; a bound is null when its option was not given.
+ *   Writes the parser's message to stderr and exits with code 1 when parsing throws.
+ */
+function parseTimeFilters(
+  after: string | undefined,
+  before: string | undefined,
+  nowMs: number,
+): { afterMs: number | null; beforeMs: number | null } {
+  const toMs = (input: string | undefined): number | null =>
+    input === undefined ? null : parseTimeInput(input, nowMs);
+
+  try {
+    return { afterMs: toMs(after), beforeMs: toMs(before) };
+  } catch (err) {
+    process.stderr.write(`${err instanceof Error ? err.message : String(err)}\n`);
+    process.exit(1);
+  }
+}
+
+/**
+ * Parse the `--skip` / `--take` options of `thread list`.
+ *
+ * Values must be plain decimal integers (an optional leading "+" and surrounding
+ * whitespace are tolerated). Inputs such as "3x" or "1.5", which Number.parseInt would
+ * silently truncate, are rejected.
+ *
+ * @param skip - Raw `--skip` value, or undefined when not given.
+ * @param take - Raw `--take` value, or undefined when not given.
+ * @returns The parsed numbers; a field is null when its option was not given.
+ *   Writes a message to stderr and exits with code 1 when `skip` is not a non-negative
+ *   integer or `take` is not a positive integer.
+ */
+function parsePaginationOptions(
+  skip: string | undefined,
+  take: string | undefined,
+): { skip: number | null; take: number | null } {
+  // NaN for anything that is not made of decimal digits only.
+  const parseStrictInt = (raw: string): number => {
+    const text = raw.trim();
+    return /^\+?\d+$/.test(text) ? Number(text) : Number.NaN;
+  };
+
+  let skipVal: number | null = null;
+  let takeVal: number | null = null;
+
+  if (skip !== undefined) {
+    skipVal = parseStrictInt(skip);
+    if (!Number.isSafeInteger(skipVal) || skipVal < 0) {
+      process.stderr.write("--skip must be a non-negative integer\n");
+      process.exit(1);
+    }
+  }
+  if (take !== undefined) {
+    takeVal = parseStrictInt(take);
+    if (!Number.isSafeInteger(takeVal) || takeVal < 1) {
+      process.stderr.write("--take must be a positive integer\n");
+      process.exit(1);
+    }
+  }
+  return { skip: skipVal, take: takeVal };
+}
+
 thread
  .command("list")
-  .description("List active threads")
-  .option("--all", "Include archived threads")
-  .action((opts: { all: boolean }) => {
+  .description("List threads")
+  .option(
+    "--status <status>",
+    "Filter by status: idle, running, completed, active (idle+running), or comma-separated values",
+  )
+  .option("--after <date>", "Filter threads created after this date (ISO or relative like '7d')")
+  .option("--before <date>", "Filter threads created before this date (ISO or relative like '7d')")
+  .option("--skip <n>", "Skip first n threads")
+  .option("--take <n>", "Return at most n threads")
+  .action(
+    (opts: {
+      status: string | undefined;
+      after: string | undefined;
+      before: string | undefined;
+      skip: string | undefined;
+      take: string | undefined;
+    }) => {
+      const storageRoot = resolveStorageRoot();
+      runAction(async () => {
+        const statusFilter = parseStatusFilter(opts.status);
+        const nowMs = Date.now();
+        const { afterMs, beforeMs } = parseTimeFilters(opts.after, opts.before, nowMs);
+        const { skip, take } = parsePaginationOptions(opts.skip, opts.take);
+
+        const result = await cmdThreadList(
+          storageRoot,
+          statusFilter,
+          afterMs,
+          beforeMs,
+          skip,
+          take,
+        );
+        writeOutput(result);
+      });
+    },
+  );
+
+thread
+  .command("stop")
+  .description("Stop background execution of a thread (keep thread active)")
+  .argument("<thread-id>", "Thread ULID")
+  .action((threadId: string) => {
    const storageRoot = resolveStorageRoot();
    runAction(async () => {
-      const result = await cmdThreadList(storageRoot, opts.all);
+      const result = await cmdThreadStop(storageRoot, threadId);
      writeOutput(result);
    });
  });

 thread
-  .command("kill")
-  .description("Terminate and archive a thread")
+  .command("cancel")
+  .description("Cancel a thread (stop execution and move to history)")
  .argument("<thread-id>", "Thread ULID")
  .action((threadId: string) => {
    const storageRoot = resolveStorageRoot();
    runAction(async () => {
-      const result = await cmdThreadKill(storageRoot, threadId);
-      writeOutput(result);
-    });
-  });
-
-thread
-  .command("steps")
-  .description("List all steps in a thread")
-  .argument("<thread-id>", "Thread ULID")
-  .action((threadId: string) => {
-    const storageRoot = resolveStorageRoot();
-    runAction(async () => {
-      const result = await cmdThreadSteps(storageRoot, threadId);
+      const result = await cmdThreadCancel(storageRoot, threadId);
      writeOutput(result);
    });
  });
@@ -193,28 +325,195 @@ thread
    },
  );

-thread
+// `step` command group: inspection of individual step results.
+const step = program.command("step").description("Step results (layer 3: single cycle)");
+
+// step list <thread-id> — print every step recorded for the given thread.
+step
+  .command("list")
+  .description("List all steps in a thread")
+  .argument("<thread-id>", "Thread ULID")
+  .action((threadId: string) => {
+    const root = resolveStorageRoot();
+    runAction(async () => {
+      writeOutput(await cmdStepList(root, threadId));
+    });
+  });
+
+// step show <step-hash> — print the detail of one step node.
+step
+  .command("show")
+  .description("Show details of a specific step")
+  .argument("<step-hash>", "CAS hash of the StepNode")
+  .action((stepHash: string) => {
+    const root = resolveStorageRoot();
+    runAction(async () => {
+      writeOutput(await cmdStepShow(root, stepHash as CasRef));
+    });
+  });
+
+// step read <step-hash> [--quota <chars>] — render a step's turns as markdown on stdout.
+step
+  .command("read")
+  .description("Read a step's turns as human-readable markdown")
+  .argument("<step-hash>", "CAS hash of the StepNode")
+  .option("--quota <chars>", "Max output characters", "4000")
+  .action((stepHash: string, opts: { quota: string }) => {
+    const storageRoot = resolveStorageRoot();
+    runAction(async () => {
+      // Accept only a plain decimal integer. Number.parseInt on its own stops at the
+      // first non-digit, so "12abc" or "3.9" would otherwise pass as 12 and 3 even
+      // though the error message below promises a positive integer.
+      const quotaText = opts.quota.trim();
+      const quota = /^\d+$/.test(quotaText) ? Number.parseInt(quotaText, 10) : Number.NaN;
+      if (!Number.isFinite(quota) || quota < 1) {
+        process.stderr.write("invalid --quota: must be a positive integer\n");
+        process.exit(1);
+      }
+      const markdown = await cmdStepRead(storageRoot, stepHash as CasRef, quota);
+      // Always end the output with exactly one trailing newline.
+      process.stdout.write(markdown.endsWith("\n") ? markdown : `${markdown}\n`);
+    });
+  });
+
+step
  .command("fork")
  .description("Fork a thread from a specific step")
  .argument("<step-hash>", "CAS hash of the StartNode or StepNode to fork from")
  .action((stepHash: string) => {
    const storageRoot = resolveStorageRoot();
    runAction(async () => {
-      const result = await cmdThreadFork(storageRoot, stepHash);
+      const result = await cmdStepFork(storageRoot, stepHash as CasRef);
      writeOutput(result);
    });
  });

+// ── Deprecation Handlers ──────────────────────────────────────────────────────
+// These commands have been removed. Show helpful error messages.
+
+// Removed command: `workflow put` (replaced by `workflow add`).
+workflow
+  .command("put")
+  .description("[DEPRECATED] Use 'workflow add' instead")
+  .argument("<file>", "Workflow YAML file")
+  .action(() => {
+    const notice = [
+      "Error: Command 'workflow put' has been removed.",
+      "Use 'workflow add' instead.",
+      "",
+      "For more information, see: uwf help workflow add",
+      "",
+    ].join("\n");
+    process.stderr.write(notice);
+    process.exit(1);
+  });
+
+// Removed command: `thread step` (replaced by `thread exec`).
+// Unknown options are allowed so old invocations reach this message instead of a parse error.
+thread
+  .command("step")
+  .description("[DEPRECATED] Use 'thread exec' instead")
+  .argument("<thread-id>", "Thread ULID")
+  .allowUnknownOption()
+  .action(() => {
+    const notice = [
+      "Error: Command 'thread step' has been removed.",
+      "Use 'thread exec' instead.",
+      "",
+      "For more information, see: uwf help thread exec",
+      "",
+    ].join("\n");
+    process.stderr.write(notice);
+    process.exit(1);
+  });
+
+// Removed command: `thread steps` (replaced by `step list`).
+thread
+  .command("steps")
+  .description("[DEPRECATED] Use 'step list' instead")
+  .argument("<thread-id>", "Thread ULID")
+  .action(() => {
+    const notice = [
+      "Error: Command 'thread steps' has been removed.",
+      "Use 'step list' instead.",
+      "",
+      "For more information, see: uwf help step list",
+      "",
+    ].join("\n");
+    process.stderr.write(notice);
+    process.exit(1);
+  });
+
 thread
  .command("step-details")
-  .description("Dump the full detail node of a step as YAML")
-  .argument("<step-hash>", "CAS hash of the StepNode")
-  .action((stepHash: string) => {
-    const storageRoot = resolveStorageRoot();
-    runAction(async () => {
-      const detail = await cmdThreadStepDetails(storageRoot, stepHash);
-      process.stdout.write(yamlStringify(detail));
-    });
+  .description("[DEPRECATED] Use 'step show' instead")
+  .argument("<step-hash>", "Step hash")
+  .action(() => {
+    process.stderr.write(`Error: Command 'thread step-details' has been removed.
+Use 'step show' instead.
+
+For more information, see: uwf help step show
+`);
+    process.exit(1);
+  });
+
+// Removed command: `thread fork` (replaced by `step fork`).
+thread
+  .command("fork")
+  .description("[DEPRECATED] Use 'step fork' instead")
+  .argument("<step-hash>", "Step hash")
+  .action(() => {
+    const notice = [
+      "Error: Command 'thread fork' has been removed.",
+      "Use 'step fork' instead.",
+      "",
+      "For more information, see: uwf help step fork",
+      "",
+    ].join("\n");
+    process.stderr.write(notice);
+    process.exit(1);
+  });
+
+// Removed command: `thread kill` (split into `thread stop` and `thread cancel`).
+thread
+  .command("kill")
+  .description("[DEPRECATED] Use 'thread stop' or 'thread cancel' instead")
+  .argument("<thread-id>", "Thread ULID")
+  .action(() => {
+    const notice = [
+      "Error: Command 'thread kill' has been removed.",
+      "Use 'thread stop' to stop background execution (keep thread active),",
+      "or 'thread cancel' to cancel and archive the thread.",
+      "",
+      "For more information, see:",
+      "  uwf help thread stop",
+      "  uwf help thread cancel",
+      "",
+    ].join("\n");
+    process.stderr.write(notice);
+    process.exit(1);
+  });
+
+// Removed command: `thread running` (replaced by `thread list --status running`).
+thread
+  .command("running")
+  .description("[DEPRECATED] Use 'thread list --status running' instead")
+  .action(() => {
+    const notice = [
+      "Error: Command 'thread running' has been removed.",
+      "Use 'thread list --status running' instead.",
+      "",
+      "For more information, see: uwf help thread list",
+      "",
+    ].join("\n");
+    process.stderr.write(notice);
+    process.exit(1);
+  });
+
+// `skill` command group: prints built-in reference documents to stdout.
+const skill = program.command("skill").description("Built-in skill references for agents");
+skill.addHelpCommand(false);
+
+// One `skill <name>` subcommand per entry, registered in the order listed here.
+const skillReferences: Array<{ name: string; summary: string; render: () => unknown }> = [
+  {
+    name: "cli",
+    summary: "Print a markdown reference of all uwf commands",
+    render: () => cmdSkillCli(),
+  },
+  {
+    name: "architecture",
+    summary: "Print the architecture reference",
+    render: () => cmdSkillArchitecture(),
+  },
+  {
+    name: "yaml",
+    summary: "Print the workflow YAML schema reference",
+    render: () => cmdSkillYaml(),
+  },
+  {
+    name: "moderator",
+    summary: "Print the moderator reference",
+    render: () => cmdSkillModerator(),
+  },
+];
+
+for (const reference of skillReferences) {
+  skill
+    .command(reference.name)
+    .description(reference.summary)
+    .action(() => {
+      console.log(reference.render());
+    });
+}
+
+skill
+  .command("list")
+  .description("List all available skill names")
+  .action(() => {
+    console.log(cmdSkillList().join("\n"));
  });

 program
@@ -282,6 +581,17 @@ cas
    });
  });

+// cas put-text <text> — store a plain string in the CAS and print its hash.
+cas
+  .command("put-text")
+  .description("Store a plain text string, print its hash")
+  .argument("<text>", "Text content to store")
+  .action((text: string) => {
+    const root = resolveStorageRoot();
+    runAction(async () => {
+      const stored = await cmdCasPutText(root, text);
+      writeOutput(stored);
+    });
+  });
+
 cas
  .command("has")
  .description("Check if a hash exists")
@@ -289,7 +599,11 @@ cas
  .action((hash: string) => {
    const storageRoot = resolveStorageRoot();
    runAction(async () => {
-      writeOutput(await cmdCasHas(storageRoot, hash));
+      const result = await cmdCasHas(storageRoot, hash);
+      writeOutput(result);
+      if (!result.exists) {
+        process.exit(1);
+      }
    });
  });

@@ -348,6 +662,55 @@ casSchema
    });
  });

+// `log` command group: list, filter and prune the process-level debug logs.
+const log = program.command("log").description("Process-level debug logs");
+
+// log list — print every log file with its size.
+log
+  .command("list")
+  .description("List log files with sizes")
+  .action(() => {
+    const root = resolveStorageRoot();
+    runAction(async () => {
+      writeOutput(await cmdLogList(root));
+    });
+  });
+
+// log show [--thread] [--process] [--date] — print entries matching the given filters.
+log
+  .command("show")
+  .description("Show and filter log entries")
+  .option("--thread <thread-id>", "Filter by thread ID")
+  .option("--process <pid>", "Filter by process ID")
+  .option("--date <date>", "Filter by date (YYYY-MM-DD)")
+  .action(
+    (opts: {
+      thread: string | undefined;
+      process: string | undefined;
+      date: string | undefined;
+    }) => {
+      const root = resolveStorageRoot();
+      // Options that were not supplied are passed on as null.
+      const filter = {
+        thread: opts.thread ?? null,
+        process: opts.process ?? null,
+        date: opts.date ?? null,
+      };
+      runAction(async () => {
+        writeOutput(await cmdLogShow(root, filter));
+      });
+    },
+  );
+
+// log clean --before <date> — delete log files dated before the given day.
+log
+  .command("clean")
+  .description("Delete log files older than given date")
+  .requiredOption("--before <date>", "Delete files before this date (YYYY-MM-DD)")
+  .action((opts: { before: string }) => {
+    const root = resolveStorageRoot();
+    runAction(async () => {
+      writeOutput(await cmdLogClean(root, opts.before));
+    });
+  });
+
 program.parseAsync(process.argv).catch((e: unknown) => {
  const message = e instanceof Error ? e.message : String(e);
  process.stderr.write(`${message}\n`);
@@ -1,10 +1,12 @@
 import { readFileSync } from "node:fs";
 import { join } from "node:path";

-import type { Hash, JSONSchema, Store } from "@uncaged/json-cas";
-import { bootstrap, getSchema, refs, walk } from "@uncaged/json-cas";
+import type { JSONSchema, Store } from "@uncaged/json-cas";
+import { bootstrap, getSchema, putSchema, refs, walk } from "@uncaged/json-cas";
 import { createFsStore } from "@uncaged/json-cas-fs";

+import { TEXT_SCHEMA } from "../schemas.js";
+
 // ---- Helpers ----

 function openStore(storageRoot: string): Store {
@@ -53,18 +55,12 @@ export async function cmdCasPut(
  return { hash };
 }

-export async function cmdCasHas(
-  storageRoot: string,
-  hash: string,
-): Promise<{ exists: boolean }> {
+export async function cmdCasHas(storageRoot: string, hash: string): Promise<{ exists: boolean }> {
  const store = openStore(storageRoot);
  return { exists: store.has(hash) };
 }

-export async function cmdCasRefs(
-  storageRoot: string,
-  hash: string,
-): Promise<{ refs: string[] }> {
+export async function cmdCasRefs(storageRoot: string, hash: string): Promise<{ refs: string[] }> {
  const store = openStore(storageRoot);
  const node = store.get(hash);
  if (node === null) {
@@ -73,10 +69,7 @@ export async function cmdCasRefs(
  return { refs: refs(store, node) };
 }

-export async function cmdCasWalk(
-  storageRoot: string,
-  hash: string,
-): Promise<{ hashes: string[] }> {
+export async function cmdCasWalk(storageRoot: string, hash: string): Promise<{ hashes: string[] }> {
  const store = openStore(storageRoot);
  const result: string[] = [];
  walk(store, hash, (h) => {
@@ -90,9 +83,7 @@ export type SchemaListEntry = {
  title: string;
 };

-export async function cmdCasSchemaList(
-  storageRoot: string,
-): Promise<SchemaListEntry[]> {
+export async function cmdCasSchemaList(storageRoot: string): Promise<SchemaListEntry[]> {
  const store = openStore(storageRoot);
  const metaHash = await bootstrap(store);
  const entries: SchemaListEntry[] = [];
@@ -115,9 +106,7 @@ export async function cmdCasSchemaList(
  return entries;
 }

-export async function cmdCasReindex(
-  storageRoot: string,
-): Promise<{ status: string }> {
+export async function cmdCasReindex(storageRoot: string): Promise<{ status: string }> {
  const indexDir = join(storageRoot, "cas", "_index");
  const { rmSync } = await import("node:fs");
  rmSync(indexDir, { recursive: true, force: true });
@@ -126,10 +115,7 @@ export async function cmdCasReindex(
  return { status: "reindexed" };
 }

-export async function cmdCasSchemaGet(
-  storageRoot: string,
-  hash: string,
-): Promise<unknown> {
+export async function cmdCasSchemaGet(storageRoot: string, hash: string): Promise<unknown> {
  const store = openStore(storageRoot);
  const schema = getSchema(store, hash);
  if (schema === null) {
@@ -137,3 +123,10 @@ export async function cmdCasSchemaGet(
  }
  return schema;
 }
+
+/**
+ * Store a plain text string in the CAS.
+ *
+ * Puts TEXT_SCHEMA into the store via `putSchema`, then stores `text` under the
+ * hash that call returns.
+ *
+ * @param storageRoot - Storage root used to open the store.
+ * @param text - Text content to store.
+ * @returns The hash returned by the store for the new node.
+ */
+export async function cmdCasPutText(storageRoot: string, text: string): Promise<{ hash: string }> {
+  const store = openStore(storageRoot);
+  const textTypeHash = await putSchema(store, TEXT_SCHEMA);
+  return { hash: await store.put(textTypeHash, text) };
+}
@@ -0,0 +1,116 @@
+import { readdir, readFile, stat, unlink } from "node:fs/promises";
+import { join } from "node:path";
+
+/** One row of `log list`: a log file with its size and the date taken from its name. */
+type LogListItem = {
+  // File name including the ".jsonl" extension.
+  name: string;
+  // Size as reported by `stat` for the file.
+  size: number;
+  // File name with ".jsonl" removed.
+  date: string;
+};
+
+/** Filters for `log show`; a null field means "do not filter on this field". */
+type LogShowFilter = {
+  // Keep only entries whose `thread` equals this value.
+  thread: string | null;
+  // Keep only entries whose `pid` equals this value.
+  process: string | null;
+  // Read only the file named `<date>.jsonl` instead of every log file.
+  date: string | null;
+};
+
+/**
+ * Shape each parsed JSONL line is cast to. The cast is unchecked, so these
+ * fields are assumptions about the log writer rather than validated facts.
+ */
+type LogEntry = {
+  // Used as the sort key by `cmdLogShow` (compared as a string).
+  ts: string;
+  // Compared against the `--process` filter.
+  pid: string;
+  tag: string;
+  msg: string;
+  // Compared against the `--thread` filter.
+  thread: string | null;
+  workflow: string | null;
+};
+
+/** Result of `log clean`. */
+type LogCleanResult = {
+  // Number of files that were unlinked.
+  deleted: number;
+};
+
+/** Path of the "logs" directory directly under the storage root. */
+function logsDir(storageRoot: string): string {
+  const dir = join(storageRoot, "logs");
+  return dir;
+}
+
+/**
+ * List the ".jsonl" file names in `dir`, sorted ascending.
+ * Any error while reading the directory (for example, it does not exist) yields an empty list.
+ */
+async function listLogFiles(dir: string): Promise<Array<string>> {
+  let names: Array<string>;
+  try {
+    names = await readdir(dir);
+  } catch {
+    return [];
+  }
+  const logNames = names.filter((entry) => entry.endsWith(".jsonl"));
+  logNames.sort();
+  return logNames;
+}
+
+/**
+ * Strip the trailing ".jsonl" extension from a log file name.
+ *
+ * Only a suffix is removed. `String.prototype.replace` with a string pattern
+ * removes the first occurrence instead, which mangles any name that contains
+ * ".jsonl" before the extension. Names without the extension are returned unchanged.
+ */
+function dateFromFilename(name: string): string {
+  const extension = ".jsonl";
+  return name.endsWith(extension) ? name.slice(0, -extension.length) : name;
+}
+
+/**
+ * Read a JSONL file and return its entries, one per non-blank line.
+ *
+ * Lines that are not valid JSON, or that parse to something other than an
+ * object, are skipped. A single torn line (for example, a writer killed
+ * mid-write) therefore no longer discards every other entry in the file.
+ * Lines are trimmed before parsing, so CRLF line endings are tolerated.
+ *
+ * @param path - File to read as UTF-8.
+ * @returns Parsed entries in file order. The cast to LogEntry is unchecked.
+ * @throws Whatever `readFile` throws when the file cannot be read.
+ */
+async function parseJsonlFile(path: string): Promise<Array<LogEntry>> {
+  const content = await readFile(path, "utf-8");
+  const entries: Array<LogEntry> = [];
+  for (const rawLine of content.split("\n")) {
+    const line = rawLine.trim();
+    if (line.length === 0) continue;
+    let parsed: unknown;
+    try {
+      parsed = JSON.parse(line);
+    } catch {
+      // Malformed line: skip it and keep the rest of the file.
+      continue;
+    }
+    // `null` and scalars are valid JSON but not log entries; callers read fields off each entry.
+    if (typeof parsed !== "object" || parsed === null) continue;
+    entries.push(parsed as LogEntry);
+  }
+  return entries;
+}
+
+/**
+ * List the log files under the storage root's logs directory.
+ *
+ * @param storageRoot - Storage root containing the "logs" directory.
+ * @returns One item per ".jsonl" file, ordered by date string, newest first.
+ */
+export async function cmdLogList(storageRoot: string): Promise<Array<LogListItem>> {
+  const dir = logsDir(storageRoot);
+  const names = await listLogFiles(dir);
+  const listing: Array<LogListItem> = [];
+  for (const name of names) {
+    const { size } = await stat(join(dir, name));
+    listing.push({ name, size, date: dateFromFilename(name) });
+  }
+  // Descending by date string.
+  listing.sort((left, right) => {
+    if (left.date > right.date) return -1;
+    if (left.date < right.date) return 1;
+    return 0;
+  });
+  return listing;
+}
+
+/**
+ * Collect log entries, optionally narrowed by date, thread and process id.
+ *
+ * With `filter.date` set, only `<date>.jsonl` is read; otherwise every log file
+ * is read. Files that cannot be read or parsed are skipped. The result is
+ * sorted ascending by each entry's `ts` string.
+ *
+ * NOTE(review): `filter.process` is compared to `pid` with `===`; this assumes
+ * the log writer stores the pid as a string — confirm against the logger.
+ */
+export async function cmdLogShow(
+  storageRoot: string,
+  filter: LogShowFilter,
+): Promise<Array<LogEntry>> {
+  const dir = logsDir(storageRoot);
+  const { date, thread, process: pid } = filter;
+  const fileNames = date === null ? await listLogFiles(dir) : [`${date}.jsonl`];
+
+  const collected: Array<LogEntry> = [];
+  for (const fileName of fileNames) {
+    let parsed: Array<LogEntry>;
+    try {
+      parsed = await parseJsonlFile(join(dir, fileName));
+    } catch {
+      // file doesn't exist or is unreadable, skip
+      continue;
+    }
+    for (const entry of parsed) {
+      collected.push(entry);
+    }
+  }
+
+  const matching = collected.filter(
+    (entry) => (thread === null || entry.thread === thread) && (pid === null || entry.pid === pid),
+  );
+
+  matching.sort((left, right) => {
+    if (left.ts < right.ts) return -1;
+    if (left.ts > right.ts) return 1;
+    return 0;
+  });
+  return matching;
+}
+
+/**
+ * Delete log files whose date (file name without ".jsonl") sorts before `before`.
+ *
+ * The cutoff must look like YYYY-MM-DD. The date check is a plain string
+ * comparison, so an unvalidated cutoff such as "zzz" or "2026-5-1" would match
+ * far more files than intended, and this operation is not reversible.
+ *
+ * @param storageRoot - Storage root containing the "logs" directory.
+ * @param before - Exclusive cutoff date in YYYY-MM-DD form.
+ * @returns The number of files deleted.
+ * @throws Error when `before` is not in YYYY-MM-DD form.
+ */
+export async function cmdLogClean(storageRoot: string, before: string): Promise<LogCleanResult> {
+  if (!/^\d{4}-\d{2}-\d{2}$/.test(before)) {
+    throw new Error(`invalid --before: expected YYYY-MM-DD, got "${before}"`);
+  }
+
+  const dir = logsDir(storageRoot);
+  const files = await listLogFiles(dir);
+  let deleted = 0;
+
+  for (const name of files) {
+    const date = dateFromFilename(name);
+    // Zero-padded ISO dates order the same lexicographically as chronologically.
+    if (date < before) {
+      await unlink(join(dir, name));
+      deleted++;
+    }
+  }
+
+  return { deleted };
+}
@@ -1,10 +1,45 @@
-import { existsSync, readFileSync, writeFileSync, mkdirSync } from "node:fs";
-import { homedir } from "node:os";
-import { join, resolve } from "node:path";
-import { createInterface } from "node:readline/promises";
+import { existsSync, mkdirSync, readdirSync, readFileSync, statSync, writeFileSync } from "node:fs";
+import { join } from "node:path";
 import { stdin as input, stdout as output } from "node:process";
+import { createInterface } from "node:readline/promises";
+import type { Result } from "@uncaged/workflow-util";
+import { parse, stringify } from "yaml";

-import { stringify, parse } from "yaml";
+/**
+ * Send a minimal chat completion request to verify the model is reachable.
+ *
+ * POSTs a single-message, `max_tokens: 1` request to `<baseUrl>/chat/completions`
+ * (trailing slashes on `baseUrl` are stripped) with a 15 second timeout.
+ *
+ * @param baseUrl - OpenAI-compatible API base URL.
+ * @param apiKey - Sent as a Bearer token in the Authorization header.
+ * @param model - Model name placed in the request body.
+ * @returns ok on a 2xx response; otherwise an error with a reason string
+ *   (HTTP status, timeout, or network failure). Never rejects.
+ */
+export async function validateModel(
+  baseUrl: string,
+  apiKey: string,
+  model: string,
+): Promise<Result<void, string>> {
+  try {
+    const url = `${baseUrl.replace(/\/+$/, "")}/chat/completions`;
+    const res = await fetch(url, {
+      method: "POST",
+      headers: {
+        Authorization: `Bearer ${apiKey}`,
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({
+        model,
+        messages: [{ role: "user", content: "hi" }],
+        max_tokens: 1,
+      }),
+      signal: AbortSignal.timeout(15_000),
+    });
+    if (!res.ok) {
+      return { ok: false, error: `HTTP ${res.status} ${res.statusText}` };
+    }
+    return { ok: true, value: undefined };
+  } catch (err: unknown) {
+    // AbortSignal.timeout() aborts with a "TimeoutError" DOMException; "AbortError"
+    // is what a manual abort() produces. Checking only for "AbortError" meant a
+    // real timeout was reported as a generic network error.
+    if (
+      err instanceof DOMException &&
+      (err.name === "TimeoutError" || err.name === "AbortError")
+    ) {
+      return { ok: false, error: "Request timed out — model endpoint unreachable" };
+    }
+    return { ok: false, error: `Network error — could not reach endpoint (${String(err)})` };
+  }
+}

 /**
 * Preset provider list — embedded to avoid runtime YAML loading dependency.
@@ -17,10 +52,18 @@ const PRESET_PROVIDERS = [
  { name: "openrouter", label: "OpenRouter", baseUrl: "https://openrouter.ai/api/v1" },
  { name: "venice", label: "Venice", baseUrl: "https://api.venice.ai/api/v1" },
  // China
-  { name: "dashscope", label: "DashScope (Alibaba)", baseUrl: "https://dashscope.aliyuncs.com/compatible-mode/v1" },
+  {
+    name: "dashscope",
+    label: "DashScope (Alibaba)",
+    baseUrl: "https://dashscope.aliyuncs.com/compatible-mode/v1",
+  },
  { name: "deepseek", label: "DeepSeek", baseUrl: "https://api.deepseek.com/v1" },
  { name: "siliconflow", label: "SiliconFlow", baseUrl: "https://api.siliconflow.cn/v1" },
-  { name: "volcengine", label: "Volcengine (ByteDance)", baseUrl: "https://ark.cn-beijing.volces.com/api/v3" },
+  {
+    name: "volcengine",
+    label: "Volcengine (ByteDance)",
+    baseUrl: "https://ark.cn-beijing.volces.com/api/v3",
+  },
  { name: "kimi", label: "Kimi (Moonshot)", baseUrl: "https://api.moonshot.cn/v1" },
  { name: "glm", label: "GLM (Zhipu AI)", baseUrl: "https://open.bigmodel.cn/api/paas/v4" },
  { name: "stepfun", label: "StepFun", baseUrl: "https://api.stepfun.com/v1" },
@@ -94,29 +137,286 @@ function apiKeyEnvName(providerName: string): string {
  return `${providerName.toUpperCase().replace(/[^A-Z0-9]/g, "_")}_API_KEY`;
 }

+// ──────────────────────────────────────────────────────────────────────────────
+// Extracted helpers — _discoverAgents
+// ──────────────────────────────────────────────────────────────────────────────
+
+/**
+ * Scan every directory of a ":"-separated PATH string for uwf-* executables.
+ *
+ * @param pathEnv - PATH-style string; empty segments are ignored.
+ * @returns Sorted, de-duplicated binary names. Empty when `pathEnv` is empty.
+ */
+export async function _searchPathDirs(pathEnv: string): Promise<string[]> {
+  if (!pathEnv) return [];
+  const found = new Set<string>();
+  pathEnv.split(":").forEach((dir) => {
+    if (dir.length > 0) _scanDirForAgents(dir, found);
+  });
+  return [...found].sort();
+}
+
+/**
+ * Add the names of executable `uwf-*` files found directly in `dir` to `agents`.
+ * Missing or unreadable directories are ignored.
+ */
+function _scanDirForAgents(dir: string, agents: Set<string>): void {
+  try {
+    if (!existsSync(dir)) return;
+    for (const entry of readdirSync(dir)) {
+      const looksLikeAgent = entry.startsWith("uwf-") && entry !== "uwf";
+      if (looksLikeAgent && _isExecutableFile(join(dir, entry))) {
+        agents.add(entry);
+      }
+    }
+  } catch {
+    // Skip inaccessible directories
+  }
+}
+
+/**
+ * True when `fullPath` is a regular file with at least one execute bit set
+ * (owner, group or other). Any stat failure counts as "not executable".
+ */
+function _isExecutableFile(fullPath: string): boolean {
+  try {
+    const info = statSync(fullPath);
+    if (!info.isFile()) return false;
+    const execBits = info.mode & 0o111;
+    return execBits !== 0;
+  } catch {
+    return false;
+  }
+}
+
+/**
+ * Turn the stdout of `which -a` into sorted, unique binary names.
+ *
+ * Each non-empty line is reduced to the text after its last "/"; only names
+ * starting with "uwf-" are kept.
+ */
+export function _parseWhichOutput(text: string): string[] {
+  if (!text) return [];
+  const names = text
+    .trim()
+    .split("\n")
+    .filter((line) => line !== "")
+    .map((line) => line.slice(line.lastIndexOf("/") + 1))
+    .filter((name) => name.startsWith("uwf-") && name !== "uwf");
+  return Array.from(new Set(names)).sort();
+}
+
+/**
+ * Discover uwf-* agent binaries.
+ *
+ * Uses the `which` probe first; when that yields null, falls back to scanning
+ * the directories in `process.env.PATH`. Any unexpected failure yields [].
+ *
+ * @returns Sorted list of binary names (e.g., ["uwf-claude-code", "uwf-hermes"]).
+ */
+export async function _discoverAgents(): Promise<string[]> {
+  try {
+    const viaWhich = await _tryWhichDiscovery();
+    return viaWhich ?? (await _searchPathDirs(process.env.PATH ?? ""));
+  } catch {
+    return [];
+  }
+}
+
+/**
+ * Probe for the three well-known agent binaries with `which -a`.
+ *
+ * @returns The parsed binary names when `which` exits with status 0; null when
+ *   it exits non-zero or cannot be spawned, so the caller can fall back.
+ *
+ * NOTE(review): only the names listed below are probed, so other uwf-* binaries
+ * are not reported on this path — confirm that is intended.
+ */
+async function _tryWhichDiscovery(): Promise<string[] | null> {
+  try {
+    const child = Bun.spawn(["which", "-a", "uwf-hermes", "uwf-claude-code", "uwf-cursor"], {
+      stdout: "pipe",
+      stderr: "pipe",
+    });
+    const stdoutText = await new Response(child.stdout).text();
+    await child.exited;
+    return child.exitCode === 0 ? _parseWhichOutput(stdoutText) : null;
+  } catch {
+    return null;
+  }
+}
+
+// ──────────────────────────────────────────────────────────────────────────────
+// Extracted helpers — onData closure (promptSecret)
+// ──────────────────────────────────────────────────────────────────────────────
+
+/**
+ * Returns true for newline, carriage return, or EOF (EOT, Ctrl-D).
+ *
+ * The control character is written as an escape on purpose: a raw U+0004 in the
+ * source is invisible and easily lost by editors and tooling, which leaves a
+ * comparison against the empty string that can never match a typed character.
+ */
+export function _isTerminator(c: string): boolean {
+  return c === "\n" || c === "\r" || c === "\u0004";
+}
+
+/**
+ * Returns true for DEL (U+007F) or backspace (U+0008).
+ *
+ * DEL is written as an escape on purpose: a raw U+007F in the source is
+ * invisible and easily lost, which leaves a comparison against the empty
+ * string that can never match a typed character.
+ */
+export function _isBackspace(c: string): boolean {
+  return c === "\u007F" || c === "\b";
+}
+
+// ──────────────────────────────────────────────────────────────────────────────
+// Extracted helpers — cmdSetupInteractive
+// ──────────────────────────────────────────────────────────────────────────────
+
+type ProviderEntry = { name: string; label: string; baseUrl: string };
+
+/**
+ * Print the numbered provider list to stdout, followed by a final "Custom"
+ * option numbered one past the last provider. Numbers are right-aligned to the
+ * width of the largest number.
+ */
+export function _printProviderMenu(providers: readonly ProviderEntry[]): void {
+  const customNumber = providers.length + 1;
+  const width = String(customNumber).length;
+  providers.forEach((provider, index) => {
+    if (!provider) return;
+    const number = String(index + 1).padStart(width);
+    console.log(`  ${number}) ${provider.label.padEnd(28)} ${provider.baseUrl}`);
+  });
+  console.log(`  ${String(customNumber).padStart(width)}) Custom (enter name and URL manually)\n`);
+}
+
+/**
+ * Map a 1-based menu answer to a preset provider.
+ *
+ * @returns The provider's name and base URL, or null when the answer is not a
+ *   number in the range 1..providers.length (which includes the "Custom" slot).
+ */
+export function _resolveProviderChoice(
+  choice: string,
+  providers: readonly ProviderEntry[],
+): { providerName: string; baseUrl: string } | null {
+  const position = Number.parseInt(choice, 10);
+  const inRange = !Number.isNaN(position) && position >= 1 && position <= providers.length;
+  if (!inRange) return null;
+  const picked = providers[position - 1];
+  return picked ? { providerName: picked.name, baseUrl: picked.baseUrl } : null;
+}
+
+/**
+ * Resolve a menu answer to a model name.
+ *
+ * An answer made only of digits is a 1-based index into `models`; anything
+ * else, or an index out of range, is returned unchanged as a literal model name.
+ *
+ * The all-digits check matters: Number.parseInt reads leading digits and ignores
+ * the rest, so a literal id such as "01-ai/yi-large" or "3.5-turbo" would
+ * otherwise be treated as index 1 or 3 and silently replaced by another model.
+ *
+ * @param input - What the user typed.
+ * @param models - Models in the order they were listed.
+ * @returns The selected or literal model name.
+ */
+export function _resolveModelChoice(input: string, models: string[]): string {
+  if (!/^\d+$/.test(input.trim())) return input;
+  const n = Number.parseInt(input, 10);
+  if (n >= 1 && n <= models.length) {
+    return models[n - 1] ?? input;
+  }
+  return input;
+}
+
+/**
+ * Print the numbered model list to stdout in as many columns as fit in
+ * `termCols` characters (at least one). Entries are laid out column-major:
+ * numbering runs down the first column, then down the second, and so on.
+ */
+export function _printModelMenu(models: string[], termCols: number): void {
+  const numberWidth = String(models.length).length;
+  let longestName = 0;
+  models.forEach((model) => {
+    longestName = Math.max(longestName, model.length);
+  });
+  const cellWidth = numberWidth + 2 + longestName + 4;
+  const columnCount = Math.max(1, Math.floor(termCols / cellWidth));
+  const rowCount = Math.ceil(models.length / columnCount);
+
+  for (let row = 0; row < rowCount; row++) {
+    const cells: string[] = [];
+    for (let column = 0; column < columnCount; column++) {
+      const index = column * rowCount + row;
+      if (index >= models.length) break;
+      const number = String(index + 1).padStart(numberWidth);
+      const label = (models[index] ?? "").padEnd(longestName);
+      cells.push(`  ${number}) ${label}  `);
+    }
+    console.log(cells.join("").trimEnd());
+  }
+}
+
+// ──────────────────────────────────────────────────────────────────────────────
+// Agent selection prompt
+// ──────────────────────────────────────────────────────────────────────────────
+
+/**
+ * Known agent binary → display label mapping.
+ * Keys are full binary names (including the "uwf-" prefix); values are the
+ * human-readable labels used when an agent is shown to the user.
+ */
+const KNOWN_AGENTS: Record<string, string> = {
+  "uwf-hermes": "Hermes (hermes-agent)",
+  "uwf-claude-code": "Claude Code",
+  "uwf-cursor": "Cursor",
+  "uwf-builtin": "Built-in (lightweight, no external agent)",
+};
+
+/**
+ * Short agent name for a binary name: drops one leading "uwf-" if present
+ * (uwf-claude-code → claude-code). Other names are returned unchanged.
+ */
+export function _agentNameFromBinary(binary: string): string {
+  const prefix = "uwf-";
+  return binary.startsWith(prefix) ? binary.slice(prefix.length) : binary;
+}
+
+/**
+ * Print the numbered agent list to stdout, followed by an empty line.
+ * Each row shows the label from KNOWN_AGENTS (or the binary name when it has
+ * none) and the binary name in parentheses.
+ */
+export function _printAgentMenu(agents: string[]): void {
+  const width = String(agents.length).length;
+  for (let index = 0; index < agents.length; index++) {
+    const binary = agents[index] ?? "";
+    const displayName = KNOWN_AGENTS[binary] ?? binary;
+    console.log(`  ${String(index + 1).padStart(width)}) ${displayName}  (${binary})`);
+  }
+  console.log("");
+}
+
+/**
+ * Interactive agent selection. Discovers uwf-* binaries, lets user pick default.
+ *
+ * - No agents found: prints install hints and asks for a binary name; an empty
+ *   answer falls back to "hermes".
+ * - One agent found: it is selected automatically.
+ * - Several found: the user picks by number or types a name; an empty answer
+ *   selects the first agent listed.
+ *
+ * The result is never an empty string. Previously a blank answer at the
+ * multi-agent prompt, or a bare "uwf-", produced "" and that empty name was
+ * carried into the saved configuration.
+ *
+ * @param rl - Readline interface used to ask questions.
+ * @returns Short agent name (e.g. "hermes", "claude-code").
+ */
+export async function _promptAgentSelection(
+  rl: ReturnType<typeof createInterface>,
+): Promise<string> {
+  console.log("Discovering installed agents...\n");
+  const agents = await _discoverAgents();
+
+  if (agents.length === 0) {
+    console.log("  No uwf-* agent binaries found in PATH.\n");
+    console.log("  Install one first, for example:");
+    console.log("    npm i -g @uncaged/workflow-agent-hermes");
+    console.log("    npm i -g @uncaged/workflow-agent-claude-code\n");
+    const manual = (
+      await rl.question("Agent binary name (e.g. uwf-hermes), or press Enter to skip: ")
+    ).trim();
+    const manualName = _agentNameFromBinary(manual.startsWith("uwf-") ? manual : `uwf-${manual}`);
+    // Covers both an empty answer and a bare "uwf-" prefix.
+    return manualName || "hermes";
+  }
+
+  if (agents.length === 1) {
+    const name = _agentNameFromBinary(agents[0] ?? "uwf-hermes");
+    const label = KNOWN_AGENTS[agents[0] ?? ""] ?? agents[0];
+    console.log(`  Found 1 agent: ${label} — auto-selected.\n`);
+    return name;
+  }
+
+  console.log(`  Found ${agents.length} agents:\n`);
+  _printAgentMenu(agents);
+  const choice = (await rl.question(`Choose default agent [1-${agents.length}]: `)).trim();
+  const n = Number.parseInt(choice, 10);
+  if (!Number.isNaN(n) && n >= 1 && n <= agents.length) {
+    const selected = agents[n - 1] ?? "uwf-hermes";
+    const name = _agentNameFromBinary(selected);
+    console.log(`  → ${name}\n`);
+    return name;
+  }
+  // Treat as literal name; a blank name falls back to the first listed agent.
+  const literal = _agentNameFromBinary(choice.startsWith("uwf-") ? choice : `uwf-${choice}`);
+  const name = literal || _agentNameFromBinary(agents[0] ?? "uwf-hermes");
+  console.log(`  → ${name}\n`);
+  return name;
+}
+
+type ValidationResult = { ok: boolean; error: string | null };
+
+/**
+ * Report the outcome of model validation on stdout: a success line when `ok`
+ * is true, otherwise a warning that includes `error` plus a hint that the
+ * config was saved anyway.
+ */
+export function _printValidationResult(validation: ValidationResult): void {
+  if (validation.ok) {
+    console.log("✓ Model verified — connection successful.\n");
+    return;
+  }
+  const warning = `\n⚠ Warning: Could not reach model — ${validation.error}`;
+  const hint =
+    "  Config saved, but you may want to try a different model or check your API key.\n";
+  console.log(warning);
+  console.log(hint);
+}
+
+// ──────────────────────────────────────────────────────────────────────────────
+
 /**
 * Merge setup args into config.yaml structure. Non-destructive — preserves existing entries.
 */
 function mergeConfig(existing: Record<string, unknown>, args: SetupArgs): Record<string, unknown> {
-  const providers = (typeof existing.providers === "object" && existing.providers !== null
-    ? { ...(existing.providers as Record<string, unknown>) }
-    : {}) as Record<string, unknown>;
+  const providers = (
+    typeof existing.providers === "object" && existing.providers !== null
+      ? { ...(existing.providers as Record<string, unknown>) }
+      : {}
+  ) as Record<string, unknown>;

  const envName = apiKeyEnvName(args.provider);
  providers[args.provider] = { baseUrl: args.baseUrl, apiKeyEnv: envName };

-  const models = (typeof existing.models === "object" && existing.models !== null
-    ? { ...(existing.models as Record<string, unknown>) }
-    : {}) as Record<string, unknown>;
+  const models = (
+    typeof existing.models === "object" && existing.models !== null
+      ? { ...(existing.models as Record<string, unknown>) }
+      : {}
+  ) as Record<string, unknown>;
  models.default = { provider: args.provider, name: args.model };

-  const agents = (typeof existing.agents === "object" && existing.agents !== null
-    ? { ...(existing.agents as Record<string, unknown>) }
-    : {}) as Record<string, unknown>;
+  const agents = (
+    typeof existing.agents === "object" && existing.agents !== null
+      ? { ...(existing.agents as Record<string, unknown>) }
+      : {}
+  ) as Record<string, unknown>;

  const agentName = args.agent ?? "hermes";
-  if (Object.keys(agents).length === 0) {
-    agents.hermes = { command: "uwf-hermes", args: [] };
+  // Ensure the selected agent has an entry
+  if (!agents[agentName]) {
+    agents[agentName] = { command: `uwf-${agentName}`, args: [] };
  }

  return {
@@ -124,7 +424,7 @@ function mergeConfig(existing: Record<string, unknown>, args: SetupArgs): Record
    providers,
    models,
    agents,
-    defaultAgent: existing.defaultAgent ?? agentName,
+    defaultAgent: agentName,
    defaultModel: existing.defaultModel ?? "default",
  };
 }
@@ -150,15 +450,59 @@ export async function cmdSetup(args: SetupArgs): Promise<Record<string, unknown>
  envData[envName] = args.apiKey;
  saveEnvFile(envPath, envData);

+  // Validate model connectivity
+  const validation = await validateModel(args.baseUrl, args.apiKey, args.model);
+
  return {
    configPath,
    envPath,
    provider: args.provider,
    model: args.model,
    defaultAgent: merged.defaultAgent,
+    validation,
  };
 }

+/** Mutable state shared by the per-character handlers while a secret is being typed. */
+type SecretState = {
+  // Characters accepted so far; trimmed and passed to `resolve` on termination.
+  buf: string;
+  // Value handed back to `setRawMode` when input ends or is interrupted.
+  rawWasSet: boolean;
+  // Settles the pending prompt with the final value.
+  resolve: (value: string) => void;
+  // The stdin "data" listener, kept here so it can be removed on termination.
+  onData: (chunk: string) => void;
+};
+
+/**
+ * Finish secret entry: put the raw-mode setting back (TTY only), pause stdin,
+ * detach the data listener, emit a newline, and resolve with the trimmed buffer.
+ */
+function _handleSecretTerminator(state: SecretState): void {
+  if (process.stdin.isTTY) process.stdin.setRawMode(state.rawWasSet);
+  process.stdin.pause();
+  process.stdin.removeListener("data", state.onData);
+  // The terminating key itself is not echoed, so move to the next line explicitly.
+  process.stdout.write("\n");
+  state.resolve(state.buf.trim());
+}
+
+/**
+ * Remove the last buffered character, if any, and write backspace-space-backspace
+ * to stdout (one "*" is echoed per accepted character, so this rubs out the last one).
+ * Does nothing when the buffer is already empty.
+ */
+function _handleSecretBackspace(state: SecretState): void {
+  if (state.buf.length > 0) {
+    state.buf = state.buf.slice(0, -1);
+    process.stdout.write("\b \b");
+  }
+}
+
+/**
+ * Process one typed character of a secret.
+ *
+ * - Terminator (newline, carriage return, EOT): finish input.
+ * - Backspace or DEL: erase the last character.
+ * - Ctrl-C (U+0003): put the raw-mode setting back and exit with status 130.
+ * - Anything else: append to the buffer and echo "*".
+ *
+ * Ctrl-C is matched with an explicit escape. A raw U+0003 in the source is
+ * invisible and easily lost, which leaves a comparison against the empty string
+ * that never matches, so the interrupt key would be appended to the secret
+ * instead of aborting the prompt.
+ *
+ * @returns true when input is finished and the caller should stop processing
+ *   the current chunk; false otherwise.
+ */
+function _handleSecretChar(c: string, state: SecretState): boolean {
+  if (_isTerminator(c)) {
+    _handleSecretTerminator(state);
+    return true;
+  }
+  if (_isBackspace(c)) {
+    _handleSecretBackspace(state);
+    return false;
+  }
+  if (c === "\u0003") {
+    if (process.stdin.isTTY) process.stdin.setRawMode(state.rawWasSet);
+    process.exit(130);
+  }
+  state.buf += c;
+  process.stdout.write("*");
+  return false;
+}
+
 /** Read a line with terminal echo disabled (for secrets). */
 async function promptSecret(label: string): Promise<string> {
  process.stdout.write(label);
@@ -170,33 +514,13 @@ async function promptSecret(label: string): Promise<string> {
    process.stdin.resume();
    process.stdin.setEncoding("utf8");

-    let buf = "";
-    const onData = (chunk: string) => {
+    const state: SecretState = { buf: "", rawWasSet, resolve, onData: () => {} };
+    state.onData = (chunk: string) => {
      for (const c of chunk.toString()) {
-        if (c === "\n" || c === "\r" || c === "\u0004") {
-          if (process.stdin.isTTY) process.stdin.setRawMode(rawWasSet);
-          process.stdin.pause();
-          process.stdin.removeListener("data", onData);
-          process.stdout.write("\n");
-          resolve(buf.trim());
-          return;
-        }
-        if (c === "\u007F" || c === "\b") {
-          if (buf.length > 0) {
-            buf = buf.slice(0, -1);
-            process.stdout.write("\b \b");
-          }
-          continue;
-        }
-        if (c === "\u0003") {
-          if (process.stdin.isTTY) process.stdin.setRawMode(rawWasSet);
-          process.exit(130);
-        }
-        buf += c;
-        process.stdout.write("*");
+        if (_handleSecretChar(c, state)) return;
      }
    };
-    process.stdin.on("data", onData);
+    process.stdin.on("data", state.onData);
  });
 }

@@ -211,13 +535,67 @@ async function fetchModels(baseUrl: string, apiKey: string): Promise<string[]> {
    if (!res.ok) return [];
    const body = (await res.json()) as { data?: { id: string }[] };
    if (!Array.isArray(body.data)) return [];
-    const NON_CHAT = /speech|embed|image|video|audio|ocr|rerank|tts|asr|paraformer|sambert|cosyvoice|wordart|wanx|wan2|flux|stable-diffusion|gui-/i;
-    return body.data.map((m) => m.id).filter((id) => !NON_CHAT.test(id)).sort();
+    const NON_CHAT =
+      /speech|embed|image|video|audio|ocr|rerank|tts|asr|paraformer|sambert|cosyvoice|wordart|wanx|wan2|flux|stable-diffusion|gui-/i;
+    return body.data
+      .map((m) => m.id)
+      .filter((id) => !NON_CHAT.test(id))
+      .sort();
  } catch {
    return [];
  }
 }

+/**
+ * Show the provider menu and resolve the user's pick to a provider name and base URL.
+ *
+ * Accepts a number from 1 to one past the preset count. When the choice does not
+ * resolve to a preset, the provider name and base URL are asked for manually.
+ *
+ * @param rl - Readline interface used for the questions.
+ * @returns The chosen provider name and its API base URL.
+ * @throws Error when the choice is out of range or a manual field is left empty.
+ */
+async function _promptProviderSelection(
+  rl: ReturnType<typeof createInterface>,
+): Promise<{ providerName: string; baseUrl: string }> {
+  console.log("Select a provider:\n");
+  _printProviderMenu(PRESET_PROVIDERS);
+
+  // One slot beyond the presets leads to the manual-entry path below.
+  const optionCount = PRESET_PROVIDERS.length + 1;
+  const answer = (await rl.question(`Choose [1-${optionCount}]: `)).trim();
+  const picked = Number.parseInt(answer, 10);
+  const inRange = !Number.isNaN(picked) && picked >= 1 && picked <= optionCount;
+  if (!inRange) {
+    throw new Error(`Invalid choice: ${answer}`);
+  }
+
+  const preset = _resolveProviderChoice(answer, PRESET_PROVIDERS);
+  if (!preset) {
+    const providerName = (await rl.question("Provider name (e.g. my-proxy): ")).trim();
+    if (!providerName) throw new Error("Provider name required");
+    const baseUrl = (await rl.question("OpenAI-compatible API base URL: ")).trim();
+    if (!baseUrl) throw new Error("Base URL required");
+    return { providerName, baseUrl };
+  }
+
+  // Echo the selected preset back to the user before returning it.
+  const selected = PRESET_PROVIDERS[picked - 1];
+  if (selected) {
+    console.log(`\n  → ${selected.label} (${selected.baseUrl})\n`);
+  }
+  return preset;
+}
+
+/**
+ * Ask the user for the default model.
+ *
+ * Fetches the model list from the provider; when models are available they are
+ * printed as a menu and the answer is resolved via `_resolveModelChoice`.
+ * When nothing could be fetched, the user types the model name directly.
+ *
+ * @param rl - Readline interface used for the questions.
+ * @param baseUrl - Provider API base URL passed to `fetchModels`.
+ * @param apiKey - API key passed to `fetchModels`.
+ * @returns The selected or typed model name.
+ * @throws Error when the answer is empty.
+ */
+async function _promptModelSelection(
+  rl: ReturnType<typeof createInterface>,
+  baseUrl: string,
+  apiKey: string,
+): Promise<string> {
+  console.log("\nFetching available models...");
+  const available = await fetchModels(baseUrl, apiKey);
+
+  if (available.length > 0) {
+    console.log(`\nAvailable models (${available.length}):\n`);
+    // Fall back to a width of 100 when the terminal width is not reported.
+    _printModelMenu(available, process.stdout.columns || 100);
+    console.log(`\nChoose a number, or type a model name directly.`);
+    const picked = (await rl.question(`Default model [1-${available.length}]: `)).trim();
+    if (!picked) throw new Error("Model required");
+    return _resolveModelChoice(picked, available);
+  }
+
+  console.log("Could not fetch models. Enter model name manually.");
+  const typed = (await rl.question("Default model (e.g. qwen-plus, gpt-4o): ")).trim();
+  if (!typed) throw new Error("Model required");
+  return typed;
+}
+
 /**
 * Interactive setup — prompts user for provider, API key, model.
 */
@@ -227,39 +605,7 @@ export async function cmdSetupInteractive(storageRoot: string): Promise<Record<s
  try {
    console.log("Configure LLM provider for uwf workflow agents.\n");

-    // 1. Provider selection
-    const numWidth = String(PRESET_PROVIDERS.length + 1).length;
-    console.log("Select a provider:\n");
-    for (let i = 0; i < PRESET_PROVIDERS.length; i++) {
-      const p = PRESET_PROVIDERS[i];
-      if (!p) continue;
-      const num = String(i + 1).padStart(numWidth);
-      console.log(`  ${num}) ${p.label.padEnd(28)} ${p.baseUrl}`);
-    }
-    const customNum = String(PRESET_PROVIDERS.length + 1).padStart(numWidth);
-    console.log(`  ${customNum}) Custom (enter name and URL manually)\n`);
-
-    const choice = (await rl.question(`Choose [1-${PRESET_PROVIDERS.length + 1}]: `)).trim();
-    const choiceNum = Number.parseInt(choice, 10);
-    if (Number.isNaN(choiceNum) || choiceNum < 1 || choiceNum > PRESET_PROVIDERS.length + 1) {
-      throw new Error(`Invalid choice: ${choice}`);
-    }
-
-    let providerName: string;
-    let baseUrl: string;
-
-    if (choiceNum <= PRESET_PROVIDERS.length) {
-      const selected = PRESET_PROVIDERS[choiceNum - 1];
-      if (!selected) throw new Error("Invalid selection");
-      providerName = selected.name;
-      baseUrl = selected.baseUrl;
-      console.log(`\n  → ${selected.label} (${selected.baseUrl})\n`);
-    } else {
-      providerName = (await rl.question("Provider name (e.g. my-proxy): ")).trim();
-      if (!providerName) throw new Error("Provider name required");
-      baseUrl = (await rl.question("OpenAI-compatible API base URL: ")).trim();
-      if (!baseUrl) throw new Error("Base URL required");
-    }
+    const { providerName, baseUrl } = await _promptProviderSelection(rl);

    // 2. API key
    rl.close();
@@ -268,57 +614,28 @@ export async function cmdSetupInteractive(storageRoot: string): Promise<Record<s

    // 3. Model selection
    const rl2 = createInterface({ input, output });
-    console.log("\nFetching available models...");
-    const models = await fetchModels(baseUrl, apiKey);
-
-    let model: string;
-    if (models.length > 0) {
-      console.log(`\nAvailable models (${models.length}):\n`);
-      const nw = String(models.length).length;
-      // Multi-column layout
-      const maxLen = models.reduce((m, s) => Math.max(m, s.length), 0);
-      const colWidth = nw + 2 + maxLen + 4; // "  N) name    "
-      const termCols = process.stdout.columns || 100;
-      const cols = Math.max(1, Math.floor(termCols / colWidth));
-      const rows = Math.ceil(models.length / cols);
-      for (let r = 0; r < rows; r++) {
-        let line = "";
-        for (let c = 0; c < cols; c++) {
-          const idx = c * rows + r;
-          if (idx >= models.length) break;
-          const num = String(idx + 1).padStart(nw);
-          const name = (models[idx] ?? "").padEnd(maxLen);
-          line += `  ${num}) ${name}  `;
-        }
-        console.log(line.trimEnd());
-      }
-      console.log(`\nChoose a number, or type a model name directly.`);
-      const modelInput = (await rl2.question(`Default model [1-${models.length}]: `)).trim();
-      if (!modelInput) throw new Error("Model required");
-      const modelNum = Number.parseInt(modelInput, 10);
-      if (!Number.isNaN(modelNum) && modelNum >= 1 && modelNum <= models.length) {
-        model = models[modelNum - 1] ?? modelInput;
-      } else {
-        model = modelInput;
-      }
-    } else {
-      console.log("Could not fetch models. Enter model name manually.");
-      model = (await rl2.question("Default model (e.g. qwen-plus, gpt-4o): ")).trim();
-      if (!model) throw new Error("Model required");
-    }
-
+    const model = await _promptModelSelection(rl2, baseUrl, apiKey);
    rl2.close();
-
    console.log(`  → ${providerName}/${model}\n`);

-    await cmdSetup({
+    // 4. Agent discovery & selection
+    const rl3 = createInterface({ input, output });
+    const agentName = await _promptAgentSelection(rl3);
+    rl3.close();
+
+    const setupResult = await cmdSetup({
      provider: providerName,
      baseUrl,
      apiKey,
      model,
+      agent: agentName,
      storageRoot,
    });

+    // Show validation result
+    if (setupResult.validation && typeof setupResult.validation === "object") {
+      _printValidationResult(setupResult.validation as ValidationResult);
+    }
    console.log("Setup complete! Get started:\n");
    console.log("  uwf workflow put <workflow.yaml>   Register a workflow");
    console.log('  uwf thread start <name> -p "..."   Start a thread');
@@ -0,0 +1,231 @@
+import type { Store as CasStore, JSONSchema } from "@uncaged/json-cas";
+import { getSchema } from "@uncaged/json-cas";
+import type {
+  CasRef,
+  StartNodePayload,
+  StepNodePayload,
+  ThreadId,
+} from "@uncaged/workflow-protocol";
+import { findThreadInHistory, loadThreadsIndex, type UwfStore } from "../store.js";
+
+/** Result of walking a thread's chain from its head back to its StartNode. */
+type ChainState = {
+  /** Hash of the chain's StartNode. */
+  startHash: CasRef;
+  /** Payload of that StartNode. */
+  start: StartNodePayload;
+  /** Step payloads collected from the head backwards (head first); empty when the head is the StartNode. */
+  stepsNewestFirst: StepNodePayload[];
+  /** True when the head hash itself refers to the StartNode. */
+  headIsStart: boolean;
+};
+
+/** One step of a chain as returned by `collectOrderedSteps`. */
+type OrderedStepItem = {
+  /** Hash of the step node. */
+  hash: CasRef;
+  /** The step node's payload. */
+  payload: StepNodePayload;
+  /** The `timestamp` recorded on the stored node. */
+  timestamp: number;
+};
+
+/** Write `message` followed by a newline to stderr, then exit the process with code 1. */
+function fail(message: string): never {
+  process.stderr.write(message + "\n");
+  process.exit(1);
+}
+
+/**
+ * Walk a thread's chain backwards from `headHash`.
+ *
+ * When the head is a StartNode the result has no steps. Otherwise step payloads
+ * are collected by following `prev` links until the link is null or a non-step
+ * node is reached, and the StartNode is loaded from the newest step's `start` ref.
+ * Any missing or unexpected node terminates the process through `fail`.
+ *
+ * @param uwf - Store handle providing the CAS store and the schema hashes.
+ * @param headHash - Hash of the thread's head node.
+ * @returns The start node data plus the step payloads, newest first.
+ */
+function walkChain(uwf: UwfStore, headHash: CasRef): ChainState {
+  const { startNode: startType, stepNode: stepType } = uwf.schemas;
+
+  const head = uwf.store.get(headHash);
+  if (head === null) {
+    fail(`CAS node not found: ${headHash}`);
+  }
+
+  if (head.type === startType) {
+    return {
+      startHash: headHash,
+      start: head.payload as StartNodePayload,
+      stepsNewestFirst: [],
+      headIsStart: true,
+    };
+  }
+
+  if (head.type !== stepType) {
+    fail(`head ${headHash} is not a StartNode or StepNode`);
+  }
+
+  // Follow `prev` links; the walk ends at a null link or the first non-step node.
+  const stepsNewestFirst: StepNodePayload[] = [];
+  for (let cursor: CasRef | null = headHash; cursor !== null; ) {
+    const current = uwf.store.get(cursor);
+    if (current === null) {
+      fail(`CAS node not found while walking chain: ${cursor}`);
+    }
+    if (current.type !== stepType) {
+      break;
+    }
+    const step = current.payload as StepNodePayload;
+    stepsNewestFirst.push(step);
+    cursor = step.prev;
+  }
+
+  const [newest] = stepsNewestFirst;
+  if (newest === undefined) {
+    fail(`empty step chain at head ${headHash}`);
+  }
+
+  // The StartNode is resolved through the newest step's `start` reference.
+  const origin = uwf.store.get(newest.start);
+  if (origin === null || origin.type !== startType) {
+    fail(`StartNode not found: ${newest.start}`);
+  }
+
+  return {
+    startHash: newest.start,
+    start: origin.payload as StartNodePayload,
+    stepsNewestFirst,
+    headIsStart: false,
+  };
+}
+
+/** Return the payload stored under `outputRef`, or an empty object when the node is missing. */
+function expandOutput(uwf: UwfStore, outputRef: CasRef): unknown {
+  const stored = uwf.store.get(outputRef);
+  return stored === null ? {} : stored.payload;
+}
+
+/**
+ * Recursively expand all cas_ref fields in a CAS node's payload,
+ * replacing hash strings with the referenced node's expanded payload.
+ *
+ * Fallbacks: the hash string itself is returned when the hash was already
+ * visited or the node is missing; the raw payload is returned when the node's
+ * schema cannot be loaded.
+ *
+ * @param store - CAS store to read nodes and schemas from.
+ * @param hash - Hash of the node to expand.
+ * @param visited - Hashes already expanded in this traversal. The same set is
+ *   passed down to nested expansions and entries are never removed, so a hash
+ *   that occurs a second time anywhere in the traversal is left as a string.
+ * @returns The expanded payload, or one of the fallbacks described above.
+ */
+function expandDeep(store: CasStore, hash: CasRef, visited?: Set<string>): unknown {
+  // Top-level calls omit `visited`; nested calls share the caller's set.
+  const seen = visited ?? new Set<string>();
+  if (seen.has(hash)) return hash; // cycle guard
+  seen.add(hash);
+
+  const node = store.get(hash);
+  if (node === null) return hash;
+
+  const schema = getSchema(store, node.type);
+  if (schema === null) return node.payload;
+
+  return expandValue(store, schema, node.payload, seen);
+}
+
+/** Expand a `cas_ref` field: string values are treated as hashes and expanded, anything else is returned as-is. */
+function expandCasRefField(store: CasStore, value: unknown, visited: Set<string>): unknown {
+  return typeof value === "string" ? expandDeep(store, value as CasRef, visited) : value;
+}
+
+/**
+ * Expand a field whose schema is an `anyOf` union.
+ *
+ * A string value is expanded as a CAS reference when at least one alternative
+ * declares `format: "cas_ref"`; every other value is returned unchanged.
+ */
+function expandAnyOfField(
+  store: CasStore,
+  schema: JSONSchema,
+  value: unknown,
+  visited: Set<string>,
+): unknown {
+  if (!Array.isArray(schema.anyOf)) return value;
+  const alternatives = schema.anyOf as JSONSchema[];
+  const allowsCasRef = alternatives.some((alternative) => alternative.format === "cas_ref");
+  if (allowsCasRef && typeof value === "string") {
+    return expandDeep(store, value as CasRef, visited);
+  }
+  return value;
+}
+
+/** Expand every element of an array value against the schema's `items`; non-arrays or schemas without `items` pass through. */
+function expandArrayField(
+  store: CasStore,
+  schema: JSONSchema,
+  value: unknown,
+  visited: Set<string>,
+): unknown {
+  if (!Array.isArray(value) || !schema.items) return value;
+  const elementSchema = schema.items as JSONSchema;
+  return value.map((element: unknown) => expandValue(store, elementSchema, element, visited));
+}
+
+/**
+ * Expand the properties of an object value.
+ *
+ * Each own enumerable entry with a matching entry in `schema.properties` is
+ * expanded against that sub-schema; other entries are copied unchanged. Values
+ * that are null, arrays, or non-objects, and schemas without `properties`,
+ * are returned untouched.
+ */
+function expandObjectField(
+  store: CasStore,
+  schema: JSONSchema,
+  value: unknown,
+  visited: Set<string>,
+): unknown {
+  const isNonArrayObject = value !== null && typeof value === "object" && !Array.isArray(value);
+  if (!isNonArrayObject || !schema.properties) {
+    return value;
+  }
+
+  const propertySchemas = schema.properties as Record<string, JSONSchema>;
+  const expanded: Record<string, unknown> = {};
+  for (const [key, field] of Object.entries(value as Record<string, unknown>)) {
+    const fieldSchema = propertySchemas[key];
+    expanded[key] = fieldSchema ? expandValue(store, fieldSchema, field, visited) : field;
+  }
+  return expanded;
+}
+
+/**
+ * Dispatch expansion of `value` by schema shape, checked in this order:
+ * `format: "cas_ref"`, `anyOf`, `type: "array"`, and finally object handling.
+ */
+function expandValue(
+  store: CasStore,
+  schema: JSONSchema,
+  value: unknown,
+  visited: Set<string>,
+): unknown {
+  if (schema.format === "cas_ref") {
+    return expandCasRefField(store, value, visited);
+  }
+  if (Array.isArray(schema.anyOf)) {
+    return expandAnyOfField(store, schema, value, visited);
+  }
+  return schema.type === "array"
+    ? expandArrayField(store, schema, value, visited)
+    : expandObjectField(store, schema, value, visited);
+}
+
+/**
+ * Collect a chain's step nodes in oldest-to-newest order.
+ *
+ * Walks `prev` links from `headHash`, stopping at a null link, a missing node,
+ * or the first node that is not a step node.
+ *
+ * @param uwf - Store handle providing the CAS store and schema hashes.
+ * @param headHash - Hash of the thread's head node.
+ * @param chain - Previously walked chain; when `headIsStart` is set no steps are returned.
+ * @returns Step items (hash, payload, node timestamp), oldest first.
+ */
+function collectOrderedSteps(
+  uwf: UwfStore,
+  headHash: CasRef,
+  chain: ChainState,
+): OrderedStepItem[] {
+  const newestFirst: OrderedStepItem[] = [];
+  for (let cursor: CasRef | null = headHash; cursor !== null; ) {
+    const node = uwf.store.get(cursor);
+    if (node === null || node.type !== uwf.schemas.stepNode) {
+      break;
+    }
+    const payload = node.payload as StepNodePayload;
+    newestFirst.push({ hash: cursor, payload, timestamp: node.timestamp });
+    cursor = payload.prev;
+  }
+
+  // A chain whose head is the StartNode has no steps to report.
+  return chain.headIsStart ? [] : newestFirst.reverse();
+}
+
+/**
+ * Find the head hash of a thread.
+ *
+ * The threads index is consulted first, then the thread history. When the
+ * thread is in neither, the process is terminated through `fail`.
+ */
+async function resolveHeadHash(storageRoot: string, threadId: ThreadId): Promise<CasRef> {
+  const activeHead = (await loadThreadsIndex(storageRoot))[threadId];
+  if (activeHead !== undefined) {
+    return activeHead;
+  }
+
+  const archived = await findThreadInHistory(storageRoot, threadId);
+  if (archived === null) {
+    fail(`thread not found: ${threadId}`);
+  }
+  return archived.head;
+}
+
+export {
+  type ChainState,
+  collectOrderedSteps,
+  expandAnyOfField,
+  expandArrayField,
+  expandCasRefField,
+  expandDeep,
+  expandObjectField,
+  expandOutput,
+  expandValue,
+  fail,
+  type OrderedStepItem,
+  resolveHeadHash,
+  walkChain,
+};
@@ -0,0 +1,12 @@
+export {
+  generateArchitectureReference as cmdSkillArchitecture,
+  generateCliReference as cmdSkillCli,
+  generateModeratorReference as cmdSkillModerator,
+  generateYamlReference as cmdSkillYaml,
+} from "@uncaged/workflow-util";
+
+/** Skill names reported by `cmdSkillList`, in listing order. */
+const SKILL_NAMES = ["cli", "architecture", "yaml", "moderator"] as const;
+
+/** Return a fresh array of the skill names; the module constant itself is never handed out. */
+export function cmdSkillList(): ReadonlyArray<string> {
+  return Array.from(SKILL_NAMES);
+}
@@ -0,0 +1,320 @@
+import type { BootstrapCapableStore } from "@uncaged/json-cas";
+import type {
+  CasRef,
+  StartEntry,
+  StepEntry,
+  StepNodePayload,
+  ThreadForkOutput,
+  ThreadId,
+  ThreadStepsOutput,
+} from "@uncaged/workflow-protocol";
+import { generateUlid } from "@uncaged/workflow-util";
+import { createUwfStore, loadThreadsIndex, saveThreadsIndex } from "../store.js";
+import {
+  collectOrderedSteps,
+  expandDeep,
+  expandOutput,
+  fail,
+  resolveHeadHash,
+  walkChain,
+} from "./shared.js";
+
+/** A tool invocation extracted from a turn for display. */
+type TurnToolCall = {
+  /** Tool name. */
+  name: string;
+  /** Tool arguments as a string; empty when the stored value was not a string. */
+  args: string;
+};
+
+/** Display fields extracted from one stored turn node. */
+type TurnData = {
+  /** Turn index (rendered one-based in headings); falls back to the turn's position among loaded turns. */
+  index: number;
+  /** Turn role; defaults to "assistant" when the stored value is not a string. */
+  role: string;
+  /** Text content; empty string when absent. */
+  content: string;
+  /** Parsed tool calls, or null when the turn has none. */
+  toolCalls: TurnToolCall[] | null;
+};
+
+/**
+ * List all steps in a thread (previously: thread steps)
+ *
+ * The returned list begins with the start entry, followed by the step entries
+ * in oldest-to-newest order. Each step's output reference is resolved to the
+ * stored payload, and its duration is `completedAtMs - startedAtMs`.
+ *
+ * @param storageRoot - Root directory of the workflow storage.
+ * @param threadId - Thread whose steps are listed.
+ */
+export async function cmdStepList(
+  storageRoot: string,
+  threadId: ThreadId,
+): Promise<ThreadStepsOutput> {
+  const headHash = await resolveHeadHash(storageRoot, threadId);
+  const uwf = await createUwfStore(storageRoot);
+  const chain = walkChain(uwf, headHash);
+
+  // The start node is re-read only to obtain its timestamp.
+  const startNode = uwf.store.get(chain.startHash);
+  if (startNode === null) {
+    fail(`StartNode not found: ${chain.startHash}`);
+  }
+
+  const { workflow, prompt } = chain.start;
+  const startEntry: StartEntry = {
+    hash: chain.startHash,
+    workflow,
+    prompt,
+    timestamp: startNode.timestamp,
+  };
+
+  const stepEntries = collectOrderedSteps(uwf, headHash, chain).map(
+    ({ hash, payload, timestamp }): StepEntry => ({
+      hash,
+      role: payload.role,
+      output: expandOutput(uwf, payload.output),
+      detail: payload.detail ?? null,
+      agent: payload.agent,
+      timestamp,
+      durationMs: payload.completedAtMs - payload.startedAtMs,
+    }),
+  );
+
+  return {
+    thread: threadId,
+    workflow,
+    steps: [startEntry, ...stepEntries],
+  };
+}
+
+/**
+ * Show details of a specific step (previously: thread step-details)
+ *
+ * Loads the step node, then returns its detail node with every CAS reference
+ * expanded. Terminates through `fail` when the node is missing, is not a
+ * StepNode, or has no detail.
+ */
+export async function cmdStepShow(storageRoot: string, stepHash: CasRef): Promise<unknown> {
+  const { store, schemas } = await createUwfStore(storageRoot);
+
+  const stepNode = store.get(stepHash);
+  if (stepNode === null) {
+    fail(`CAS node not found: ${stepHash}`);
+  }
+  if (stepNode.type !== schemas.stepNode) {
+    fail(`node ${stepHash} is not a StepNode`);
+  }
+
+  const { detail } = stepNode.payload as StepNodePayload;
+  if (!detail) {
+    fail(`step ${stepHash} has no detail`);
+  }
+  return expandDeep(store, detail);
+}
+
+/**
+ * Fork a thread from a specific step (previously: thread fork)
+ *
+ * Registers a newly generated thread id in the threads index with `stepHash`
+ * as its head. The hash must refer to a StartNode or StepNode.
+ *
+ * @returns The new thread id and the step it was forked from.
+ */
+export async function cmdStepFork(
+  storageRoot: string,
+  stepHash: CasRef,
+): Promise<ThreadForkOutput> {
+  const uwf = await createUwfStore(storageRoot);
+  const forkPoint = uwf.store.get(stepHash);
+  if (forkPoint === null) {
+    fail(`CAS node not found: ${stepHash}`);
+  }
+  const isChainNode =
+    forkPoint.type === uwf.schemas.startNode || forkPoint.type === uwf.schemas.stepNode;
+  if (!isChainNode) {
+    fail(`node ${stepHash} is not a StartNode or StepNode`);
+  }
+
+  const thread = generateUlid(Date.now()) as ThreadId;
+  const threads = await loadThreadsIndex(storageRoot);
+  threads[thread] = stepHash;
+  await saveThreadsIndex(storageRoot, threads);
+
+  return { thread, forkedFrom: { step: stepHash } };
+}
+
+/**
+ * Load a step's detail node from the CAS store and return its payload as a
+ * generic record. Terminates through `fail` when the node does not exist.
+ */
+function loadStepDetail(store: BootstrapCapableStore, detailRef: CasRef): Record<string, unknown> {
+  const stored = store.get(detailRef);
+  if (stored === null) {
+    fail(`detail node not found: ${detailRef}`);
+  }
+  const { payload } = stored;
+  return payload as Record<string, unknown>;
+}
+
+/**
+ * Extract displayable tool calls from a turn's raw `toolCalls` value.
+ *
+ * Entries that are not objects or lack a string `name` are skipped; a
+ * non-string `args` becomes the empty string.
+ *
+ * @returns The parsed calls, or null when the input is not a non-empty array
+ *   or no entry qualifies.
+ */
+function parseTurnToolCalls(raw: unknown): TurnToolCall[] | null {
+  if (!Array.isArray(raw) || raw.length === 0) {
+    return null;
+  }
+  const calls = raw.flatMap((entry: unknown): TurnToolCall[] => {
+    if (entry === null || typeof entry !== "object") {
+      return [];
+    }
+    const { name, args } = entry as Record<string, unknown>;
+    if (typeof name !== "string") {
+      return [];
+    }
+    return [{ name, args: typeof args === "string" ? args : "" }];
+  });
+  return calls.length === 0 ? null : calls;
+}
+
+/**
+ * Render one turn as markdown: a role line, one bullet per tool call (with its
+ * arguments in backticks when non-empty), then the content after a blank line.
+ */
+function formatTurnBody(turn: TurnData): string {
+  const callLines =
+    turn.toolCalls === null
+      ? []
+      : turn.toolCalls.map((call) => {
+          const argsSuffix = call.args === "" ? "" : ` — \`${call.args}\``;
+          return `- **${call.name}**${argsSuffix}`;
+        });
+
+  const lines = [`**Turn role:** ${turn.role}`, ...callLines];
+  if (turn.content !== "") {
+    // The role line is always present, so the content always gets a blank separator.
+    lines.push("", turn.content);
+  }
+  return lines.join("\n");
+}
+
+/**
+ * Load one turn node and extract its display fields.
+ *
+ * @param store - CAS store to read the turn node from.
+ * @param turnRef - Expected to be the turn's hash string; other types yield null.
+ * @param fallbackIndex - Index used when the stored turn has no numeric `index`.
+ * @returns The turn data, or null when the reference is not a string, the node
+ *   is missing, or the turn has neither content nor tool calls.
+ */
+function parseSingleTurn(
+  store: BootstrapCapableStore,
+  turnRef: unknown,
+  fallbackIndex: number,
+): TurnData | null {
+  if (typeof turnRef !== "string") return null;
+
+  const stored = store.get(turnRef as CasRef);
+  if (stored === null) return null;
+
+  const raw = stored.payload as Record<string, unknown>;
+  const content = typeof raw.content === "string" ? raw.content : "";
+  const toolCalls = parseTurnToolCalls(raw.toolCalls);
+
+  // A turn with nothing to display is dropped.
+  const hasNothingToShow = content === "" && toolCalls === null;
+  if (hasNothingToShow) return null;
+
+  const index = typeof raw.index === "number" ? raw.index : fallbackIndex;
+  const role = typeof raw.role === "string" ? raw.role : "assistant";
+  return { index, role, content, toolCalls };
+}
+
+/**
+ * Load all turn nodes from the CAS store and extract their display fields.
+ *
+ * Turns that cannot be parsed are skipped. The fallback index handed to each
+ * turn is the number of turns accepted so far.
+ *
+ * @returns The parsed turns in input order; empty when `turns` is not a non-empty array.
+ */
+function loadTurnData(store: BootstrapCapableStore, turns: unknown): TurnData[] {
+  if (!Array.isArray(turns) || turns.length === 0) {
+    return [];
+  }
+
+  return turns.reduce<TurnData[]>((accepted, turnRef: unknown) => {
+    const parsed = parseSingleTurn(store, turnRef, accepted.length);
+    if (parsed !== null) {
+      accepted.push(parsed);
+    }
+    return accepted;
+  }, []);
+}
+
+/**
+ * Select the most recent turns whose rendered size fits within the quota.
+ *
+ * Turns are considered from newest to oldest. Each costs the length of its
+ * heading plus body, and every turn after the first adds 2 for the separator.
+ * The newest turn is always kept, even when it alone exceeds the quota.
+ *
+ * @param turnData - All turns, oldest first.
+ * @param availableQuota - Character budget for the turn section.
+ * @returns The selected turns, oldest first.
+ */
+function selectTurnsForQuota(turnData: TurnData[], availableQuota: number): TurnData[] {
+  const newestFirst: TurnData[] = [];
+  let usedChars = 0;
+
+  for (const turn of [...turnData].reverse()) {
+    if (turn === undefined) continue;
+
+    const blockLength = `## Turn ${turn.index + 1}\n\n`.length + formatTurnBody(turn).length;
+    const hasEarlierPick = newestFirst.length > 0;
+    const cost = blockLength + (hasEarlierPick ? 2 : 0);
+
+    // Stop at the first turn that would overflow, unless nothing is selected yet.
+    if (hasEarlierPick && usedChars + cost > availableQuota) {
+      break;
+    }
+
+    newestFirst.push(turn);
+    usedChars += cost;
+  }
+
+  return newestFirst.reverse();
+}
+
+/**
+ * Assemble the final markdown for a step: a header, an optional omission
+ * notice, and one section per selected turn.
+ *
+ * @param stepHash - Hash shown in the title.
+ * @param role - Step role shown in the header.
+ * @param agent - Agent name shown in the header.
+ * @param turnData - All available turns; only used to detect omitted turns.
+ * @param selectedTurns - Turns to render, in display order.
+ * @returns The markdown text; header only when no turns are selected.
+ */
+function formatStepMarkdown(
+  stepHash: CasRef,
+  role: string,
+  agent: string,
+  turnData: TurnData[],
+  selectedTurns: TurnData[],
+): string {
+  const lines = [`# Step ${stepHash}`, "", `**Role:** ${role}`, `**Agent:** ${agent}`];
+
+  if (selectedTurns.length > 0) {
+    const omitted = turnData.length - selectedTurns.length;
+    if (omitted > 0) {
+      lines.push("", `_[Earlier turns omitted due to quota. Use --quota to increase.]_`);
+    }
+    for (const turn of selectedTurns) {
+      lines.push("", `## Turn ${turn.index + 1}`, "", formatTurnBody(turn));
+    }
+  }
+
+  return lines.join("\n");
+}
+
+/**
+ * Read a step's agent turns as human-readable markdown with quota enforcement
+ *
+ * The header is always emitted. Turns are then added from the most recent
+ * backwards until the character quota (minus the header and a fixed safety
+ * buffer) is used up; the most recent turn is always included.
+ *
+ * @param storageRoot - Root directory of the workflow storage.
+ * @param stepHash - Hash of the StepNode to render.
+ * @param quota - Total character budget for the output.
+ * @returns The rendered markdown; header only when the step has no detail or no displayable turns.
+ */
+export async function cmdStepRead(
+  storageRoot: string,
+  stepHash: CasRef,
+  quota: number,
+): Promise<string> {
+  const uwf = await createUwfStore(storageRoot);
+  const node = uwf.store.get(stepHash);
+  if (node === null) {
+    fail(`CAS node not found: ${stepHash}`);
+  }
+  if (node.type !== uwf.schemas.stepNode) {
+    fail(`node ${stepHash} is not a StepNode`);
+  }
+  const payload = node.payload as StepNodePayload;
+
+  // Rendered once: it is both the header-only result and the basis for the quota.
+  const headerSection = formatStepMarkdown(stepHash, payload.role, payload.agent, [], []);
+
+  // Same emptiness test as cmdStepShow, so an absent (undefined) detail is
+  // handled like a null one instead of being passed on to the store lookup.
+  if (!payload.detail) {
+    return headerSection;
+  }
+
+  const detail = loadStepDetail(uwf.store, payload.detail);
+  const turnData = loadTurnData(uwf.store, detail.turns);
+  if (turnData.length === 0) {
+    return headerSection;
+  }
+
+  // Characters held back from the quota in addition to the header length.
+  const BUFFER = 200;
+  const availableQuota = quota - headerSection.length - BUFFER;
+  const selectedTurns = selectTurnsForQuota(turnData, availableQuota);
+
+  return formatStepMarkdown(stepHash, payload.role, payload.agent, turnData, selectedTurns);
+}
@@ -0,0 +1,23 @@
+/** Milliseconds per supported relative-time unit suffix. */
+const RELATIVE_UNIT_MS: Readonly<Record<string, number>> = {
+  d: 86400000,
+  h: 3600000,
+  m: 60000,
+};
+
+/**
+ * Parse time input: ISO date (YYYY-MM-DD, YYYY-MM-DDTHH:MM:SS) or relative (7d, 24h, 30m)
+ * Returns Unix timestamp in milliseconds.
+ *
+ * Relative inputs are subtracted from `nowMs`; anything else is handed to `Date.parse`.
+ *
+ * @param input - User-supplied time string; surrounding whitespace is ignored.
+ * @param nowMs - Current time in milliseconds, the anchor for relative inputs.
+ * @returns The resolved timestamp in milliseconds.
+ * @throws Error when the input is neither a parseable date nor a relative
+ *   offset whose amount is a safe integer.
+ */
+export function parseTimeInput(input: string, nowMs: number): number {
+  const trimmed = input.trim();
+  const invalid = (): Error =>
+    new Error(`invalid time format: ${trimmed} (expected ISO date or relative like '7d')`);
+
+  // Relative time: 7d, 24h, 30m
+  const relativeMatch = /^(\d+)([dhm])$/.exec(trimmed);
+  if (relativeMatch !== null) {
+    const [, digits = "", unit = ""] = relativeMatch;
+    const value = Number.parseInt(digits, 10);
+    const multiplier = RELATIVE_UNIT_MS[unit];
+    // An overlong digit run parses to Infinity or an imprecise number; reject
+    // it rather than return a meaningless or non-finite timestamp.
+    if (multiplier === undefined || !Number.isSafeInteger(value)) {
+      throw invalid();
+    }
+    return nowMs - value * multiplier;
+  }
+
+  // ISO date: try parsing
+  const parsed = Date.parse(trimmed);
+  if (Number.isNaN(parsed)) {
+    throw invalid();
+  }
+  return parsed;
+}
@@ -2,25 +2,30 @@ import { readFile } from "node:fs/promises";

 import type { JSONSchema } from "@uncaged/json-cas";
 import { putSchema, validate } from "@uncaged/json-cas";
-import type { CasRef, RoleDefinition, WorkflowPayload } from "@uncaged/workflow-protocol";
+import type { CasRef, RoleDefinition, Target, WorkflowPayload } from "@uncaged/workflow-protocol";
 import { parse } from "yaml";

 import {
  createUwfStore,
+  discoverProjectWorkflows,
  findRegistryName,
  loadWorkflowRegistry,
  resolveWorkflowHash,
  saveWorkflowRegistry,
  type UwfStore,
 } from "../store.js";
-import { parseWorkflowPayload } from "../validate.js";
+import { checkWorkflowFilenameConsistency, parseWorkflowPayload } from "../validate.js";
+import { validateWorkflow } from "../validate-semantic.js";
+
+export type WorkflowOrigin = "local" | "global";

 export type WorkflowListEntry = {
  name: string;
  hash: CasRef;
+  origin: WorkflowOrigin;
 };

-export type WorkflowPutOutput = {
+export type WorkflowAddOutput = {
  name: string;
  hash: CasRef;
 };
@@ -42,53 +47,72 @@ function isJsonSchema(value: unknown): value is JSONSchema {
  return typeof value === "object" && value !== null && !Array.isArray(value);
 }

-async function resolveOutputSchemaRef(
+/**
+ * Normalize graph: validate each status → target mapping.
+ *
+ * Every target must carry a non-empty string prompt, otherwise `fail` is
+ * called. The returned graph is rebuilt from scratch and each target keeps
+ * only its `role` and `prompt`.
+ */
+function normalizeGraph(
+  graph: Record<string, Record<string, Target>>,
+): Record<string, Record<string, Target>> {
+  const normalizedGraph: Record<string, Record<string, Target>> = {};
+  for (const [node, statusMap] of Object.entries(graph)) {
+    const targets: Record<string, Target> = {};
+    for (const [status, { prompt, role }] of Object.entries(statusMap)) {
+      const hasPrompt = typeof prompt === "string" && prompt.trim() !== "";
+      if (!hasPrompt) {
+        fail(`graph[${node}][${status}] → "${role}": prompt is required (non-empty string)`);
+      }
+      targets[status] = { role, prompt };
+    }
+    normalizedGraph[node] = targets;
+  }
+  return normalizedGraph;
+}
+
+async function resolveFrontmatterRef(
  uwf: UwfStore,
  roleName: string,
-  outputSchema: unknown,
+  frontmatter: unknown,
 ): Promise<CasRef> {
-  if (!isJsonSchema(outputSchema)) {
-    fail(`role "${roleName}": outputSchema must be a JSON Schema object`);
+  if (!isJsonSchema(frontmatter)) {
+    fail(`role "${roleName}": frontmatter must be a JSON Schema object`);
  }
-  const schema: JSONSchema = outputSchema.title === undefined
-    ? { ...outputSchema, title: roleName }
-    : outputSchema;
+  const schema: JSONSchema =
+    frontmatter.title === undefined ? { ...frontmatter, title: roleName } : frontmatter;
  return putSchema(uwf.store, schema);
 }

-async function materializeWorkflowPayload(
+export async function materializeWorkflowPayload(
  uwf: UwfStore,
  raw: WorkflowPayload,
 ): Promise<WorkflowPayload> {
  const roles: Record<string, RoleDefinition> = {};
  for (const [roleName, role] of Object.entries(raw.roles)) {
-    const outputSchema = await resolveOutputSchemaRef(
+    const frontmatter = await resolveFrontmatterRef(
      uwf,
      `${raw.name}.${roleName}`,
-      role.outputSchema,
+      role.frontmatter,
    );
    roles[roleName] = {
      description: role.description,
-      identity: role.identity,
-      prepare: role.prepare,
-      execute: role.execute,
-      report: role.report,
-      outputSchema,
+      goal: role.goal,
+      capabilities: role.capabilities,
+      procedure: role.procedure,
+      output: role.output,
+      frontmatter,
    };
  }
  return {
    name: raw.name,
    description: raw.description,
    roles,
-    conditions: raw.conditions,
-    graph: raw.graph,
+    graph: normalizeGraph(raw.graph),
  };
 }

-export async function cmdWorkflowPut(
+export async function cmdWorkflowAdd(
  storageRoot: string,
  filePath: string,
-): Promise<WorkflowPutOutput> {
+): Promise<WorkflowAddOutput> {
  let text: string;
  try {
    text = await readFile(filePath, "utf8");
@@ -108,6 +132,16 @@ export async function cmdWorkflowPut(
    fail("invalid workflow YAML: expected WorkflowPayload shape");
  }

+  const filenameError = checkWorkflowFilenameConsistency(filePath, payload);
+  if (filenameError !== null) {
+    fail(filenameError);
+  }
+
+  const semanticErrors = validateWorkflow(payload);
+  if (semanticErrors.length > 0) {
+    fail(`workflow validation failed:\n${semanticErrors.map((e) => `  - ${e}`).join("\n")}`);
+  }
+
  const uwf = await createUwfStore(storageRoot);
  const materialized = await materializeWorkflowPayload(uwf, payload);

@@ -150,7 +184,26 @@ export async function cmdWorkflowShow(
  };
 }

-export async function cmdWorkflowList(storageRoot: string): Promise<WorkflowListEntry[]> {
+export async function cmdWorkflowList(
+  storageRoot: string,
+  projectRoot: string,
+): Promise<WorkflowListEntry[]> {
+  const localEntries = await discoverProjectWorkflows(projectRoot);
  const registry = await loadWorkflowRegistry(storageRoot);
-  return Object.entries(registry).map(([name, hash]) => ({ name, hash }));
+
+  const result: WorkflowListEntry[] = [];
+  const localNames = new Set<string>();
+
+  for (const entry of localEntries) {
+    localNames.add(entry.name);
+    result.push({ name: entry.name, hash: "(local)", origin: "local" });
+  }
+
+  for (const [name, hash] of Object.entries(registry)) {
+    if (!localNames.has(name)) {
+      result.push({ name, hash, origin: "global" });
+    }
+  }
+
+  return result;
 }
@@ -7,6 +7,6 @@ export function formatOutput(data: unknown, format: OutputFormat): string {
    case "json":
      return JSON.stringify(data);
    case "yaml":
-      return stringify(data).trimEnd();
+      return stringify(data, { aliasDuplicateObjects: false }).trimEnd();
  }
 }
@@ -0,0 +1,53 @@
+import type { Target } from "@uncaged/workflow-protocol";
+import mustache from "mustache";
+
+import type { EvaluateResult, Result } from "./types.js";
+
+// Disable HTML escaping — prompts are plain text, not HTML.
+mustache.escape = (text: string) => text;
+
+// Role name for which `evaluate` always uses UNIT_STATUS, ignoring the output's status.
+const START_ROLE = "$START";
+// Status used when the last role is START_ROLE or the output carries no string status.
+const UNIT_STATUS = "_";
+
+// Output of the previous role; also the view passed to mustache when rendering the edge prompt.
+type LastOutput = Record<string, unknown>;
+
+// Key in the last output from which the status is read.
+const STATUS_KEY = "$status";
+
+/**
+ * Decide the next role and render its edge prompt.
+ *
+ * The status is `UNIT_STATUS` for the start role or when the last output has no
+ * string `$status`; otherwise it is that `$status` value. The target is looked
+ * up as `graph[lastRole][status]` and its prompt template is rendered with the
+ * last output as the mustache view.
+ *
+ * @param graph - Transition table: role → status → target.
+ * @param lastRole - Role that just finished (or the start role).
+ * @param lastOutput - Output of that role.
+ * @returns `ok: true` with the next role and rendered prompt, or `ok: false`
+ *   with an error when no transition exists or rendering throws.
+ */
+export function evaluate(
+  graph: Record<string, Record<string, Target>>,
+  lastRole: string,
+  lastOutput: LastOutput,
+): Result<EvaluateResult, Error> {
+  let status = UNIT_STATUS;
+  if (lastRole !== START_ROLE) {
+    const reported = lastOutput[STATUS_KEY];
+    if (typeof reported === "string") {
+      status = reported;
+    }
+  }
+
+  const transitions = graph[lastRole];
+  if (transitions === undefined) {
+    const error = new Error(`no transitions defined for role "${lastRole}"`);
+    return { ok: false, error };
+  }
+
+  const next = transitions[status];
+  if (next === undefined) {
+    const error = new Error(`no transition for role "${lastRole}" with status "${status}"`);
+    return { ok: false, error };
+  }
+
+  try {
+    const prompt = mustache.render(next.prompt, lastOutput);
+    return { ok: true, value: { role: next.role, prompt } };
+  } catch (caught) {
+    // Non-Error throwables are wrapped so the error channel always holds an Error.
+    const error = caught instanceof Error ? caught : new Error(String(caught));
+    return { ok: false, error };
+  }
+}
@@ -0,0 +1,2 @@
+export { evaluate } from "./evaluate.js";
+export type { EvaluateResult } from "./types.js";
@@ -0,0 +1,7 @@
+/** Outcome of a fallible operation, discriminated by `ok`: a `value` on success, an `error` on failure. */
+export type Result<T, E> = { ok: true; value: T } | { ok: false; error: E };
+
+/** The result of moderator evaluation — which role to go to, and the edge prompt. */
+export type EvaluateResult = {
+  role: string;
+  prompt: string;
+};
@@ -1,15 +1,14 @@
 import type { Hash, Store } from "@uncaged/json-cas";
 import { putSchema } from "@uncaged/json-cas";
-import {
-  START_NODE_SCHEMA,
-  STEP_NODE_SCHEMA,
-  WORKFLOW_SCHEMA,
-} from "@uncaged/workflow-protocol";
+import { START_NODE_SCHEMA, STEP_NODE_SCHEMA, WORKFLOW_SCHEMA } from "@uncaged/workflow-protocol";
+
+export const TEXT_SCHEMA = { type: "string" as const };

 export type UwfSchemaHashes = {
  workflow: Hash;
  startNode: Hash;
  stepNode: Hash;
+  text: Hash;
 };

 /**
@@ -17,10 +16,11 @@ export type UwfSchemaHashes = {
 * Idempotent: safe to call on every CLI invocation.
 */
 export async function registerUwfSchemas(store: Store): Promise<UwfSchemaHashes> {
-  const [workflow, startNode, stepNode] = await Promise.all([
+  const [workflow, startNode, stepNode, text] = await Promise.all([
    putSchema(store, WORKFLOW_SCHEMA),
    putSchema(store, START_NODE_SCHEMA),
    putSchema(store, STEP_NODE_SCHEMA),
+    putSchema(store, TEXT_SCHEMA),
  ]);
-  return { workflow, startNode, stepNode };
+  return { workflow, startNode, stepNode, text };
 }
@@ -1,4 +1,4 @@
-import { appendFile, mkdir, readFile, writeFile } from "node:fs/promises";
+import { appendFile, mkdir, readdir, readFile, writeFile } from "node:fs/promises";
 import { homedir } from "node:os";
 import { join } from "node:path";

@@ -11,6 +11,44 @@ import { registerUwfSchemas, type UwfSchemaHashes } from "./schemas.js";

 export type WorkflowRegistry = Record<string, CasRef>;

+/** A workflow entry discovered from the project-local .workflows/ directory. */
+export type ProjectWorkflowEntry = {
+  /**
+   * Workflow name. `discoverProjectWorkflows` sets this to the filename stem
+   * (extension removed); the YAML `name` field is presumably expected to match
+   * it — verify against the loader.
+   */
+  name: string;
+  /**
+   * Path to the YAML file: the `.workflows` directory joined with the file name.
+   * Absolute only when the project root given to discovery is absolute.
+   */
+  filePath: string;
+};
+
+/**
+ * Scan `<projectRoot>/.workflows/` (non-recursive) for `*.yaml` and `*.yml`
+ * entries and return them sorted by file name.
+ *
+ * Each entry's name is the file name without its extension; names that would
+ * be empty (a file called `.yaml` or `.yml`) are skipped.
+ *
+ * @param projectRoot - Directory that may contain a `.workflows` folder.
+ * @returns Discovered entries, or an empty array if `.workflows` does not
+ *   exist or is not a directory.
+ * @throws Any other error raised while reading the directory.
+ */
+export async function discoverProjectWorkflows(
+  projectRoot: string,
+): Promise<ProjectWorkflowEntry[]> {
+  const dir = join(projectRoot, ".workflows");
+  let entries: string[];
+  try {
+    entries = await readdir(dir);
+  } catch (e) {
+    const err = e as NodeJS.ErrnoException;
+    // A missing directory (or a non-directory in its place) means "no local workflows".
+    if (err.code === "ENOENT" || err.code === "ENOTDIR") {
+      return [];
+    }
+    throw e;
+  }
+
+  const result: ProjectWorkflowEntry[] = [];
+  // readdir order is not guaranteed; sort so duplicate stems resolve the same way every run.
+  for (const entry of [...entries].sort()) {
+    const extension = [".yaml", ".yml"].find((suffix) => entry.endsWith(suffix));
+    if (extension === undefined) {
+      continue;
+    }
+    const stem = entry.slice(0, -extension.length);
+    // A bare ".yaml" / ".yml" file would produce an empty workflow name.
+    if (stem === "") {
+      continue;
+    }
+    result.push({ name: stem, filePath: join(dir, entry) });
+  }
+  return result;
+}
+
 /** Default filesystem root for uwf data (`~/.uncaged/workflow`). */
 export function getDefaultStorageRoot(): string {
  return join(homedir(), ".uncaged", "workflow");
@@ -104,6 +142,22 @@ export function resolveWorkflowHash(registry: WorkflowRegistry, id: string): Cas
  return registry[id] !== undefined ? registry[id] : id;
 }

+/**
+ * Look up the YAML file path of a project-local workflow by name.
+ * Yields the path of the first entry whose name matches exactly, or null when none does.
+ */
+export function resolveProjectWorkflowFile(
+  localEntries: ProjectWorkflowEntry[],
+  name: string,
+): string | null {
+  const match = localEntries.find((candidate) => candidate.name === name);
+  return match === undefined ? null : match.filePath;
+}
+
 export function findRegistryName(registry: WorkflowRegistry, hash: Hash): string | null {
  for (const [name, h] of Object.entries(registry)) {
    if (h === hash) {
@@ -0,0 +1,326 @@
+import type { WorkflowPayload } from "@uncaged/workflow-protocol";
+
+/** Loosely typed schema fragment; its fields are narrowed at each use site. */
+type SchemaObj = Record<string, unknown>;
+
+/** Graph node names that are not roles: rejected as role names, exempt from role lookups. */
+const RESERVED_NAMES = new Set(["$START", "$END"]);
+
+/**
+ * Extract the variable names referenced by mustache tags in a prompt string.
+ *
+ * Recognises `{{name}}`, `{{{name}}}` and `{{& name}}`, with optional whitespace
+ * inside the braces. Section openers (`{{#name}}`, `{{^name}}`) contribute their
+ * name; section closers, comments (`{{! … }}`), partials (`{{> … }}`) and
+ * delimiter changes (`{{= … =}}`) are ignored, as is the implicit iterator
+ * `{{.}}`. For a dotted path such as `user.name` only the root segment is
+ * returned, because callers compare the result against top-level property names.
+ *
+ * Names are returned in order of appearance; repeated tags are not de-duplicated.
+ */
+function extractMustacheVars(prompt: string): string[] {
+  const vars: string[] = [];
+  const tagPattern = /\{\{\{?([^}]+)\}\}\}?/g;
+  for (const match of prompt.matchAll(tagPattern)) {
+    const body = match[1].trim();
+    if (body === "") continue;
+
+    const sigil = body[0];
+    // Tags that never name a variable to look up.
+    if (sigil === "!" || sigil === ">" || sigil === "/" || sigil === "=") continue;
+
+    const hasPrefix = sigil === "#" || sigil === "^" || sigil === "&";
+    const name = (hasPrefix ? body.slice(1) : body).trim();
+
+    // Root segment only; this is empty for "." and for names starting with a dot.
+    const root = name.split(".")[0];
+    if (root !== "") vars.push(root);
+  }
+  return vars;
+}
+
+/** True when the frontmatter schema is a non-null object carrying a `oneOf` array (multi-exit). */
+function isOneOfSchema(fm: unknown): fm is SchemaObj & { oneOf: SchemaObj[] } {
+  return typeof fm === "object" && fm !== null && Array.isArray((fm as SchemaObj).oneOf);
+}
+
+/**
+ * True when the schema declares `properties.$status.enum` with more than one
+ * value other than the "_" wildcard, i.e. an enum-based multi-exit role.
+ */
+function isEnumMultiExit(fm: unknown): boolean {
+  if (fm === null || typeof fm !== "object") return false;
+  const properties = (fm as SchemaObj).properties as Record<string, SchemaObj> | undefined;
+  const declared = properties?.$status?.enum;
+  if (!Array.isArray(declared)) return false;
+
+  // Count the non-wildcard statuses; two or more means several exits.
+  let named = 0;
+  for (const value of declared as string[]) {
+    if (value !== "_") named += 1;
+  }
+  return named > 1;
+}
+
+/** List the values of `properties.$status.enum`, minus the "_" wildcard; empty if absent. */
+function getEnumStatuses(fm: SchemaObj): string[] {
+  const properties = fm.properties as Record<string, SchemaObj> | undefined;
+  const declared = properties?.$status?.enum;
+  return Array.isArray(declared)
+    ? (declared as string[]).filter((value) => value !== "_")
+    : [];
+}
+
+/** Collect the keys of `schema.properties`; an empty set when it is not an object. */
+function getPropertyNames(schema: SchemaObj): Set<string> {
+  const { properties } = schema;
+  const usable = typeof properties === "object" && properties !== null;
+  return new Set(usable ? Object.keys(properties as Record<string, unknown>) : []);
+}
+
+/**
+ * Collect the string `$status.const` discriminant of each oneOf variant, in
+ * variant order. Variants without a string const contribute nothing.
+ */
+function getOneOfStatuses(variants: SchemaObj[]): string[] {
+  return variants.flatMap((variant) => {
+    const props = variant.properties as Record<string, SchemaObj> | undefined;
+    const discriminant = props?.$status?.const;
+    return typeof discriminant === "string" ? [discriminant] : [];
+  });
+}
+
+/**
+ * Validate role names against the graph: reserved names may not be roles, every
+ * non-reserved graph node must be a defined role, and every role must be a graph node.
+ */
+function checkRoleReferences(payload: WorkflowPayload, errors: string[]): void {
+  const definedRoles = Object.keys(payload.roles);
+  const nodes = Object.keys(payload.graph);
+  const roleSet = new Set(definedRoles);
+  const nodeSet = new Set(nodes);
+
+  // Pass 1: reserved names used as role names.
+  for (const name of definedRoles) {
+    if (!RESERVED_NAMES.has(name)) continue;
+    errors.push(`reserved name "${name}" must not appear in roles`);
+  }
+
+  // Pass 2: graph nodes that are neither reserved nor a defined role.
+  for (const node of nodes) {
+    const known = RESERVED_NAMES.has(node) || roleSet.has(node);
+    if (!known) {
+      errors.push(`graph references unknown role "${node}"`);
+    }
+  }
+
+  // Pass 3: defined roles that never appear as a graph node.
+  for (const name of definedRoles) {
+    if (RESERVED_NAMES.has(name) || nodeSet.has(name)) continue;
+    errors.push(`role "${name}" is defined but not referenced in graph`);
+  }
+}
+
+/**
+ * Validate graph shape: `$START` must exist with a single "_" edge, `$END` must
+ * not be a source node, every edge must target `$END` or a defined role, and
+ * every role must be reachable from `$START`.
+ */
+function checkGraphStructure(payload: WorkflowPayload, errors: string[]): void {
+  const { graph, roles } = payload;
+  const knownRoles = new Set(Object.keys(roles));
+  const nodes = new Set(Object.keys(graph));
+
+  if (nodes.has("$START")) {
+    const [firstKey, ...otherKeys] = Object.keys(graph.$START);
+    if (firstKey !== "_" || otherKeys.length > 0) {
+      errors.push('$START must have exactly one edge with status "_"');
+    }
+  } else {
+    errors.push("$START must be defined in graph");
+  }
+
+  if (nodes.has("$END")) {
+    errors.push("$END must not have outgoing edges");
+  }
+
+  for (const [node, edges] of Object.entries(graph)) {
+    for (const [status, { role }] of Object.entries(edges)) {
+      const resolvable = role === "$END" || knownRoles.has(role);
+      if (!resolvable) {
+        errors.push(`edge ${node}→${status}: unknown target role "${role}"`);
+      }
+    }
+  }
+
+  checkReachability(knownRoles, collectReachableRoles(graph), errors);
+}
+
+/**
+ * Breadth-first walk from `$START`, returning every role that some chain of
+ * edges leads to. `$END` is never included; an absent `$START` yields an empty set.
+ */
+function collectReachableRoles(graph: WorkflowPayload["graph"]): Set<string> {
+  const reachable = new Set<string>();
+  // Work list doubling as the BFS queue: for-of keeps visiting entries appended below.
+  const pending: string[] = ["$START"];
+
+  for (const current of pending) {
+    const edges = graph[current];
+    if (!edges) continue;
+    for (const { role } of Object.values(edges)) {
+      if (role === "$END" || reachable.has(role)) continue;
+      reachable.add(role);
+      pending.push(role);
+    }
+  }
+
+  return reachable;
+}
+
+/** Report every non-reserved role that is missing from the reachable set. */
+function checkReachability(roleNames: Set<string>, reachable: Set<string>, errors: string[]): void {
+  const stranded = [...roleNames].filter(
+    (name) => !RESERVED_NAMES.has(name) && !reachable.has(name),
+  );
+  for (const name of stranded) {
+    errors.push(`role "${name}" is not reachable from $START`);
+  }
+}
+
+/**
+ * Report a oneOf role whose variants are not all tagged with a string `$status` const.
+ * Nothing is reported when `statuses` has one entry per variant; otherwise exactly
+ * one error is pushed, describing the first offending variant.
+ */
+function checkOneOfDiscriminant(
+  roleName: string,
+  variants: SchemaObj[],
+  statuses: string[],
+  errors: string[],
+): void {
+  if (statuses.length === variants.length) return;
+
+  const describeProblem = (): string => {
+    for (const variant of variants) {
+      const props = variant.properties as Record<string, SchemaObj> | undefined;
+      if (!props?.$status) {
+        return `role "${roleName}": oneOf variants must have "$status" as const discriminant`;
+      }
+      if (typeof props.$status.const !== "string") {
+        return `role "${roleName}": oneOf variant $status must be a const value`;
+      }
+    }
+    // No single variant looked wrong, yet the counts disagree: use the generic message.
+    return `role "${roleName}": oneOf variant $status must be a const value`;
+  };
+
+  errors.push(describeProblem());
+}
+
+/**
+ * Compare a multi-exit role's graph keys with its declared statuses.
+ * A "_" key is reported on its own; otherwise extra and missing keys are each
+ * reported in one message, in that order.
+ */
+function checkMultiExitEdges(
+  roleName: string,
+  graphKeys: Set<string>,
+  statusSet: Set<string>,
+  errors: string[],
+): void {
+  if (graphKeys.has("_")) {
+    errors.push(`role "${roleName}" is multi-exit but graph uses "_"`);
+    return;
+  }
+
+  const unexpected: string[] = [];
+  for (const key of graphKeys) {
+    if (!statusSet.has(key)) unexpected.push(key);
+  }
+  const absent: string[] = [];
+  for (const status of statusSet) {
+    if (!graphKeys.has(status)) absent.push(status);
+  }
+
+  if (unexpected.length > 0) {
+    errors.push(`role "${roleName}" graph has extra status keys: ${unexpected.join(", ")}`);
+  }
+  if (absent.length > 0) {
+    errors.push(`role "${roleName}" graph is missing status keys: ${absent.join(", ")}`);
+  }
+}
+
+/**
+ * For each edge of a oneOf role, check that the prompt's mustache variables are
+ * properties of the variant whose `$status` const equals the edge status.
+ * Edges with no matching variant are skipped; `$status` itself is always allowed.
+ */
+function checkMultiExitMustache(
+  roleName: string,
+  graphEntry: Record<string, { role: string; prompt: string }>,
+  variants: SchemaObj[],
+  errors: string[],
+): void {
+  const variantFor = (status: string): SchemaObj | undefined =>
+    variants.find((candidate) => {
+      const props = candidate.properties as Record<string, SchemaObj> | undefined;
+      return props?.$status?.const === status;
+    });
+
+  for (const [status, edge] of Object.entries(graphEntry)) {
+    const variant = variantFor(status);
+    if (!variant) continue;
+
+    const known = getPropertyNames(variant);
+    const unknown = extractMustacheVars(edge.prompt).filter(
+      (name) => name !== "$status" && !known.has(name),
+    );
+    for (const name of unknown) {
+      errors.push(`prompt variable "${name}" not found in role "${roleName}" variant "${status}"`);
+    }
+  }
+}
+
+/**
+ * Run the per-role checks, chosen by frontmatter shape: oneOf schemas, then
+ * enum-based multi-exit schemas, then everything else as single-exit.
+ * Reserved names and roles without a graph entry are skipped.
+ */
+function checkRoleConsistency(payload: WorkflowPayload, errors: string[]): void {
+  const { roles, graph } = payload;
+  for (const [roleName, role] of Object.entries(roles)) {
+    const edges = graph[roleName];
+    if (RESERVED_NAMES.has(roleName) || !edges) continue;
+
+    const schema = role.frontmatter as unknown;
+    const edgeKeys = new Set(Object.keys(edges));
+
+    if (isOneOfSchema(schema)) {
+      const variants = schema.oneOf as SchemaObj[];
+      const declared = getOneOfStatuses(variants);
+
+      checkOneOfDiscriminant(roleName, variants, declared, errors);
+      checkMultiExitEdges(roleName, edgeKeys, new Set(declared), errors);
+      checkMultiExitMustache(roleName, edges, variants, errors);
+      continue;
+    }
+
+    if (isEnumMultiExit(schema)) {
+      const declared = getEnumStatuses(schema as SchemaObj);
+      checkMultiExitEdges(roleName, edgeKeys, new Set(declared), errors);
+      // Enum-based schemas are flat, so every edge prompt is checked against the same properties.
+      checkSingleExitMustache(roleName, edges, schema as SchemaObj, errors);
+      continue;
+    }
+
+    checkSingleExitRole(roleName, edgeKeys, edges, schema as SchemaObj | null, errors);
+  }
+}
+
+/**
+ * Validate a single-exit role: its graph entry must consist of exactly the "_"
+ * edge, and that edge's prompt may only reference frontmatter properties.
+ *
+ * An entry without "_" is reported, including an empty entry, since a role with
+ * no outgoing edge can never be left. An entry with "_" plus other keys is
+ * reported as having extra status keys. `$status` is always an allowed variable.
+ *
+ * @param roleName - Role being checked (used in messages).
+ * @param graphKeys - Status keys of the role's graph entry.
+ * @param graphEntry - The role's graph entry (status → target).
+ * @param fm - The role's frontmatter schema, or null when there is none.
+ * @param errors - Collector that messages are appended to.
+ */
+function checkSingleExitRole(
+  roleName: string,
+  graphKeys: Set<string>,
+  graphEntry: Record<string, { role: string; prompt: string }>,
+  fm: SchemaObj | null,
+  errors: string[],
+): void {
+  if (!graphKeys.has("_")) {
+    errors.push(`role "${roleName}" is single-exit but graph has no "_" key`);
+  } else if (graphKeys.size > 1) {
+    errors.push(`role "${roleName}" is single-exit but has status keys other than "_"`);
+  }
+
+  const singleTarget = graphEntry._;
+  if (!singleTarget) return;
+
+  const vars = extractMustacheVars(singleTarget.prompt);
+  const propNames = fm ? getPropertyNames(fm) : new Set<string>();
+  for (const v of vars) {
+    if (v === "$status") continue;
+    if (!propNames.has(v)) {
+      errors.push(`prompt variable "${v}" not found in role "${roleName}" frontmatter`);
+    }
+  }
+}
+
+/**
+ * Check the mustache variables of every edge prompt against the schema's flat
+ * `properties`. `$status` is always allowed; each unknown variable is reported
+ * together with the edge it appears on.
+ */
+function checkSingleExitMustache(
+  roleName: string,
+  graphEntry: Record<string, { role: string; prompt: string }>,
+  fm: SchemaObj,
+  errors: string[],
+): void {
+  const known = getPropertyNames(fm);
+  for (const [status, edge] of Object.entries(graphEntry)) {
+    const unknown = extractMustacheVars(edge.prompt).filter(
+      (name) => name !== "$status" && !known.has(name),
+    );
+    for (const name of unknown) {
+      errors.push(
+        `prompt variable "${name}" in graph[${roleName}][${status}] not found in role "${roleName}" frontmatter`,
+      );
+    }
+  }
+}
+
+/**
+ * Run every semantic check over a parsed WorkflowPayload.
+ * Checks run in a fixed order (role references, graph structure, role
+ * consistency) and append to one list; an empty list means the payload is valid.
+ */
+export function validateWorkflow(payload: WorkflowPayload): string[] {
+  const errors: string[] = [];
+  const checks = [checkRoleReferences, checkGraphStructure, checkRoleConsistency];
+  for (const check of checks) {
+    check(payload, errors);
+  }
+  return errors;
+}
@@ -1,3 +1,4 @@
+import { basename } from "node:path";
 import type { CasRef, WorkflowPayload } from "@uncaged/workflow-protocol";

 const CAS_REF_PATTERN = /^[0-9A-HJKMNP-TV-Z]{13}$/;
@@ -14,31 +15,30 @@ function isRoleDefinition(value: unknown): boolean {
  if (!isRecord(value)) {
    return false;
  }
-  const outputSchema = value.outputSchema;
-  const schemaOk = isRecord(outputSchema) && typeof outputSchema.type === "string";
+  const frontmatter = value.frontmatter;
+  const frontmatterOk =
+    isRecord(frontmatter) &&
+    (typeof frontmatter.type === "string" || Array.isArray(frontmatter.oneOf));
+  const capabilities = value.capabilities;
+  const capabilitiesOk =
+    Array.isArray(capabilities) && capabilities.every((c) => typeof c === "string");
  return (
    typeof value.description === "string" &&
-    typeof value.identity === "string" &&
-    typeof value.prepare === "string" &&
-    typeof value.execute === "string" &&
-    typeof value.report === "string" &&
-    schemaOk
+    typeof value.goal === "string" &&
+    capabilitiesOk &&
+    typeof value.procedure === "string" &&
+    typeof value.output === "string" &&
+    frontmatterOk
  );
 }

-function isConditionDefinition(value: unknown): boolean {
+function isTarget(value: unknown): boolean {
  if (!isRecord(value)) {
    return false;
  }
-  return typeof value.description === "string" && typeof value.expression === "string";
-}
-
-function isTransition(value: unknown): boolean {
-  if (!isRecord(value)) {
-    return false;
-  }
-  const condition = value.condition;
-  return typeof value.role === "string" && (condition === null || typeof condition === "string");
+  return (
+    typeof value.role === "string" && typeof value.prompt === "string" && value.prompt.trim() !== ""
+  );
 }

 function isStringRecord(value: unknown, itemCheck: (item: unknown) => boolean): boolean {
@@ -53,10 +53,37 @@ function isGraph(value: unknown): boolean {
    return false;
  }
  return Object.values(value).every(
-    (transitions) => Array.isArray(transitions) && transitions.every((t) => isTransition(t)),
+    (statusMap) => isRecord(statusMap) && Object.values(statusMap).every((t) => isTarget(t)),
  );
 }

+/**
+ * Compute the workflow name a file path implies: its base name with a trailing
+ * `.yaml` or `.yml` removed. Any other base name is returned unchanged.
+ */
+export function workflowNameFromPath(filePath: string): string {
+  const base = basename(filePath);
+  for (const extension of [".yaml", ".yml"]) {
+    if (base.endsWith(extension)) {
+      return base.slice(0, base.length - extension.length);
+    }
+  }
+  return base;
+}
+
+/**
+ * Compare the payload's `name` with the name implied by the file path.
+ * Yields null when they agree, otherwise a message naming the file, the
+ * implied name and the declared name.
+ */
+export function checkWorkflowFilenameConsistency(
+  filePath: string,
+  payload: WorkflowPayload,
+): string | null {
+  const expected = workflowNameFromPath(filePath);
+  return payload.name === expected
+    ? null
+    : `workflow name mismatch: file "${basename(filePath)}" implies name "${expected}" but YAML declares name "${payload.name}"`;
+}
+
 /** Validate YAML-parsed workflow document shape (role frontmatter may be inline JSON Schema). */
 export function parseWorkflowPayload(raw: unknown): WorkflowPayload | null {
  if (!isRecord(raw)) {
@@ -65,11 +92,7 @@ export function parseWorkflowPayload(raw: unknown): WorkflowPayload | null {
  if (typeof raw.name !== "string" || typeof raw.description !== "string") {
    return null;
  }
-  if (
-    !isStringRecord(raw.roles, isRoleDefinition) ||
-    !isStringRecord(raw.conditions, isConditionDefinition) ||
-    !isGraph(raw.graph)
-  ) {
+  if (!isStringRecord(raw.roles, isRoleDefinition) || !isGraph(raw.graph)) {
    return null;
  }
  return raw as WorkflowPayload;
@@ -5,9 +5,5 @@
    "outDir": "dist"
  },
  "include": ["src"],
-  "references": [
-    { "path": "../workflow-protocol" },
-    { "path": "../workflow-moderator" },
-    { "path": "../workflow-agent-kit" }
-  ]
+  "references": [{ "path": "../workflow-protocol" }, { "path": "../workflow-util-agent" }]
 }
@@ -0,0 +1,141 @@
+# @uncaged/workflow-agent-builtin
+
+`uwf-builtin` agent — built-in LLM agent with file read/write and shell tools.
+
+## Overview
+
+Layer 3 agent implementation. Runs an OpenAI-compatible chat completion loop with built-in tools (`read_file`, `write_file`, `run_command`). Uses the configured provider/model from `config.yaml`. Produces frontmatter markdown output and stores turn-by-turn session detail in CAS.
+
+Useful when you want a self-contained agent without an external CLI like Hermes or Claude Code.
+
+**Dependencies:** `@uncaged/json-cas`, `@uncaged/workflow-util-agent`, `@uncaged/workflow-util`
+
+## Installation
+
+Included as the `uwf-builtin` binary when you install `@uncaged/workflow-agent-builtin`:
+
+```bash
+bun add -g @uncaged/workflow-agent-builtin
+```
+
+## CLI Usage
+
+Invoked by `uwf thread step`:
+
+```bash
+uwf-builtin <thread-id> <role>
+```
+
+Configure as default agent:
+
+```bash
+uwf setup --agent builtin
+```
+
+Override per step:
+
+```bash
+uwf thread step <thread-id> --agent uwf-builtin
+```
+
+Environment variables set by the engine:
+
+| Variable | Purpose |
+|----------|---------|
+| `UWF_EDGE_PROMPT` | Moderator edge instruction for this step |
+
+## API
+
+All exports come from `src/index.ts`.
+
+### Agent factory
+
+```typescript
+function createBuiltinAgent(): () => Promise<void>
+function buildBuiltinMessages(ctx: AgentContext): ChatMessage[]
+```
+
+### LLM loop
+
+```typescript
+const BUILTIN_MAX_TURNS = 30;
+const BUILTIN_CONTINUE_MAX_TURNS = 5;
+
+function runBuiltinLoop(/* options: RunBuiltinLoopOptions */): Promise<RunBuiltinLoopResult>
+function chatCompletionWithTools(
+  provider: ResolvedLlmProvider,
+  messages: ChatMessage[],
+  tools: OpenAiToolDefinition[],
+): Promise<LlmAssistantResponse>
+```
+
+`RunBuiltinLoopOptions` and `RunBuiltinLoopResult` are internal to `loop.ts` and not re-exported from `index.ts`.
+
+### Tools
+
+```typescript
+function getBuiltinTools(): readonly BuiltinTool[]
+function executeBuiltinTool(
+  name: string,
+  args: Record<string, unknown>,
+  ctx: ToolContext,
+): Promise<string>
+```
+
+### Session and detail
+
+```typescript
+function initSessionDir(storageRoot: string): Promise<void>
+function appendSessionTurn(storageRoot: string, sessionId: string, turn: BuiltinTurnPayload): Promise<void>
+function readSessionTurns(storageRoot: string, sessionId: string): Promise<BuiltinTurnPayload[]>
+function removeSession(storageRoot: string, sessionId: string): Promise<void>
+function registerBuiltinSchemas(store: Store): Promise<BuiltinSchemaHashes>
+function storeBuiltinDetail(store: Store, payload: BuiltinDetailPayload): Promise<string>
+```
+
+### Types
+
+```typescript
+type ChatMessage = /* system | user | assistant | tool */;
+type LlmAssistantResponse = { content: string | null; toolCalls: LlmToolCall[] | null };
+type LlmToolCall = { id: string; name: string; arguments: string };
+type BuiltinTool = { name: string; description: string; parameters: Record<string, unknown> };
+type ToolContext = { cwd: string; storageRoot: string };
+type BuiltinDetailPayload = { /* session turns, model, timestamps */ };
+type BuiltinLoopTurn = { /* single loop iteration record */ };
+type BuiltinToolCallRecord = { /* tool call audit */ };
+type BuiltinToolResultRecord = { /* tool result audit */ };
+type BuiltinTurnPayload = { /* persisted turn */ };
+```
+
+## Internal Structure
+
+```
+src/
+├── index.ts
+├── cli.ts              Binary entrypoint
+├── agent.ts            createBuiltinAgent
+├── loop.ts             Multi-turn LLM + tool loop
+├── prompt.ts           buildBuiltinMessages
+├── session.ts          Session directory persistence
+├── detail.ts           CAS detail node storage
+├── schemas.ts          Builtin CAS schemas
+├── types.ts            Detail and turn payload types
+├── llm/
+│   ├── index.ts
+│   ├── llm.ts          chatCompletionWithTools
+│   └── types.ts        ChatMessage, LlmToolCall, etc.
+└── tools/
+    ├── index.ts        getBuiltinTools, executeBuiltinTool
+    ├── read-file.ts
+    ├── write-file.ts
+    ├── run-command.ts
+    ├── path.ts
+    └── types.ts
+```
+
+## Configuration
+
+Requires a configured OpenAI-compatible provider and model in `~/.uncaged/workflow/config.yaml` (via `uwf setup`). API keys are loaded from `~/.uncaged/workflow/.env`.
+
+Tools run with the current working directory as `ToolContext.cwd` (typically the directory where `uwf thread step` was invoked).
@@ -0,0 +1,16 @@
+import { describe, expect, test } from "bun:test";
+
+import type { LlmToolCall } from "../src/llm/types.js";
+
+/**
+ * Pins the LlmToolCall record shape: `id`, `name`, and `arguments` as a JSON string.
+ * Parser coverage through chatCompletionWithTools is left to a later integration test.
+ */
+describe("LlmToolCall shape", () => {
+  test("tool call record fields", () => {
+    const call: LlmToolCall = {
+      id: "call_1",
+      name: "read_file",
+      arguments: '{"path":"README.md"}',
+    };
+    expect(call.name).toBe("read_file");
+    // `arguments` is a serialized string, so it must be parsed before comparing.
+    expect(JSON.parse(call.arguments)).toEqual({ path: "README.md" });
+  });
+});
@@ -0,0 +1,256 @@
+import { beforeEach, describe, expect, mock, test } from "bun:test";
+
+const mockChatCompletionWithTools = mock(async () => ({
+  content: "---\nstatus: done\n---",
+  toolCalls: [],
+}));
+const mockAppendSessionTurn = mock(async () => {});
+const mockExecuteBuiltinTool = mock(async () => "tool-result");
+
+mock.module("../src/llm/index.js", () => ({
+  chatCompletionWithTools: mockChatCompletionWithTools,
+}));
+mock.module("../src/session.js", () => ({
+  appendSessionTurn: mockAppendSessionTurn,
+}));
+mock.module("../src/tools/index.js", () => ({
+  builtinToolsToOpenAi: () => [],
+  executeBuiltinTool: mockExecuteBuiltinTool,
+  getBuiltinTools: () => [],
+}));
+
+import {
+  executeTurnTools,
+  extractFinalText,
+  runBuiltinLoop,
+  shouldInjectDeadlineWarning,
+  shouldNudge,
+  shouldProcessToolCalls,
+} from "../src/loop.js";
+
+const fakeProvider = {} as any;
+const fakeToolCtx = {} as any;
+
+function makeOptions(overrides: Partial<Parameters<typeof runBuiltinLoop>[0]> = {}) {
+  return {
+    provider: fakeProvider,
+    messages: [{ role: "system" as const, content: "sys" }],
+    toolCtx: fakeToolCtx,
+    maxTurns: 5,
+    storageRoot: "/tmp",
+    sessionId: "sess",
+    noTools: false,
+    ...overrides,
+  };
+}
+
+beforeEach(() => {
+  mockChatCompletionWithTools.mockReset();
+  mockAppendSessionTurn.mockReset();
+  mockExecuteBuiltinTool.mockReset();
+});
+
+describe("shouldNudge", () => {
+  test("2.1 returns true when all conditions met", () => {
+    expect(shouldNudge({ noTools: false, text: "some text", turn: 0, maxTurns: 5 })).toBe(true);
+  });
+  test("2.2 returns false when noTools=true", () => {
+    expect(shouldNudge({ noTools: true, text: "some text", turn: 0, maxTurns: 5 })).toBe(false);
+  });
+  test("2.3 returns false when text starts with ---", () => {
+    expect(shouldNudge({ noTools: false, text: "---\nstatus: done", turn: 0, maxTurns: 5 })).toBe(
+      false,
+    );
+  });
+  test("2.4 returns false on last turn", () => {
+    expect(shouldNudge({ noTools: false, text: "some text", turn: 4, maxTurns: 5 })).toBe(false);
+  });
+  test("2.5 returns true on second-to-last turn", () => {
+    expect(shouldNudge({ noTools: false, text: "some text", turn: 3, maxTurns: 5 })).toBe(true);
+  });
+  test("2.6 leading whitespace before --- suppresses nudge", () => {
+    expect(shouldNudge({ noTools: false, text: "  ---\nstatus: done", turn: 0, maxTurns: 5 })).toBe(
+      false,
+    );
+  });
+});
+
+describe("executeTurnTools", () => {
+  test("4.1 executes each tool call and pushes tool result messages", async () => {
+    mockExecuteBuiltinTool.mockResolvedValue("result");
+    const messages: any[] = [];
+    const calls = [
+      { id: "c1", name: "tool_a", arguments: "{}" },
+      { id: "c2", name: "tool_b", arguments: "{}" },
+    ];
+    const count = await executeTurnTools(calls, fakeToolCtx, messages, "/tmp", "sess");
+    expect(messages.length).toBe(2);
+    expect(messages[0].role).toBe("tool");
+    expect(messages[1].role).toBe("tool");
+    expect(count).toBe(2);
+  });
+  test("4.2 tool result content matches executeBuiltinTool return value", async () => {
+    mockExecuteBuiltinTool.mockResolvedValue("result-A");
+    const messages: any[] = [];
+    await executeTurnTools(
+      [{ id: "c1", name: "read_file", arguments: "{}" }],
+      fakeToolCtx,
+      messages,
+      "/tmp",
+      "sess",
+    );
+    expect(messages[0].content).toBe("result-A");
+  });
+});
+
+describe("runBuiltinLoop integration", () => {
+  test("3.1 single text-only response returns finalText immediately", async () => {
+    mockChatCompletionWithTools.mockResolvedValue({
+      content: "---\nstatus: done\n---",
+      toolCalls: [],
+    });
+    const result = await runBuiltinLoop(makeOptions());
+    expect(result.finalText).toBe("---\nstatus: done\n---");
+    expect(result.turnCount).toBe(1);
+  });
+  test("3.2 noTools=true suppresses tool calls", async () => {
+    mockChatCompletionWithTools.mockResolvedValue({
+      content: "ok",
+      toolCalls: [{ id: "c1", name: "read_file", arguments: "{}" }],
+    });
+    const result = await runBuiltinLoop(makeOptions({ noTools: true }));
+    expect(result.finalText).toBe("ok");
+    expect(result.turnCount).toBe(1);
+  });
+  test("3.3 tool call followed by text response", async () => {
+    mockChatCompletionWithTools
+      .mockResolvedValueOnce({
+        content: null,
+        toolCalls: [{ id: "c1", name: "read_file", arguments: "{}" }],
+      })
+      .mockResolvedValueOnce({ content: "---\nstatus: done\n---", toolCalls: [] });
+    mockExecuteBuiltinTool.mockResolvedValue("file contents");
+    const result = await runBuiltinLoop(makeOptions());
+    expect(result.finalText).toBe("---\nstatus: done\n---");
+    expect(result.turnCount).toBe(3);
+  });
+  test("3.4 nudge cycle inserts nudge message", async () => {
+    mockChatCompletionWithTools
+      .mockResolvedValueOnce({ content: "I am thinking", toolCalls: [] })
+      .mockResolvedValueOnce({ content: "---\nstatus: done\n---", toolCalls: [] });
+    const result = await runBuiltinLoop(makeOptions());
+    expect(result.finalText).toBe("---\nstatus: done\n---");
+    const nudgeMsg = result.messages.find(
+      (m) =>
+        m.role === "user" && typeof m.content === "string" && m.content.includes("frontmatter"),
+    );
+    expect(nudgeMsg).toBeDefined();
+  });
+  test("3.5 maxTurns exhaustion falls back to last assistant content", async () => {
+    mockChatCompletionWithTools.mockResolvedValue({ content: "still thinking", toolCalls: [] });
+    const result = await runBuiltinLoop(makeOptions({ maxTurns: 3 }));
+    expect(result.finalText).toBe("still thinking");
+  });
+  test("3.6 original messages array is not mutated", async () => {
+    mockChatCompletionWithTools.mockResolvedValue({
+      content: "---\nstatus: done\n---",
+      toolCalls: [],
+    });
+    const original = [{ role: "system" as const, content: "sys" }];
+    await runBuiltinLoop(makeOptions({ messages: original }));
+    expect(original.length).toBe(1);
+  });
+});
+
+// NOTE(review): judging by the test names, the positional arguments appear to be
+// (turn, maxTurns, alreadyWarned, noTools) — confirm against src/loop.ts.
+describe("shouldInjectDeadlineWarning", () => {
+  test("5.1 returns true when turn count reaches warning threshold and not yet warned", () => {
+    expect(shouldInjectDeadlineWarning(7, 10, false, false)).toBe(true);
+  });
+  test("5.2 returns false when already warned", () => {
+    expect(shouldInjectDeadlineWarning(7, 10, true, false)).toBe(false);
+  });
+  test("5.3 returns false when noTools is true", () => {
+    expect(shouldInjectDeadlineWarning(7, 10, false, true)).toBe(false);
+  });
+  test("5.4 returns false when turns remaining > DEADLINE_WARNING_TURNS", () => {
+    expect(shouldInjectDeadlineWarning(5, 10, false, false)).toBe(false);
+  });
+  // NOTE(review): same arguments and expectation as 5.1, so this adds no coverage of the
+  // boundary itself. A case one turn earlier would pin it — check the
+  // DEADLINE_WARNING_TURNS value before adding one.
+  test("5.5 returns true when exactly at warning threshold", () => {
+    expect(shouldInjectDeadlineWarning(7, 10, false, false)).toBe(true);
+  });
+  test("5.6 returns false when turns remaining is 0", () => {
+    expect(shouldInjectDeadlineWarning(10, 10, false, false)).toBe(false);
+  });
+});
+
+describe("shouldProcessToolCalls", () => {
+  test("6.1 returns true when toolCalls present and noTools=false", () => {
+    expect(shouldProcessToolCalls([{ id: "x", name: "read", arguments: "{}" }], false)).toBe(true);
+  });
+  test("6.2 returns false when toolCalls is null", () => {
+    expect(shouldProcessToolCalls(null, false)).toBe(false);
+  });
+  test("6.3 returns false when toolCalls is empty array", () => {
+    expect(shouldProcessToolCalls([], false)).toBe(false);
+  });
+  test("6.4 returns false when noTools=true", () => {
+    expect(shouldProcessToolCalls([{ id: "x", name: "read", arguments: "{}" }], true)).toBe(false);
+  });
+  test("6.5 returns true when multiple tool calls present", () => {
+    expect(
+      shouldProcessToolCalls(
+        [
+          { id: "x1", name: "read", arguments: "{}" },
+          { id: "x2", name: "write", arguments: "{}" },
+        ],
+        false,
+      ),
+    ).toBe(true);
+  });
+});
+
+describe("extractFinalText", () => {
+  test("7.1 returns last assistant message content", () => {
+    const messages = [
+      { role: "system" as const, content: "sys", tool_calls: null },
+      { role: "assistant" as const, content: "first", tool_calls: null },
+      { role: "assistant" as const, content: "last", tool_calls: null },
+    ];
+    expect(extractFinalText(messages)).toBe("last");
+  });
+  test("7.2 returns empty string when no assistant messages", () => {
+    expect(extractFinalText([{ role: "system" as const, content: "sys", tool_calls: null }])).toBe(
+      "",
+    );
+  });
+  test("7.3 skips assistant messages with null content", () => {
+    const messages = [
+      { role: "assistant" as const, content: "first", tool_calls: null },
+      {
+        role: "assistant" as const,
+        content: null,
+        tool_calls: [{ id: "x", name: "t", arguments: "{}" }],
+      },
+      { role: "assistant" as const, content: "second", tool_calls: null },
+    ];
+    expect(extractFinalText(messages)).toBe("second");
+  });
+  test("7.4 skips assistant messages with empty content", () => {
+    const messages = [
+      { role: "assistant" as const, content: "first", tool_calls: null },
+      { role: "assistant" as const, content: "", tool_calls: null },
+      { role: "user" as const, content: "nudge", tool_calls: null },
+    ];
+    expect(extractFinalText(messages)).toBe("first");
+  });
+  test("7.5 handles empty messages array", () => {
+    expect(extractFinalText([])).toBe("");
+  });
+  test("7.6 handles messages with only user and system roles", () => {
+    const messages = [
+      { role: "system" as const, content: "sys", tool_calls: null },
+      { role: "user" as const, content: "query", tool_calls: null },
+    ];
+    expect(extractFinalText(messages)).toBe("");
+  });
+});
@@ -0,0 +1,21 @@
+import { describe, expect, test } from "bun:test";
+import { resolve } from "node:path";
+import { resolvePath } from "../src/tools/path.js";
+
+// These cases expect resolvePath to agree with node:path `resolve`: absolute inputs and
+// `..` segments pass through and are not confined to the first (cwd) argument.
+describe("resolvePath", () => {
+  test("resolves relative paths against cwd", () => {
+    const root = "/workspace/project";
+    const resolved = resolvePath(root, "src/foo.ts");
+    expect(resolved).toBe(resolve(root, "src/foo.ts"));
+  });
+
+  test("resolves absolute paths as-is", () => {
+    // The cwd argument is ignored when the input is already absolute.
+    const resolved = resolvePath("/workspace", "/etc/hosts");
+    expect(resolved).toBe("/etc/hosts");
+  });
+
+  test("resolves parent traversal normally", () => {
+    // The expected result lies outside the cwd: "../other" climbs one level up.
+    const resolved = resolvePath("/workspace/project", "../other/file.ts");
+    expect(resolved).toBe(resolve("/workspace/project", "../other/file.ts"));
+  });
+});
--- a/Show More
+++ b/Show More