debug: dump raw NDJSON for issue #439 investigation

Saves Claude Code stdout to /tmp/uwf-ndjson-dump/ before parsing. Temporary — remove after root cause confirmed. Refs #439
Merge pull request 'feat(cli-workflow): implement multi-strategy workflow resolution' (#438 ) from fix/428-multi-strategy-workflow-resolution into main
2026-05-23 12:24:05 +00:00 · 2026-05-23 11:12:56 +00:00 · 2026-05-23 11:11:37 +00:00 · 2026-05-23 11:09:11 +00:00 · 2026-05-23 19:07:36 +08:00 · 2026-05-23 10:57:44 +00:00
153 changed files with 11354 additions and 277 deletions
@@ -11,3 +11,5 @@ solve-issue-entry.ts
 packages/workflow-template-develop/develop.esm.js
 .DS_Store
 *.py
+.claude
+tmp
@@ -0,0 +1,83 @@
+# Test Spec: uwf setup model connectivity validation (#335)
+
+## Context
+
+File: `packages/cli-workflow/src/commands/setup.ts`
+Test file: `packages/cli-workflow/src/__tests__/setup-validate.test.ts`
+
+After `cmdSetup` writes config, it should send a test chat completion request to verify the configured model is reachable. If validation fails, warn the user (don't abort — config is already saved).
+
+## Implementation Notes
+
+- Add a `validateModel(baseUrl, apiKey, model)` function that sends a minimal chat completion request (`POST /chat/completions` with `messages: [{role:"user",content:"hi"}]`, `max_tokens: 1`)
+- Returns `Result<void, string>` — ok if 2xx response, error with reason string otherwise
+- Use `AbortSignal.timeout(15_000)` for the request
+- Both `cmdSetup` and `cmdSetupInteractive` should call it after saving config
+- `cmdSetup` returns validation result in its return object: `{ ...existing, validation: { ok: true } | { ok: false, error: string } }`
+- `cmdSetupInteractive` prints a warning to console if validation fails, success message if it passes
+- Use the project logger (`createLogger`) — no raw `console.log` except in interactive CLI output (per CLAUDE.md)
+
+## Test Cases (vitest)
+
+### 1. `validateModel` — success path
+- Mock `fetch` to return `{ status: 200, ok: true, json: () => ({}) }`
+- Call `validateModel(baseUrl, apiKey, model)`
+- Assert returns `{ ok: true, value: undefined }`
+- Assert fetch was called with correct URL (`${baseUrl}/chat/completions`), correct headers (`Authorization: Bearer ${apiKey}`), correct body (model, messages, max_tokens: 1)
+
+### 2. `validateModel` — HTTP error (401 unauthorized)
+- Mock `fetch` to return `{ status: 401, ok: false, statusText: "Unauthorized" }`
+- Call `validateModel(baseUrl, apiKey, model)`
+- Assert returns `{ ok: false, error: <string containing "401"> }`
+
+### 3. `validateModel` — HTTP error (404 model not found)
+- Mock `fetch` to return `{ status: 404, ok: false, statusText: "Not Found" }`
+- Assert returns `{ ok: false, error: <string containing "404"> }`
+
+### 4. `validateModel` — network timeout
+- Mock `fetch` to throw `DOMException` with name `AbortError`
+- Assert returns `{ ok: false, error: <string containing "timeout" or "unreachable"> }`
+
+### 5. `validateModel` — network error (DNS failure, connection refused)
+- Mock `fetch` to throw `TypeError("fetch failed")`
+- Assert returns `{ ok: false, error: <string mentioning connectivity> }`
+
+### 6. `cmdSetup` — includes validation result on success
+- Mock global `fetch` for `/chat/completions` to succeed
+- Call `cmdSetup({ provider, baseUrl, apiKey, model, storageRoot })`
+- Assert returned object has `validation: { ok: true, value: undefined }`
+- Assert config files are still written (existing behavior preserved)
+
+### 7. `cmdSetup` — includes validation result on failure (config still saved)
+- Mock global `fetch` for `/chat/completions` to return 401
+- Call `cmdSetup({ ... })`
+- Assert returned object has `validation: { ok: false, error: ... }`
+- Assert `config.yaml` and `.env` are still written (validation failure doesn't prevent saving)
+
+### 8. `cmdSetupInteractive` — prints success message on validation pass
+- Mock `fetch` for both `/models` and `/chat/completions` to succeed
+- Mock stdin to provide valid selections
+- Capture console output
+- Assert output contains a success message like "Model verified" or "✓"
+
+### 9. `cmdSetupInteractive` — prints warning on validation failure
+- Mock `fetch`: `/models` succeeds, `/chat/completions` returns 401
+- Mock stdin for valid selections
+- Capture console output
+- Assert output contains a warning about model not being reachable and suggests trying a different model
+
+### 10. `validateModel` — request body correctness
+- Mock `fetch` to capture the request body
+- Call `validateModel(baseUrl, apiKey, "test-model")`
+- Assert body is `{ model: "test-model", messages: [{role: "user", content: "hi"}], max_tokens: 1 }`
+
+## Export Requirements
+
+- `validateModel` must be exported (for direct unit testing)
+- Signature: `async function validateModel(baseUrl: string, apiKey: string, model: string): Promise<Result<void, string>>`
+- `Result` type: `{ ok: true; value: T } | { ok: false; error: E }` (project convention)
+
+## Files to Create/Modify
+
+- **New**: `packages/cli-workflow/src/__tests__/setup-validate.test.ts` — all test cases above
+- **Modify**: `packages/cli-workflow/src/commands/setup.ts` — add `validateModel`, integrate into `cmdSetup` and `cmdSetupInteractive`
@@ -0,0 +1,197 @@
+name: "solve-issue"
+description: "TDD-driven issue resolution for small, focused changes. Loop protection relies on engine maxRounds."
+roles:
+  planner:
+    description: "Analyzes issue and outputs a TDD test spec"
+    goal: "You are a planning agent. You analyze Gitea issues and produce a TDD test specification that downstream roles will implement and verify."
+    capabilities:
+      - issue-analysis
+      - planning
+    procedure: |
+      On first run (no previous steps):
+      1. Read the issue and all comments from Gitea using `tea issues <number> -r <owner/repo>`
+      2. Read CLAUDE.md (or equivalent project conventions file) to understand coding standards
+      3. Assess whether the issue has enough information to produce a test spec
+      4. If insufficient info: comment on the issue via `echo "..." | tea comment <number> -r <owner/repo>` (skip if you already commented), then output status=insufficient_info and terminate
+      5. If sufficient: produce a detailed TDD test spec in markdown covering all scenarios
+
+      On subsequent runs (bounced back by tester with fix_spec):
+      1. Read the tester's output from the previous step to understand what's wrong with the spec
+      2. Revise the test spec accordingly
+
+      After producing the test spec:
+      1. Store it via `uwf cas put-text "<markdown content>"` and capture the returned hash
+      2. Put the hash in frontmatter.plan (required when status=ready)
+    output: "Output a brief summary of the test spec. Frontmatter must include: status (ready or insufficient_info) and plan (CAS hash of the test spec, required when status=ready)."
+    frontmatter:
+      type: object
+      properties:
+        status:
+          type: string
+          enum: [ready, insufficient_info]
+        plan:
+          type: string
+      required: [status]
+  developer:
+    description: "TDD implementation per test spec"
+    goal: "You are a developer agent. You implement code changes following TDD — write tests first, then implementation."
+    capabilities:
+      - coding
+    procedure: |
+      Before starting any work, ensure a clean worktree:
+      1. `git checkout main && git pull` to get the latest code
+      2. `git checkout -b fix/<issue-number>-<short-description>` to create a fresh branch
+         - If bounced back from reviewer or tester, reuse the existing branch and rebase onto latest main:
+           `git checkout main && git pull && git checkout <branch> && git rebase main`
+
+      Then implement TDD:
+      3. Read the test spec from CAS: `uwf cas get <plan hash>` (find the hash from the latest planner step's frontmatter.plan)
+      4. If bounced back from reviewer or tester: read the previous role's output to understand what needs fixing
+      5. Write tests first based on the spec
+      6. Implement the code to make tests pass
+      7. Ensure `bun run build` passes with no errors
+      8. Run `bun test` to verify all tests pass
+    output: "List all files changed and provide a summary. Frontmatter must include: status (done or failed)."
+    frontmatter:
+      type: object
+      properties:
+        status:
+          type: string
+          enum: [done, failed]
+      required: [status]
+  reviewer:
+    description: "Code standards compliance check"
+    goal: "You are a code reviewer. You verify code standards compliance — NOT functionality (that's the tester's job)."
+    capabilities:
+      - code-review
+      - static-analysis
+    procedure: |
+      Before reviewing, verify the git branch:
+      1. Run `git branch --show-current` — confirm the branch name references the issue number being worked on
+      2. If the branch doesn't correspond to the issue, flag it in your output and reject
+
+      Then perform code review:
+      Hard checks (must all pass):
+      3. `bun run build` — no build errors
+      4. `bunx biome check` — no lint violations
+      5. TypeScript strict mode — no type errors
+
+      Soft checks (review against CLAUDE.md conventions):
+      - Functional-first: `function` + `type`, not `class` + `interface`
+      - No optional properties (`?:`) — use `T | null`
+      - Naming conventions (kebab-case files, PascalCase types, camelCase functions)
+      - Module boundary discipline (folder exports via index.ts)
+      - No `console.log` (use structured logger)
+      - No dynamic imports in production code
+
+      Only review standards compliance. Do NOT test functionality.
+      If rejecting, you MUST explain the specific reason in your output.
+    output: "Explain your decision with specific file/line references. Frontmatter must include: approved (true or false)."
+    frontmatter:
+      type: object
+      properties:
+        approved:
+          type: boolean
+      required: [approved]
+  tester:
+    description: "Functional correctness verification"
+    goal: "You are a tester agent. You verify that the implementation correctly satisfies every scenario in the test spec."
+    capabilities:
+      - testing
+    procedure: |
+      1. Run `bun test` for automated test verification
+      2. Read the test spec from CAS: `uwf cas get <plan hash>` (find the hash from the latest planner step's frontmatter.plan)
+      3. Verify each scenario in the spec is covered and passing
+      4. Determine outcome:
+         - passed: all scenarios verified, tests pass
+         - fix_code: tests fail or implementation doesn't match spec → send back to developer
+         - fix_spec: the spec itself is wrong or incomplete → send back to planner
+    output: "Report test results per scenario. Frontmatter must include: status (passed, fix_code, or fix_spec)."
+    frontmatter:
+      type: object
+      properties:
+        status:
+          type: string
+          enum: [passed, fix_code, fix_spec]
+      required: [status]
+  committer:
+    description: "Commits and creates PR"
+    goal: "You are a committer agent. You create a clean commit and push a PR linking the original issue."
+    capabilities: []
+    procedure: |
+      Note: You inherit the developer's worktree and branch. Do NOT create a new branch.
+      1. Stage all changes: `git add -A`
+      2. Commit with a descriptive message referencing the issue: `git commit -m "type: description\n\nFixes #N"`
+      3. Push the branch: `git push -u origin <branch-name>`
+         - If push hook fails: capture the error log in your output, mark hook_failed
+      4. On push success: create a PR via `tea pr create --title "..." --description "..."`
+         - PR description must follow the project template: What / Why / Changes / Ref sections, with `Fixes #N` in Ref
+    output: "Include PR URL on success or error log on failure. Frontmatter must include: success (true or false)."
+    frontmatter:
+      type: object
+      properties:
+        success:
+          type: boolean
+      required: [success]
+conditions:
+  insufficientInfo:
+    description: "Planner determined there's not enough info to proceed"
+    expression: "$last('planner').status = 'insufficient_info'"
+  devFailed:
+    description: "Developer failed to implement"
+    expression: "$last('developer').status = 'failed'"
+  rejected:
+    description: "Reviewer rejected the implementation"
+    expression: "$last('reviewer').approved = false"
+  fixCode:
+    description: "Tester found code issues"
+    expression: "$last('tester').status = 'fix_code'"
+  fixSpec:
+    description: "Tester found spec issues"
+    expression: "$last('tester').status = 'fix_spec'"
+  hookFailed:
+    description: "Push hook failed"
+    expression: "$last('committer').success = false"
+graph:
+  $START:
+    - role: "planner"
+      condition: null
+      prompt: "Analyze the issue and produce an implementation plan."
+  planner:
+    - role: "$END"
+      condition: "insufficientInfo"
+      prompt: "Insufficient information to proceed; end the workflow."
+    - role: "developer"
+      condition: null
+      prompt: "Implement the plan from the planner."
+  developer:
+    - role: "$END"
+      condition: "devFailed"
+      prompt: "Development failed; end the workflow."
+    - role: "reviewer"
+      condition: null
+      prompt: "Send the implementation to the reviewer."
+  reviewer:
+    - role: "developer"
+      condition: "rejected"
+      prompt: "Reviewer rejected the implementation; fix the issues."
+    - role: "tester"
+      condition: null
+      prompt: "Review passed; run tests on the implementation."
+  tester:
+    - role: "developer"
+      condition: "fixCode"
+      prompt: "Tests found code issues; return to developer."
+    - role: "planner"
+      condition: "fixSpec"
+      prompt: "Tests found spec issues; return to planner."
+    - role: "committer"
+      condition: null
+      prompt: "Tests passed; commit and push the changes."
+  committer:
+    - role: "developer"
+      condition: "hookFailed"
+      prompt: "Push hook failed; return to developer to fix."
+    - role: "$END"
+      condition: null
+      prompt: "Commit succeeded; complete the workflow."
@@ -5,6 +5,8 @@
      "**",
      "!**/dist",
      "!**/node_modules",
+      "!**/legacy-packages",
+      "!scripts",
      "!packages/workflow/workflow",
      "!xiaoju/scripts/bundle.ts"
    ]
@@ -15,6 +17,15 @@
    "indentWidth": 2,
    "lineWidth": 100
  },
+  "css": {
+    "parser": {
+      "cssModules": true,
+      "tailwindDirectives": true
+    },
+    "linter": {
+      "enabled": false
+    }
+  },
  "javascript": {
    "formatter": {
      "quoteStyle": "double",
@@ -36,7 +47,7 @@
      }
    },
    {
-      "includes": ["**/*.d.ts"],
+      "includes": ["**/*.d.ts", "**/vitest.config.*"],
      "linter": {
        "rules": {
          "style": {
@@ -44,6 +55,16 @@
          }
        }
      }
+    },
+    {
+      "includes": ["**/cli.ts", "**/setup.ts"],
+      "linter": {
+        "rules": {
+          "suspicious": {
+            "noConsole": "off"
+          }
+        }
+      }
    }
  ],
  "linter": {
@@ -0,0 +1,779 @@
+# Built-in Role Agent 调研
+
+## 目标
+
+实现一个内置的 role agent（暂称 `uwf-builtin`），不依赖 hermes/openclaw 等外部 agent 进程。
+直接使用 workflow config 中配置的 model，自己实现 agent run loop 和关键 toolkit。
+
+---
+
+## 关键问题
+
+### Q1: Agent 接口协议
+
+现有 agent 是怎么被 CLI 调用的？输入（argv、环境变量）和输出（stdout、CAS）格式是什么？
+
+**调研要点：**
+- `cli-workflow` 里 `spawnAgent` 的完整实现
+- AgentConfig 类型定义
+- agent 进程的 exit code 约定
+- 环境变量传递（UWF_STORAGE_ROOT 等）
+
+**答案：**
+
+#### 调用链
+
+`uwf thread step` → `cmdThreadStepOnce` → moderator 求值下一 role → `resolveAgentConfig` → `spawnAgent`。
+
+#### AgentConfig 类型
+
+```146:149:packages/workflow-protocol/src/types.ts
+export type AgentConfig = {
+  command: string;
+  args: string[];
+};
+```
+
+在 `config.yaml` 的 `agents` 段注册，例如 `hermes: { command: "uwf-hermes", args: [] }`。
+
+#### spawnAgent 行为
+
+```627:653:packages/cli-workflow/src/commands/thread.ts
+function spawnAgent(agent: AgentConfig, threadId: ThreadId, role: string): CasRef {
+  const argv = [...agent.args, threadId, role];
+  let stdout: string;
+  try {
+    stdout = execFileSync(agent.command, argv, {
+      encoding: "utf8",
+      env: process.env,
+      stdio: ["ignore", "pipe", "pipe"],
+    });
+  } catch (e) {
+  // ... stderr 拼进 fail 消息
+  }
+
+  const line = stdout.trim().split("\n").pop()?.trim() ?? "";
+  if (!isCasRef(line)) {
+    fail(`agent stdout is not a valid CAS hash: ${line || "(empty)"}`);
+  }
+  return line;
+}
+```
+
+| 项目 | 约定 |
+|------|------|
+| **argv** | `[...agent.args, <thread-id>, <role>]`，即 `process.argv[2]`=threadId，`process.argv[3]`=role（与 `createAgent` 的 `parseArgv` 一致） |
+| **stdin** | 忽略 |
+| **stdout** | 纯文本，**最后一行**必须是新 `StepNode` 的 CAS hash（13 字符 Crockford Base32） |
+| **stderr** | 失败时 CLI 会附带 stderr；成功时无约定 |
+| **exit code** | `0` = 成功；非 0 时 `execFileSync` 抛错，step 失败 |
+| **环境变量** | 继承父进程 `process.env`（含 storage root、API key 等） |
+| **链头更新** | **不由 agent 负责**；agent 只写 CAS StepNode，CLI 在拿到 stdout hash 后更新 `threads.yaml` |
+
+Agent 解析优先级（`resolveAgentConfig`）：
+
+1. CLI `--agent` override（整段 command + args 字符串）
+2. `config.agentOverrides[workflow.name][role]`
+3. `config.defaultAgent`
+
+#### 环境变量：Storage Root
+
+文档中写的 `UWF_STORAGE_ROOT` **在当前代码中不存在**。实际优先级（`workflow-agent-kit` / `cli-workflow` 一致）：
+
+```33:43:packages/workflow-agent-kit/src/storage.ts
+export function resolveStorageRoot(): string {
+  const internal = process.env.UNCAGED_WORKFLOW_STORAGE_ROOT;
+  if (internal !== undefined && internal !== "") {
+    return internal;
+  }
+  const userOverride = process.env.WORKFLOW_STORAGE_ROOT;
+  if (userOverride !== undefined && userOverride !== "") {
+    return userOverride;
+  }
+  return getDefaultStorageRoot();
+}
+```
+
+Agent 子进程通过继承的 `process.env` 与父 CLI 共享同一 storage root；`createAgent` 内还会 `loadDotenv({ path: getEnvPath(storageRoot) })` 加载 `~/.uncaged/workflow/.env`。
+
+#### Agent 侧职责（设计文档 + 实现）
+
+- 读 `threads.yaml` 链头，构建 context，执行 role
+- 将 `StepNode` 写入 CAS（`output` / `detail` / `agent` / `prev` / `start`）
+- stdout 打印 step hash
+- **不**更新 `threads.yaml`
+
+---
+
+### Q2: createAgent 工厂
+
+workflow-agent-kit 的 `createAgent` 做了什么？它的完整生命周期是什么？
+
+**调研要点：**
+- `AgentOptions` 类型的 `run` 和 `continue` 回调签名
+- `AgentRunResult` 的完整定义
+- retry 逻辑（frontmatter 校验失败后的重试机制）
+- `persistStep` 写入 CAS 的 StepNode 结构
+
+**答案：**
+
+#### 类型定义
+
+```4:35:packages/workflow-agent-kit/src/types.ts
+export type AgentContext = ModeratorContext & {
+  threadId: ThreadId;
+  role: string;
+  store: Store;
+  workflow: WorkflowPayload;
+  outputFormatInstruction: string;
+};
+
+export type AgentRunResult = {
+  output: string;
+  detailHash: CasRef;
+  sessionId: string;
+};
+
+export type AgentContinueFn = (
+  sessionId: string,
+  message: string,
+  store: AgentContext["store"],
+) => Promise<AgentRunResult>;
+
+export type AgentRunFn = (ctx: AgentContext) => Promise<AgentRunResult>;
+
+export type AgentOptions = {
+  name: string;
+  run: AgentRunFn;
+  continue: AgentContinueFn;
+};
+```
+
+- **`run(ctx)`**：首次执行，返回原始 agent 文本 `output`、审计用 `detailHash`、用于续聊的 `sessionId`。
+- **`continue(sessionId, message, store)`**：在同一 session 上追加用户消息（用于 frontmatter 纠错），再次返回 `AgentRunResult`。
+
+`createAgent(options)` 返回 `() => Promise<void>`，作为 agent CLI 的 `main`（见 `uwf-hermes` 的 `cli.ts`）。
+
+#### 生命周期（按执行顺序）
+
+```101:152:packages/workflow-agent-kit/src/run.ts
+export function createAgent(options: AgentOptions): () => Promise<void> {
+  return async function main(): Promise<void> {
+    const { threadId, role } = parseArgv(process.argv);
+    const storageRoot = resolveStorageRoot();
+    loadDotenv({ path: getEnvPath(storageRoot) });
+
+    const ctx = await buildContextWithMeta(threadId, role);
+    // 1. 校验 role 存在
+    // 2. 从 CAS 取 frontmatter JSON Schema → buildOutputFormatInstruction → ctx.outputFormatInstruction
+
+    let agentResult = await options.run(ctx);
+
+    let outputHash = await tryExtractOutput(agentResult.output, roleDef.frontmatter, ctx);
+
+    for (let retry = 0; retry < MAX_FRONTMATTER_RETRIES && outputHash === null; retry++) {
+      const correctionMessage = "Your previous response did not contain valid YAML frontmatter...";
+      agentResult = await options.continue(agentResult.sessionId, correctionMessage, ctx.meta.store);
+      outputHash = await tryExtractOutput(agentResult.output, roleDef.frontmatter, ctx);
+    }
+
+    if (outputHash === null) { fail(...); }
+
+    const stepHash = await persistStep({ ctx, outputHash, detailHash: agentResult.detailHash, agentName });
+    process.stdout.write(`${stepHash}\n`);
+  };
+}
+```
+
+| 阶段 | 行为 |
+|------|------|
+| 解析 argv | `argv[2]=threadId`, `argv[3]=role`，缺失则 `stderr` + `exit(1)` |
+| Context | `buildContextWithMeta` + 可选 `outputFormatInstruction` |
+| Run | `options.run(ctx)` |
+| Extract | **仅** `tryFrontmatterFastPath`（见 Q4）；**不**调用 `extract()` LLM fallback |
+| Retry | 最多 `MAX_FRONTMATTER_RETRIES = 2` 次 `continue` + 再试 fast-path |
+| Persist | `persistStep` → `writeStepNode` |
+| 输出 | stdout 一行 step CAS hash |
+
+#### StepNode 写入结构
+
+```44:68:packages/workflow-agent-kit/src/run.ts
+async function writeStepNode(options: {
+  store: AgentStore["store"];
+  schemas: AgentStore["schemas"];
+  startHash: CasRef;
+  prevHash: CasRef | null;
+  role: string;
+  outputHash: CasRef;
+  detailHash: CasRef;
+  agentName: string;
+}): Promise<CasRef> {
+  const payload: StepNodePayload = {
+    start: options.startHash,
+    prev: options.prevHash,
+    role: options.role,
+    output: options.outputHash,
+    detail: options.detailHash,
+    agent: options.agentName,
+  };
+  // store.put(stepNode schema) + validate
+}
+```
+
+`agentName` 经 `agentLabel(name)` 规范化：已有 `uwf-` 前缀则原样，否则加 `uwf-`（如 `hermes` → `uwf-hermes`）。
+
+`prevHash`：若链头仍是 `StartNode` 则为 `null`，否则为当前 head step hash。
+
+---
+
+### Q3: Context Builder
+
+`buildContextWithMeta` 构建了什么上下文给 agent？
+
+**调研要点：**
+- `AgentContext` 完整类型定义（所有字段）
+- context 构建过程（CAS chain walk）
+- `outputFormatInstruction` 怎么生成的
+- role definition 怎么获取（从 workflow YAML）
+
+**答案：**
+
+#### AgentContext 字段
+
+继承 `ModeratorContext`：
+
+```60:68:packages/workflow-protocol/src/types.ts
+export type ModeratorContext = {
+  start: StartNodePayload;
+  steps: StepContext[];
+};
+```
+
+```48:51:packages/workflow-protocol/src/types.ts
+export type StartNodePayload = {
+  workflow: CasRef;
+  prompt: string;
+};
+```
+
+```61:63:packages/workflow-protocol/src/types.ts
+export type StepContext = Omit<StepRecord, "output"> & {
+  output: unknown;
+};
+```
+
+`AgentContext` 额外字段：
+
+| 字段 | 类型 | 含义 |
+|------|------|------|
+| `threadId` | `ThreadId` | 当前线程 |
+| `role` | `string` | 本步要执行的角色名 |
+| `store` | `Store` | CAS store（读写节点） |
+| `workflow` | `WorkflowPayload` | 已从 CAS 加载的 workflow 定义 |
+| `outputFormatInstruction` | `string` | 由 `createAgent` 根据 role 的 frontmatter schema 生成；`buildContext*` 初始为 `""` |
+
+`buildContextWithMeta` 还返回 `meta`：
+
+```148:154:packages/workflow-agent-kit/src/context.ts
+export type BuildContextMeta = {
+  storageRoot: string;
+  store: Store;
+  schemas: AgentStore["schemas"];
+  headHash: CasRef;
+  chain: ChainState;
+};
+```
+
+#### CAS chain walk
+
+1. 从 `threads.yaml[threadId]` 取 `headHash`
+2. `walkChain`：若 head 是 `StartNode`，`stepsNewestFirst=[]`；否则沿 `prev` 收集所有 `StepNode`， newest-first
+3. `buildHistory`：反转为时间序，`expandOutput` 把每步 `output` CasRef 展开为 JSON payload（供 prompt / JSONata 使用）
+4. `loadWorkflow`：从 `start.workflow` CasRef 加载 `WorkflowPayload`
+
+#### Role definition 来源
+
+- 作者写在 workflow YAML 的 `roles.<name>`（`goal`, `capabilities`, `procedure`, `output`, `frontmatter` 等）
+- `uwf workflow put` 时 `frontmatter` 内联 JSON Schema 经 `putSchema` 存入 CAS，workflow 里存的是 **CasRef**
+- Agent 运行时：`ctx.workflow.roles[ctx.role]` → `RoleDefinition`
+
+#### outputFormatInstruction
+
+在 `createAgent` 中，若 `getSchema(store, roleDef.frontmatter)` 非空，则：
+
+```typescript
+ctx.outputFormatInstruction = buildOutputFormatInstruction(frontmatterSchema);
+```
+
+`buildOutputFormatInstruction` 根据 JSON Schema 的 `properties` 生成「必须以 `---` YAML frontmatter 开头」的说明和示例字段列表（见 `build-output-format-instruction.ts`）。
+
+各 agent 实现（Hermes / Claude Code）在组装 prompt 时把该块放在最前，再接 `buildRolePrompt(roleDef)`。
+
+---
+
+### Q4: Extract Pipeline
+
+agent 输出怎么被处理成结构化数据？
+
+**调研要点：**
+- frontmatter fast-path 的完整逻辑
+- LLM extract fallback 的实现（`extract.ts`）
+- frontmatter schema 从哪里来（role 定义里的 `frontmatter` 字段）
+- 校验失败时的 correction prompt 是什么
+
+**答案：**
+
+#### Schema 来源
+
+Workflow YAML 中每个 role 的 `frontmatter:` 段是 JSON Schema 对象；注册时：
+
+```66:76:packages/cli-workflow/src/commands/workflow.ts
+async function resolveFrontmatterRef(..., frontmatter: unknown): Promise<CasRef> {
+  // 校验为 JSON Schema → putSchema → 返回 CasRef
+}
+```
+
+运行时 `roleDef.frontmatter` 即该 schema 的 CAS hash；structured `output` 节点用**同一 schema** 写入 CAS。
+
+#### Frontmatter fast-path（createAgent 实际使用的路径）
+
+```148:195:packages/workflow-agent-kit/src/frontmatter.ts
+export async function tryFrontmatterFastPath(
+  raw: string,
+  outputSchema: CasRef,
+  store: Store,
+): Promise<FrontmatterFastPathResult | null>
+```
+
+流程：
+
+1. `parseFrontmatterMarkdown(raw)` → 标准 agent 字段（`status`, `next`, `confidence`, `artifacts`, `scope`）+ body
+2. `validateFrontmatter` 失败 → `null`
+3. `getSchema(store, outputSchema)` + `extractSchemaFields` 得到 role 需要的属性名
+4. `buildCandidate`：从标准 frontmatter + YAML 原始字段拼出符合 schema 的对象
+5. `store.put(outputSchema, candidate)` + `validate` → 成功则 `{ body, outputHash }`
+
+**永不抛错**，失败返回 `null`。
+
+#### LLM extract fallback（已实现但未接入 createAgent）
+
+```135:181:packages/workflow-agent-kit/src/extract.ts
+export async function extract(
+  rawOutput: string,
+  outputSchema: CasRef,
+  config: WorkflowConfig,
+): Promise<ExtractResult>
+```
+
+- 模型：`resolveExtractModelAlias(config)` → `modelOverrides.extract` → `models.extract` → `models.default` → `defaultModel`
+- HTTP：`POST {baseUrl}/chat/completions`，`response_format: { type: "json_object" }`
+- System：要求按 JSON Schema 从 agent 输出提取单个 JSON 对象
+- 校验通过后 `store.put(outputSchema, structured)`
+
+**重要：`createAgent` 当前未调用 `extract()`**。fast-path 失败且 2 次 `continue` 仍失败则直接 `fail()`。builtin agent 若希望无 frontmatter 也能跑，需在 kit 或 builtin 层显式接入 `extract()`。
+
+#### Correction prompt（retry）
+
+```125:128:packages/workflow-agent-kit/src/run.ts
+const correctionMessage =
+  "Your previous response did not contain valid YAML frontmatter matching the role schema.\n" +
+  "You MUST begin your response with a YAML frontmatter block (--- delimited).\n" +
+  "Please output ONLY the corrected frontmatter block followed by your work.";
+```
+
+通过 `options.continue(sessionId, correctionMessage, store)` 发给外部 agent；builtin 需在自有 message 历史里 append 同等语义的 user 消息。
+
+---
+
+### Q5: Model 配置与 LLM 调用
+
+workflow 怎么配置和使用 model？
+
+**调研要点：**
+- `WorkflowConfig` 中 providers/models/defaultModel/modelOverrides 的完整定义
+- `resolveModel` 函数的实现
+- `chatCompletionText` 的实现（OpenAI 兼容 HTTP 客户端）
+- 有没有 streaming 支持？tool calling 支持？
+
+**答案：**
+
+#### WorkflowConfig
+
+```136:160:packages/workflow-protocol/src/types.ts
+export type ProviderConfig = {
+  baseUrl: string;
+  apiKeyEnv: string;
+};
+
+export type ModelConfig = {
+  provider: ProviderAlias;
+  name: string;
+};
+
+export type WorkflowConfig = {
+  providers: Record<ProviderAlias, ProviderConfig>;
+  models: Record<ModelAlias, ModelConfig>;
+  agents: Record<AgentAlias, AgentConfig>;
+  defaultAgent: AgentAlias;
+  agentOverrides: Record<WorkflowName, Record<RoleName, AgentAlias>> | null;
+  defaultModel: ModelAlias;
+  modelOverrides: Record<Scenario, ModelAlias> | null;
+};
+```
+
+示例见 `docs/architecture.md`（`providers` / `models` / `defaultModel` / `modelOverrides.extract`）。
+
+#### resolveModel
+
+```32:50:packages/workflow-agent-kit/src/extract.ts
+export function resolveModel(config: WorkflowConfig, alias: ModelAlias): ResolvedLlmProvider {
+  const modelEntry = config.models[alias];
+  const providerEntry = config.providers[modelEntry.provider];
+  const apiKey = process.env[providerEntry.apiKeyEnv];
+  return { baseUrl: providerEntry.baseUrl, apiKey, model: modelEntry.name };
+}
+```
+
+`ResolvedLlmProvider = { baseUrl, apiKey, model }`。
+
+Extract 专用别名解析：
+
+```18:30:packages/workflow-agent-kit/src/extract.ts
+export function resolveExtractModelAlias(config: WorkflowConfig): ModelAlias {
+  return config.modelOverrides?.extract ?? (config.models.extract ? "extract" : config.models.default ? "default" : config.defaultModel);
+}
+```
+
+**尚无** `modelOverrides` 按 role/workflow 解析 agent 主模型的函数；builtin 首版可用 `config.defaultModel`，扩展时可加 `modelOverrides.agent` 或与 `agentOverrides` 对称的表。
+
+#### chatCompletionText
+
+```87:124:packages/workflow-agent-kit/src/extract.ts
+async function chatCompletionText(
+  provider: ResolvedLlmProvider,
+  messages: Array<{ role: "system" | "user"; content: string }>,
+): Promise<string>
+```
+
+| 能力 | 现状 |
+|------|------|
+| 协议 | OpenAI 兼容 `POST /chat/completions` |
+| Streaming | **无**（一次性 `response.text()`） |
+| Tool calling | **无**（无 `tools` / `tool_calls` 字段） |
+| 多模态 | **无**（仅 text `content`） |
+| Extract 专用 | `response_format: { type: "json_object" }` |
+
+builtin agent 的 run loop 需要**新写**带 `tools` 的 completion 客户端（可放在 `workflow-agent-builtin` 或扩展 `workflow-agent-kit` 的 `llm/` 模块），不能复用当前 `chatCompletionText` 而不改。
+
+---
+
+### Q6: Hermes Agent 参考实现
+
+`uwf-hermes` 是怎么实现 `run` 和 `continue` 的？
+
+**调研要点：**
+- prompt 怎么组装的（outputFormatInstruction + rolePrompt + task + history）
+- hermes CLI 的调用参数
+- session management（resume）
+- 输出怎么捕获
+
+**答案：**
+
+#### Prompt 组装
+
+```40:53:packages/workflow-agent-hermes/src/hermes.ts
+export function buildHermesPrompt(ctx: AgentContext): string {
+  const roleDef = ctx.workflow.roles[ctx.role];
+  const rolePrompt = roleDef !== undefined ? buildRolePrompt(roleDef) : "";
+  const parts: string[] = [];
+  if (ctx.outputFormatInstruction !== "") {
+    parts.push(ctx.outputFormatInstruction, "");
+  }
+  parts.push(rolePrompt, "", "## Task", ctx.start.prompt);
+  const historyBlock = buildHistorySummary(ctx.steps);
+  if (historyBlock !== "") {
+    parts.push("", historyBlock);
+  }
+  return parts.join("\n");
+}
+```
+
+`buildRolePrompt` 生成 `## Goal` / `## Capabilities` / `## Prepare`（含 `generateCliReference()`）/ `## Procedure` / `## Output`。
+
+`buildHistorySummary`：每步 `role`、`JSON.stringify(step.output)`、`agent`。
+
+Hermes 把**整段 prompt 作为单条 user 消息**传给 `hermes chat -q`（无独立 system channel）。
+
+#### Hermes CLI 参数
+
+首次：
+
+```88:97:packages/workflow-agent-hermes/src/hermes.ts
+spawnHermes(["chat", "-q", prompt, "--yolo", "--max-turns", "90", "--quiet"]);
+```
+
+续聊：
+
+```100:114:packages/workflow-agent-hermes/src/hermes.ts
+spawnHermes(["chat", "--resume", sessionId, "-q", message, "--yolo", "--max-turns", "90", "--quiet"]);
+```
+
+#### Session
+
+- stdout/stderr 中解析 `session_id: <id>`（`parseSessionIdFromStdout`）
+- 会话文件：`~/.hermes/sessions/session_<id>.json`
+- `loadHermesSession` → `storeHermesSessionDetail`：每 assistant/tool 消息写成 CAS turn 节点，汇总为 `detail`；**output 文本** = 最后一条非空 `assistant` 的 `content`
+
+#### 与 createAgent 的衔接
+
+```157:164:packages/workflow-agent-hermes/src/hermes.ts
+export function createHermesAgent(): () => Promise<void> {
+  return createAgent({ name: "hermes", run: runHermes, continue: continueHermes });
+}
+```
+
+`uwf-hermes` 入口：`createHermesAgent()` 即 main。
+
+Claude Code 包（`workflow-agent-claude-code`）结构相同：`buildClaudeCodePrompt` 同构，`claude -p` + `--resume` + JSON stdout 解析。
+
+---
+
+### Q7: Toolkit 需求分析
+
+要实现一个自给自足的 agent，最少需要哪些 tool？
+
+**调研要点：**
+- 现有 workflow example（solve-issue.yaml）里 role 都做什么任务
+- hermes agent 在 workflow 场景下常用哪些 tool
+- 哪些 tool 是 agent loop 必须的（如 file read/write、shell exec、web fetch）
+
+**答案：**
+
+#### solve-issue.yaml 角色能力
+
+| Role | capabilities | 隐含需求 |
+|------|----------------|----------|
+| planner | issue-analysis, planning | 读上下文/仓库、总结，通常不需写代码 |
+| developer | file-edit, shell, testing | **读文件、写文件、执行命令** |
+| reviewer | code-review, static-analysis | 读 diff/文件、静态分析（可读+可选 shell） |
+
+#### Hermes 侧
+
+Hermes 自带完整 agent runtime（`--yolo`、max-turns），tool 集由 Hermes 项目定义，workflow 不配置。从 session JSON 可见 `tool_calls` 被记入 detail，常见包括文件与 shell 类工具。
+
+#### Builtin 最小 toolkit 建议
+
+| 优先级 | Tool | 用途 |
+|--------|------|------|
+| P0 | `read_file` | 读仓库/配置/issue 上下文 |
+| P0 | `write_file` / `edit_file` | developer 改代码 |
+| P0 | `run_command` | 测试、构建、git（需 cwd + timeout + 输出截断） |
+| P1 | `list_dir` / `glob` | 导航代码库 |
+| P1 | `grep` | 搜索符号/引用 |
+| P2 | `fetch_url` | 查文档（planner 偶尔需要） |
+
+**不需要**在 builtin 里实现 moderator / workflow 路由工具——仍由 `uwf thread step` + JSONata 负责。
+
+#### Agent loop 必须能力
+
+1. 多轮 LLM 调用 + **OpenAI-style tool_calls** 解析与执行
+2. 将 tool 结果 append 回 messages
+3. 终止条件：模型不再请求 tool，或达到 `maxTurns`
+4. 最终响应须含合法 YAML frontmatter（满足 Q4），供 `createAgent` fast-path
+
+---
+
+## 方案草案
+
+（调研完成后基于以上答案撰写）
+
+### 架构设计
+
+```mermaid
+flowchart TB
+  subgraph cli ["cli-workflow"]
+    Step["uwf thread step"]
+    Spawn["spawnAgent(uwf-builtin, threadId, role)"]
+    Step --> Spawn
+  end
+
+  subgraph builtin_pkg ["@uncaged/workflow-agent-builtin"]
+    Main["createBuiltinAgent() = createAgent({...})"]
+    Prompt["buildBuiltinPrompt(ctx)"]
+    Loop["runBuiltinLoop(provider, messages, tools)"]
+    Tools["Toolkit: read/write/exec/..."]
+    Detail["storeBuiltinDetail(turns)"]
+    Main --> Prompt
+    Main --> Loop
+    Loop --> Tools
+    Loop --> Detail
+  end
+
+  subgraph kit ["workflow-agent-kit"]
+    Ctx["buildContextWithMeta"]
+    FM["tryFrontmatterFastPath"]
+    Persist["persistStep"]
+    Ctx --> Main
+    Main --> FM
+    FM --> Persist
+  end
+
+  subgraph cas ["CAS / config"]
+    Config["config.yaml models/providers"]
+    CAS["cas/ + threads.yaml"]
+  end
+
+  Spawn --> Main
+  Config --> Loop
+  CAS --> Ctx
+  Persist --> CAS
+  Spawn -->|"stdout: step hash"| Step
+```
+
+**新包**：`packages/workflow-agent-builtin`，bin `uwf-builtin`，仅依赖 `workflow-agent-kit`、`workflow-protocol`、`workflow-util`（可选 `@uncaged/json-cas` 写 detail schema）。
+
+**分层**：
+
+| 层 | 职责 |
+|----|------|
+| `createAgent`（kit） | argv、context、frontmatter extract、StepNode、stdout 协议 — **不变** |
+| `builtin/agent.ts` | `run` / `continue` 实现 |
+| `builtin/llm.ts` | OpenAI 兼容 chat + tools（可后续抽到 kit） |
+| `builtin/tools/*.ts` | 各 tool 的 JSON Schema + handler |
+| `builtin/prompt.ts` | 复用 Hermes 的 prompt 拼接逻辑（或抽到 kit 的 `buildAgentPrompt`） |
+| `builtin/detail.ts` | 类似 Hermes：每轮 assistant/tool 写入 CAS detail |
+
+**配置集成**：
+
+```yaml
+agents:
+  builtin:
+    command: "uwf-builtin"
+    args: []
+defaultAgent: "builtin"   # 或 agentOverrides 按 role 指定
+```
+
+模型：首版 `resolveModel(config, config.defaultModel)`；后续可增加 `modelOverrides.agent` 或 per-role 映射。
+
+---
+
+### Agent Run Loop
+
+伪代码（单次 `run(ctx)`）：
+
+```
+1. provider ← resolveModel(loadWorkflowConfig(), defaultModel)
+2. system ← buildBuiltinPrompt(ctx)   // outputFormatInstruction + buildRolePrompt + Task + History
+3. messages ← [{ role: "system", content: system }]
+4. sessionId ← newULID()              // 内存或临时目录，供 continue 使用
+5. turns ← []
+
+6. for turn in 1..MAX_TURNS:
+     response ← chatCompletionWithTools(provider, messages, TOOL_DEFINITIONS)
+     record assistant message + tool_calls in turns
+
+     if response has no tool_calls:
+       finalText ← response.content
+       break
+
+     for each tool_call:
+       result ← executeTool(tool_call, { cwd: process.cwd() })
+       messages.push tool result
+       record in turns
+
+7. if no finalText with valid frontmatter after loop:
+     optionally one-shot "finalize" message without tools
+
+8. detailHash ← storeBuiltinDetail(store, sessionId, turns, metadata)
+9. return { output: finalText, detailHash, sessionId }
+```
+
+**`continue(sessionId, message, store)`**：
+
+- 从内存/磁盘恢复 `messages` + `turns`
+- `messages.push({ role: "user", content: message })`（correction 或续聊）
+- 从步骤 6 继续，步数上限可单独设小一点（如 3）
+- 返回新的 `AgentRunResult`
+
+**与 frontmatter 的配合**：
+
+- system prompt 已含 `outputFormatInstruction`；最后一轮可强制 user：`Now output your final answer with YAML frontmatter only if you have not yet.`
+- 仍依赖 `createAgent` 的 fast-path + 最多 2 次 continue
+
+**安全**：
+
+- `run_command`：白名单或需 `UWF_BUILTIN_ALLOW_SHELL=1`，默认工作区限定在 `process.cwd()` 或 `start` 中将来扩展的 `workspace` 字段
+- 路径：禁止 `..` 逃逸出 workspace root
+
+---
+
+### Toolkit 设计
+
+统一注册表：
+
+```typescript
+type BuiltinTool = {
+  name: string;
+  description: string;
+  parameters: JSONSchema; // object type
+  execute: (args: unknown, ctx: ToolContext) => Promise<string>;
+};
+
+type ToolContext = {
+  cwd: string;
+  storageRoot: string;
+};
+```
+
+| Tool name | OpenAI function | 行为摘要 |
+|-----------|-----------------|----------|
+| `read_file` | `read_file` | `{ path }` → UTF-8 文本，大小上限 |
+| `write_file` | `write_file` | `{ path, content }` → 写盘，返回确认 |
+| `edit_file` | 可选 | search/replace 块，减少 token |
+| `run_command` | `run_command` | `{ command, cwd? }` → stdout/stderr 截断 |
+| `list_dir` | `list_dir` | `{ path }` → 条目列表 |
+| `grep` | `grep` | `{ pattern, path? }` → 匹配行 |
+
+**LLM 请求形状**（扩展 extract 客户端）：
+
+```json
+{
+  "model": "...",
+  "messages": [...],
+  "tools": [{ "type": "function", "function": { "name", "description", "parameters" } }],
+  "tool_choice": "auto"
+}
+```
+
+解析 `choices[0].message.tool_calls`，执行后以 `{ role: "tool", tool_call_id, content }` 回传。
+
+**不提供** streaming 首版；detail CAS 记录每轮 tool 名/参数/结果摘要供 `uwf thread step-details` 调试。
+
+---
+
+### 与现有架构的集成
+
+| 集成点 | 方式 |
+|--------|------|
+| CLI 协议 | 实现标准 agent CLI：`uwf-builtin <thread-id> <role>`，stdout 一行 step hash，exit 0/1 |
+| 工厂 | `export function createBuiltinAgent()` → `createAgent({ name: "builtin", run, continue })` |
+| Context / Prompt | 复用 `buildContextWithMeta`、`buildRolePrompt`、`buildOutputFormatInstruction`；prompt 布局对齐 `buildHermesPrompt` |
+| 结构化输出 | 优先 YAML frontmatter fast-path；可选后续在 `createAgent` 增加 `extract()` fallback 开关 |
+| 配置 | `config.yaml` 增加 `agents.builtin`；`uwf setup` 可选默认 agent |
+| 存储 | `resolveStorageRoot()` + `loadWorkflowConfig` + `getEnvPath`；与 Hermes 相同，**不**改 `threads.yaml` 写入方 |
+| 测试 | 单元测试：tool handlers、prompt 组装、mock LLM tool loop；集成测试：临时 storage root + fake provider |
+| 发布 | 新包 `@uncaged/workflow-agent-builtin`，bin `uwf-builtin`，加入 `scripts/publish-all.mjs` |
+
+**明确不做**：
+
+- 不替代 moderator / 不在 agent 内调用 `uwf thread step`
+- 不依赖 Hermes/OpenClaw/Claude Code 二进制
+- 首版不实现 streaming、不实现 MCP
+
+**建议实现顺序**：
+
+1. `llm.ts`：tool calling HTTP 客户端 + 单测
+2. P0 tools + `runBuiltinLoop`
+3. `createBuiltinAgent` + detail CAS
+4. `config` / docs / `examples` 可选 `agentOverrides` 演示
+5. （可选）`createAgent` 接入 `extract()` fallback
@@ -0,0 +1,73 @@
+# Issue #418: ACP session/resume 返回空文本
+
+## 调研日期: 2026-05-23
+
+## 根因
+
+`session/resume` 在 restore 路径下 `_make_agent()` 失败，异常被静默吞掉。
+
+### 完整调用链
+
+```
+resume_session(sid)
+  → update_cwd(sid)
+    → get_session(sid) → _restore(sid)
+      → _make_agent()
+        → resolve_runtime_provider("custom") 失败（line 548-561）
+        → AIAgent() 抛出 "No LLM provider configured"（line 564）
+      → except Exception 静默吞掉（line 482-484）→ return None
+    → return None
+  → state is None → fallback: create_session()（新 sid，无历史）
+```
+
+### 关键代码位置（acp_adapter/session.py）
+
+- `_restore()` line 426-498: 从 DB 恢复 session，但 except 太宽泛
+- `_make_agent()` line 520-568: provider 解析在 restore 路径下不完整
+- Line 548-561: `resolve_runtime_provider("custom")` 失败后，`base_url` 虽然从 DB 取到了但没传给 AIAgent
+
+### 实测行为
+
+1. Phase 1: `session/new` + `prompt` → 正常，有 `agent_message_chunk`
+2. Phase 2: `session/resume` + `prompt`
+   - resume 返回成功，但 `available_commands_update` 里 sessionId 是新的（create_session fallback）
+   - 用原始 sid 发 prompt → `stopReason: "refusal"`（session 不在内存中）
+   - 用新 sid 发 prompt → 能跑但无历史（agent 回答"不知道 secret code"）
+
+### 验证脚本
+
+```python
+# 直接调用 _restore 验证
+cd ~/.hermes/hermes-agent
+python3 -c "
+import sys; sys.path.insert(0, '.')
+from acp_adapter.session import SessionManager
+sm = SessionManager()
+result = sm._restore('SESSION_ID_HERE')
+print(result)  # None — _make_agent 抛异常被吞掉
+"
+```
+
+### 两个 bug
+
+1. **`_make_agent` provider fallback 不完整**: restore 时 DB 里有 `base_url` 和 `api_mode`，但 `resolve_runtime_provider` 失败后这些值没被正确传递给 AIAgent
+2. **`_restore` 的 except 太宽泛**: 静默吞掉所有异常，连 warning 都只在 debug 级别，导致 resume 失败完全无感知
+
+### Hermes 版本
+
+- v0.10.0 (2026.4.16) — 初始测试
+- v0.14.0 (2026.5.16) — 更新后重新测试，bug 仍在
+- 代码路径: ~/.hermes/hermes-agent/acp_adapter/session.py
+
+### v0.14.0 测试结果 (2026-05-23)
+
+- `_restore` 仍因 `custom` provider 解析失败返回 None
+- 日志更清晰了：`WARNING: Failed to recreate agent for ACP session ...`
+- resume fallback 创建新 session（新 sid），但 agent 居然能回答之前的问题（可能通过 memory/session search）
+- 核心问题不变：sessionId 变了，client 用旧 sid 发 prompt → refusal
+
+### 上游 Issue
+
+- https://github.com/NousResearch/hermes-agent/issues/13489 — 已评论根因分析
+- https://github.com/NousResearch/hermes-agent/issues/8083 — resume 静默创建新 session
+- https://github.com/NousResearch/hermes-agent/issues/18452 — _make_agent fallback 不完整
@@ -19,7 +19,7 @@ roles:
    output: |
      Provide your analysis as markdown under the frontmatter.
      The frontmatter must include your structured findings.
-    meta:
+    frontmatter:
      type: object
      properties:
        thesis:
@@ -36,6 +36,8 @@ graph:
  $START:
    - role: "analyst"
      condition: null
+      prompt: "Analyze the topic in the task and produce a structured summary with key points."
  analyst:
    - role: "$END"
      condition: null
+      prompt: "Analysis complete. Finish the workflow."
@@ -0,0 +1,77 @@
+name: "debate"
+description: "Structured debate between two sides. Tests cross-process session resume."
+roles:
+  against:
+    description: "Argues against the proposition"
+    goal: |
+      You are a skilled debater arguing AGAINST the proposition.
+      Be logical, cite evidence, and directly address your opponent's points.
+      Keep each argument concise (under 200 words).
+    capabilities:
+      - argumentation
+      - critical-thinking
+    procedure: |
+      1. If this is the opening, present your strongest argument against the proposition.
+      2. If responding to the other side, directly counter their points with evidence and logic.
+      3. If you find yourself genuinely convinced by the other side, you may concede.
+    output: |
+      Provide your argument in the frontmatter.
+      Set conceded to true ONLY if you are genuinely convinced and wish to stop debating.
+    frontmatter:
+      type: object
+      properties:
+        argument:
+          type: string
+        conceded:
+          type: boolean
+      required: [argument, conceded]
+  for:
+    description: "Argues for the proposition"
+    goal: |
+      You are a skilled debater arguing FOR the proposition.
+      Be logical, cite evidence, and directly address your opponent's points.
+      Keep each argument concise (under 200 words).
+    capabilities:
+      - argumentation
+      - critical-thinking
+    procedure: |
+      1. Read the opposing side's latest argument carefully.
+      2. Counter their points with evidence and logic.
+      3. If you find yourself genuinely convinced by the other side, you may concede.
+    output: |
+      Provide your argument in the frontmatter.
+      Set conceded to true ONLY if you are genuinely convinced and wish to stop debating.
+    frontmatter:
+      type: object
+      properties:
+        argument:
+          type: string
+        conceded:
+          type: boolean
+      required: [argument, conceded]
+conditions:
+  againstConceded:
+    description: "The against side conceded"
+    expression: "$last('against').conceded = true"
+  forConceded:
+    description: "The for side conceded"
+    expression: "$last('for').conceded = true"
+graph:
+  $START:
+    - role: "against"
+      condition: null
+      prompt: "Present your opening argument against the proposition."
+  against:
+    - role: "$END"
+      condition: "againstConceded"
+      prompt: "The against side conceded. Debate over."
+    - role: "for"
+      condition: null
+      prompt: "Counter the opposing argument. Address their points directly."
+  for:
+    - role: "$END"
+      condition: "forConceded"
+      prompt: "The for side conceded. Debate over."
+    - role: "against"
+      condition: null
+      prompt: "Counter the opposing argument. Address their points directly."
@@ -3,22 +3,35 @@ description: "End-to-end issue resolution"
 roles:
  planner:
    description: "Creates implementation plan"
-    goal: "You are a planning agent. You analyze issues and create step-by-step plans."
+    goal: "You are a planning agent. You analyze issues and create implementation plans grounded in the actual codebase."
    capabilities:
      - issue-analysis
      - planning
-    procedure: "Analyze the issue and create a detailed, actionable implementation plan."
-    output: "Output the plan summary and list of concrete steps."
-    meta:
+      - file-read
+      - shell
+    procedure: |
+      1. Locate the code repository:
+         - Check if the current working directory is the repo (look for package.json, .git, etc.)
+         - If the task mentions a repo URL, clone it first.
+         - If this is a new project, create the repo and note the path.
+      2. Explore the codebase — read the relevant source files mentioned in the issue. Understand the current architecture, types, and conventions (check CLAUDE.md, CONTRIBUTING.md, .cursor/rules/).
+      3. Identify which files need changes and what the changes should be, with specific code references.
+      4. Output the plan with:
+         - `repoPath`: absolute path to the repository root
+         - `plan`: detailed implementation plan with file paths and code references
+         - `steps`: concrete action items for the developer
+    output: |
+      Provide repoPath, plan summary, and steps in the frontmatter.
+      The plan MUST reference actual file paths and code structures you found by reading the source.
+      Do NOT guess — if you haven't read a file, read it before referencing it.
+    frontmatter:
      type: object
      properties:
+        repoPath:
+          type: string
        plan:
          type: string
-        steps:
-          type: array
-          items:
-            type: string
-      required: [plan, steps]
+      required: [repoPath, plan]
  developer:
    description: "Implements code changes"
    goal: "You are a developer agent. You implement code changes according to plans."
@@ -26,9 +39,14 @@ roles:
      - file-edit
      - shell
      - testing
-    procedure: "Implement the plan. Write code, tests, and ensure existing tests pass."
+    procedure: |
+      1. Read the planner's output to get the repoPath and implementation plan.
+      2. cd to the repoPath before making any changes.
+      3. Create a feature branch from the default branch.
+      4. Implement the plan — write code, tests, and ensure existing tests pass.
+      5. Commit your changes with a descriptive message referencing the issue.
    output: "List all files changed and provide a summary of the implementation."
-    meta:
+    frontmatter:
      type: object
      properties:
        filesChanged:
@@ -46,7 +64,7 @@ roles:
      - static-analysis
    procedure: "Review the implementation against the plan. Check for bugs, edge cases, and style."
    output: "Approve or reject with detailed comments explaining your decision."
-    meta:
+    frontmatter:
      type: object
      properties:
        approved:
@@ -57,19 +75,24 @@ roles:
 conditions:
  notApproved:
    description: "Reviewer rejected the implementation"
-    expression: "steps[-1].output.approved = false"
+    expression: "$last('reviewer').approved = false"
 graph:
  $START:
    - role: "planner"
      condition: null
+      prompt: "Analyze the issue described in the task and produce a detailed implementation plan."
  planner:
    - role: "developer"
      condition: null
+      prompt: "Implement the plan from the planner. Write code, tests, and ensure existing tests pass."
  developer:
    - role: "reviewer"
      condition: null
+      prompt: "Review the developer's implementation against the plan for correctness and quality."
  reviewer:
    - role: "developer"
      condition: "notApproved"
+      prompt: "The reviewer rejected your implementation. Read their feedback and fix the issues."
    - role: "$END"
      condition: null
+      prompt: "The review passed. Complete the workflow."
@@ -15,10 +15,12 @@
    "release": "bun run build && bun test && node scripts/publish-all.mjs"
  },
  "devDependencies": {
+    "@agentclientprotocol/sdk": "^0.22.1",
    "@biomejs/biome": "^2.4.14",
    "@changesets/cli": "^2.31.0",
    "@types/node": "^25.7.0",
    "@types/xxhashjs": "^0.2.4",
+    "@uncaged/workflow-agent-hermes": "workspace:*",
    "bun-types": "^1.3.13"
  }
 }
@@ -0,0 +1,181 @@
+import { mkdir, readdir, rm, writeFile } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, beforeEach, describe, expect, test } from "vitest";
+import { cmdLogClean, cmdLogList, cmdLogShow } from "../commands/log.js";
+
+let storageRoot: string;
+
+beforeEach(async () => {
+  storageRoot = join(tmpdir(), `uwf-log-test-${Date.now()}-${Math.random().toString(36).slice(2)}`);
+  await mkdir(join(storageRoot, "logs"), { recursive: true });
+});
+
+afterEach(async () => {
+  await rm(storageRoot, { recursive: true, force: true });
+});
+
+const entry1 = JSON.stringify({
+  ts: "2026-05-20T10:00:00.000Z",
+  pid: "1716200000000-1234",
+  tag: "W9F3RK2M",
+  msg: "process start",
+  thread: "01J1234ABCDEF",
+  workflow: "solve-issue",
+});
+
+const entry2 = JSON.stringify({
+  ts: "2026-05-20T10:00:01.000Z",
+  pid: "1716200000000-1234",
+  tag: "ABC12345",
+  msg: "step executed",
+  thread: "01J1234ABCDEF",
+  workflow: "solve-issue",
+});
+
+const entry3 = JSON.stringify({
+  ts: "2026-05-20T10:00:02.000Z",
+  pid: "1716200000000-5678",
+  tag: "XYZ98765",
+  msg: "different process",
+  thread: "01JOTHER000000",
+  workflow: "review-code",
+});
+
+const oldEntry = JSON.stringify({
+  ts: "2026-05-19T08:00:00.000Z",
+  pid: "1716200000000-9999",
+  tag: "OLD1TAG1",
+  msg: "old entry",
+  thread: "01JOLD0000000",
+  workflow: "solve-issue",
+});
+
+const olderEntry = JSON.stringify({
+  ts: "2026-05-18T08:00:00.000Z",
+  pid: "1716200000000-0001",
+  tag: "OLD2TAG2",
+  msg: "older entry",
+  thread: "01JOLDER00000",
+  workflow: "review-code",
+});
+
+async function writeLogFiles(): Promise<void> {
+  const logsDir = join(storageRoot, "logs");
+  await writeFile(join(logsDir, "2026-05-20.jsonl"), `${[entry1, entry2, entry3].join("\n")}\n`);
+  await writeFile(join(logsDir, "2026-05-19.jsonl"), `${oldEntry}\n`);
+  await writeFile(join(logsDir, "2026-05-18.jsonl"), `${olderEntry}\n`);
+}
+
+describe("cmdLogList", () => {
+  test("lists log files with sizes sorted by date descending", async () => {
+    await writeLogFiles();
+    const result = await cmdLogList(storageRoot);
+    expect(result).toHaveLength(3);
+    expect(result[0].name).toBe("2026-05-20.jsonl");
+    expect(result[0].date).toBe("2026-05-20");
+    expect(result[0].size).toBeGreaterThan(0);
+    expect(result[1].name).toBe("2026-05-19.jsonl");
+    expect(result[2].name).toBe("2026-05-18.jsonl");
+  });
+
+  test("returns empty array when no log files exist", async () => {
+    const result = await cmdLogList(storageRoot);
+    expect(result).toEqual([]);
+  });
+
+  test("returns empty array when logs directory does not exist", async () => {
+    const noLogsRoot = join(storageRoot, "nonexistent");
+    await mkdir(noLogsRoot, { recursive: true });
+    const result = await cmdLogList(noLogsRoot);
+    expect(result).toEqual([]);
+  });
+});
+
+describe("cmdLogShow", () => {
+  test("filters by thread ID", async () => {
+    await writeLogFiles();
+    const result = await cmdLogShow(storageRoot, {
+      thread: "01J1234ABCDEF",
+      process: null,
+      date: null,
+    });
+    expect(result).toHaveLength(2);
+    expect(result.every((e) => e.thread === "01J1234ABCDEF")).toBe(true);
+  });
+
+  test("filters by process ID", async () => {
+    await writeLogFiles();
+    const result = await cmdLogShow(storageRoot, {
+      thread: null,
+      process: "1716200000000-1234",
+      date: null,
+    });
+    expect(result).toHaveLength(2);
+    expect(result.every((e) => e.pid === "1716200000000-1234")).toBe(true);
+  });
+
+  test("filters by date", async () => {
+    await writeLogFiles();
+    const result = await cmdLogShow(storageRoot, {
+      thread: null,
+      process: null,
+      date: "2026-05-19",
+    });
+    expect(result).toHaveLength(1);
+    expect(result[0].msg).toBe("old entry");
+  });
+
+  test("reads all files when no date filter", async () => {
+    await writeLogFiles();
+    const result = await cmdLogShow(storageRoot, { thread: null, process: null, date: null });
+    expect(result).toHaveLength(5);
+    // sorted by ts ascending
+    expect(result[0].ts).toBe("2026-05-18T08:00:00.000Z");
+    expect(result[4].ts).toBe("2026-05-20T10:00:02.000Z");
+  });
+
+  test("returns empty when no matches", async () => {
+    await writeLogFiles();
+    const result = await cmdLogShow(storageRoot, {
+      thread: "NONEXISTENT",
+      process: null,
+      date: null,
+    });
+    expect(result).toEqual([]);
+  });
+
+  test("combined thread + date filter", async () => {
+    await writeLogFiles();
+    const result = await cmdLogShow(storageRoot, {
+      thread: "01J1234ABCDEF",
+      process: null,
+      date: "2026-05-20",
+    });
+    expect(result).toHaveLength(2);
+    expect(result.every((e) => e.thread === "01J1234ABCDEF")).toBe(true);
+  });
+});
+
+describe("cmdLogClean", () => {
+  test("deletes files before given date", async () => {
+    await writeLogFiles();
+    const result = await cmdLogClean(storageRoot, "2026-05-20");
+    expect(result.deleted).toBe(2);
+    const remaining = await readdir(join(storageRoot, "logs"));
+    expect(remaining).toEqual(["2026-05-20.jsonl"]);
+  });
+
+  test("deletes nothing when all files are newer", async () => {
+    await writeLogFiles();
+    const result = await cmdLogClean(storageRoot, "2026-05-18");
+    expect(result.deleted).toBe(0);
+  });
+
+  test("handles missing logs directory gracefully", async () => {
+    const noLogsRoot = join(storageRoot, "nonexistent");
+    await mkdir(noLogsRoot, { recursive: true });
+    const result = await cmdLogClean(noLogsRoot, "2026-05-20");
+    expect(result).toEqual({ deleted: 0 });
+  });
+});
@@ -0,0 +1,150 @@
+import { mkdtemp, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { afterEach, beforeEach, describe, expect, test, vi } from "vitest";
+import { cmdSetup, validateModel } from "../commands/setup.js";
+
+describe("validateModel", () => {
+  const BASE_URL = "https://api.example.com/v1";
+  const API_KEY = "sk-test-key";
+  const MODEL = "test-model";
+
+  afterEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  test("success path — returns ok on 200", async () => {
+    const mockFetch = vi
+      .spyOn(globalThis, "fetch")
+      .mockResolvedValue(new Response(JSON.stringify({}), { status: 200 }));
+
+    const result = await validateModel(BASE_URL, API_KEY, MODEL);
+
+    expect(result).toEqual({ ok: true, value: undefined });
+    expect(mockFetch).toHaveBeenCalledOnce();
+
+    const [url, opts] = mockFetch.mock.calls[0]!;
+    expect(url).toBe(`${BASE_URL}/chat/completions`);
+    expect((opts as RequestInit).headers).toEqual(
+      expect.objectContaining({ Authorization: `Bearer ${API_KEY}` }),
+    );
+    const body = JSON.parse((opts as RequestInit).body as string);
+    expect(body).toEqual({
+      model: MODEL,
+      messages: [{ role: "user", content: "hi" }],
+      max_tokens: 1,
+    });
+  });
+
+  test("HTTP 401 — returns error containing 401", async () => {
+    vi.spyOn(globalThis, "fetch").mockResolvedValue(
+      new Response("Unauthorized", { status: 401, statusText: "Unauthorized" }),
+    );
+
+    const result = await validateModel(BASE_URL, API_KEY, MODEL);
+
+    expect(result.ok).toBe(false);
+    if (!result.ok) {
+      expect(result.error).toContain("401");
+    }
+  });
+
+  test("HTTP 404 — returns error containing 404", async () => {
+    vi.spyOn(globalThis, "fetch").mockResolvedValue(
+      new Response("Not Found", { status: 404, statusText: "Not Found" }),
+    );
+
+    const result = await validateModel(BASE_URL, API_KEY, MODEL);
+
+    expect(result.ok).toBe(false);
+    if (!result.ok) {
+      expect(result.error).toContain("404");
+    }
+  });
+
+  test("network timeout — returns error mentioning timeout", async () => {
+    const err = new DOMException("signal timed out", "AbortError");
+    vi.spyOn(globalThis, "fetch").mockRejectedValue(err);
+
+    const result = await validateModel(BASE_URL, API_KEY, MODEL);
+
+    expect(result.ok).toBe(false);
+    if (!result.ok) {
+      expect(result.error.toLowerCase()).toMatch(/timeout|timed out/);
+    }
+  });
+
+  test("network error (DNS/connection) — returns error mentioning connectivity", async () => {
+    vi.spyOn(globalThis, "fetch").mockRejectedValue(new TypeError("fetch failed"));
+
+    const result = await validateModel(BASE_URL, API_KEY, MODEL);
+
+    expect(result.ok).toBe(false);
+    if (!result.ok) {
+      expect(result.error.toLowerCase()).toMatch(/connect|reach|network/);
+    }
+  });
+
+  test("request body correctness", async () => {
+    const mockFetch = vi
+      .spyOn(globalThis, "fetch")
+      .mockResolvedValue(new Response(JSON.stringify({}), { status: 200 }));
+
+    await validateModel(BASE_URL, API_KEY, "my-special-model");
+
+    const body = JSON.parse((mockFetch.mock.calls[0]![1] as RequestInit).body as string);
+    expect(body).toEqual({
+      model: "my-special-model",
+      messages: [{ role: "user", content: "hi" }],
+      max_tokens: 1,
+    });
+  });
+});
+
+describe("cmdSetup with validation", () => {
+  let storageRoot: string;
+
+  beforeEach(async () => {
+    storageRoot = await mkdtemp(join(tmpdir(), "uwf-setup-validate-"));
+  });
+
+  afterEach(async () => {
+    vi.restoreAllMocks();
+    await rm(storageRoot, { recursive: true, force: true });
+  });
+
+  const setupArgs = () => ({
+    provider: "testprovider",
+    baseUrl: "https://api.test.com/v1",
+    apiKey: "sk-test",
+    model: "test-model",
+    storageRoot,
+  });
+
+  test("includes validation result on success", async () => {
+    vi.spyOn(globalThis, "fetch").mockResolvedValue(
+      new Response(JSON.stringify({}), { status: 200 }),
+    );
+
+    const result = await cmdSetup(setupArgs());
+
+    expect(result.validation).toEqual({ ok: true, value: undefined });
+    // Config files should still be written
+    expect(result.configPath).toBeTruthy();
+    expect(result.envPath).toBeTruthy();
+  });
+
+  test("includes validation failure — config still saved", async () => {
+    vi.spyOn(globalThis, "fetch").mockResolvedValue(
+      new Response("Unauthorized", { status: 401, statusText: "Unauthorized" }),
+    );
+
+    const result = await cmdSetup(setupArgs());
+
+    expect(result.validation).toBeDefined();
+    expect((result.validation as { ok: boolean }).ok).toBe(false);
+    // Config files should still be written despite validation failure
+    expect(result.configPath).toBeTruthy();
+    expect(result.envPath).toBeTruthy();
+  });
+});
@@ -0,0 +1,71 @@
+import { execFileSync } from "node:child_process";
+import { join } from "node:path";
+import { describe, expect, test } from "vitest";
+
+const CLI_PATH = join(import.meta.dirname, "..", "cli.js");
+
+function runCli(args: string[]): { stdout: string; stderr: string; exitCode: number } {
+  try {
+    const stdout = execFileSync("bun", ["run", CLI_PATH, ...args], {
+      encoding: "utf8",
+      env: { ...process.env, WORKFLOW_STORAGE_ROOT: "/tmp/uwf-test-nonexistent" },
+      stdio: ["ignore", "pipe", "pipe"],
+    });
+    return { stdout, stderr: "", exitCode: 0 };
+  } catch (e: unknown) {
+    const err = e as NodeJS.ErrnoException & { stdout?: string; stderr?: string; status?: number };
+    return {
+      stdout: err.stdout ?? "",
+      stderr: err.stderr ?? "",
+      exitCode: err.status ?? 1,
+    };
+  }
+}
+
+describe("thread step --count CLI parsing", () => {
+  test("--help shows -c/--count option", () => {
+    const result = runCli(["thread", "step", "--help"]);
+    expect(result.stdout).toContain("--count");
+    expect(result.stdout).toContain("-c");
+  });
+
+  test("description says 'one or more steps'", () => {
+    const result = runCli(["thread", "step", "--help"]);
+    expect(result.stdout).toContain("one or more steps");
+  });
+});
+
+describe("cmdThreadStep count logic", () => {
+  test("count=0 fails with validation error", () => {
+    const result = runCli(["thread", "step", "FAKE_THREAD_ID", "-c", "0"]);
+    expect(result.exitCode).not.toBe(0);
+    expect(result.stderr).toContain("positive integer");
+  });
+
+  test("negative count fails with validation error", () => {
+    const result = runCli(["thread", "step", "FAKE_THREAD_ID", "-c", "-1"]);
+    expect(result.exitCode).not.toBe(0);
+    expect(result.stderr).toContain("positive integer");
+  });
+
+  test("non-integer count fails with validation error", () => {
+    const result = runCli(["thread", "step", "FAKE_THREAD_ID", "-c", "1.5"]);
+    expect(result.exitCode).not.toBe(0);
+    expect(result.stderr).toContain("positive integer");
+  });
+
+  test("count=1 is the default (no -c flag)", () => {
+    // Without -c, it should attempt to run 1 step (failing on missing thread, not on count validation)
+    const result = runCli(["thread", "step", "FAKE_THREAD_ID"]);
+    expect(result.exitCode).not.toBe(0);
+    // Should NOT contain "positive integer" error — should fail on thread lookup instead
+    expect(result.stderr).not.toContain("positive integer");
+  });
+
+  test("count=3 passes validation (fails on thread lookup)", () => {
+    const result = runCli(["thread", "step", "FAKE_THREAD_ID", "-c", "3"]);
+    expect(result.exitCode).not.toBe(0);
+    // Should NOT contain "positive integer" error — should fail on thread/storage lookup
+    expect(result.stderr).not.toContain("positive integer");
+  });
+});
@@ -0,0 +1,367 @@
+import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { createFsStore } from "@uncaged/json-cas-fs";
+import type { CasRef, WorkflowPayload } from "@uncaged/workflow-protocol";
+import { afterEach, beforeEach, describe, expect, test } from "vitest";
+import { stringify } from "yaml";
+import { cmdThreadStart } from "../commands/thread.js";
+import { registerUwfSchemas } from "../schemas.js";
+import type { UwfStore } from "../store.js";
+import { loadWorkflowRegistry, saveWorkflowRegistry } from "../store.js";
+
+// ── helpers ───────────────────────────────────────────────────────────────────
+
+async function makeUwfStore(storageRoot: string): Promise<UwfStore> {
+  const casDir = join(storageRoot, "cas");
+  await mkdir(casDir, { recursive: true });
+  const store = createFsStore(casDir);
+  const schemas = await registerUwfSchemas(store);
+  return { storageRoot, store, schemas };
+}
+
+async function storeWorkflow(uwf: UwfStore, name: string): Promise<CasRef> {
+  const payload: WorkflowPayload = {
+    name,
+    description: "Test workflow",
+    roles: {},
+    conditions: {},
+    graph: {},
+  };
+  return await uwf.store.put(uwf.schemas.workflow, payload);
+}
+
+async function createWorkflowYaml(name: string, version: string | null = null): Promise<string> {
+  const payload: WorkflowPayload = {
+    name,
+    description: version !== null ? `Test workflow (${version})` : "Test workflow",
+    roles: {},
+    conditions: {},
+    graph: {},
+  };
+  const yaml = stringify(payload);
+  return yaml;
+}
+
+// ── fixture ───────────────────────────────────────────────────────────────────
+
+let tmpDir: string;
+let storageRoot: string;
+let projectRoot: string;
+
+beforeEach(async () => {
+  tmpDir = await mkdtemp(join(tmpdir(), "cli-uwf-wf-resolve-test-"));
+  storageRoot = join(tmpDir, "storage");
+  projectRoot = join(tmpDir, "project");
+  await mkdir(storageRoot, { recursive: true });
+  await mkdir(projectRoot, { recursive: true });
+});
+
+afterEach(async () => {
+  await rm(tmpDir, { recursive: true, force: true });
+});
+
+// ── Strategy 1: CAS Hash Resolution ───────────────────────────────────────────
+
+describe("Strategy 1: CAS Hash Resolution", () => {
+  test("should resolve valid 13-char Crockford Base32 hash", async () => {
+    const uwf = await makeUwfStore(storageRoot);
+    const hash = await storeWorkflow(uwf, "test-workflow");
+
+    const result = await cmdThreadStart(storageRoot, hash, "test prompt", projectRoot);
+
+    expect(result.workflow).toBe(hash);
+    expect(result.thread).toMatch(/^[0-9A-HJKMNP-TV-Z]{26}$/);
+  });
+
+  test("should fail on invalid hash format (non-Crockford characters)", async () => {
+    await makeUwfStore(storageRoot);
+
+    await expect(
+      cmdThreadStart(storageRoot, "123456789ABCD", "prompt", projectRoot),
+    ).rejects.toThrow();
+  });
+
+  test("should fail on valid-format hash not present in CAS", async () => {
+    await makeUwfStore(storageRoot);
+    const fakeHash = "0000000000000"; // valid format, doesn't exist
+
+    await expect(cmdThreadStart(storageRoot, fakeHash, "prompt", projectRoot)).rejects.toThrow();
+  });
+
+  test("should reject 40-char hex hash (legacy format not supported)", async () => {
+    await makeUwfStore(storageRoot);
+    const hexHash = "a".repeat(40);
+
+    await expect(cmdThreadStart(storageRoot, hexHash, "prompt", projectRoot)).rejects.toThrow();
+  });
+});
+
+// ── Strategy 2: File Path Resolution ──────────────────────────────────────────
+
+describe("Strategy 2: File Path Resolution", () => {
+  test("should load workflow from absolute file path", async () => {
+    await makeUwfStore(storageRoot);
+    const yamlPath = join(tmpDir, "test-workflow.yaml");
+    await writeFile(yamlPath, await createWorkflowYaml("test-workflow"));
+
+    const result = await cmdThreadStart(storageRoot, yamlPath, "prompt", projectRoot);
+
+    expect(result.workflow).toMatch(/^[0-9A-HJKMNP-TV-Z]{13}$/);
+    const uwf = await makeUwfStore(storageRoot);
+    const node = uwf.store.get(result.workflow);
+    expect(node).not.toBeNull();
+    if (node !== null) {
+      expect((node.payload as WorkflowPayload).name).toBe("test-workflow");
+    }
+  });
+
+  test("should load workflow from relative file path", async () => {
+    await makeUwfStore(storageRoot);
+    const yamlPath = "test-workflow.yaml";
+    await writeFile(join(projectRoot, yamlPath), await createWorkflowYaml("test-workflow"));
+
+    const result = await cmdThreadStart(storageRoot, yamlPath, "prompt", projectRoot);
+
+    expect(result.workflow).toMatch(/^[0-9A-HJKMNP-TV-Z]{13}$/);
+  });
+
+  test("should fail when file path does not exist", async () => {
+    await makeUwfStore(storageRoot);
+
+    await expect(
+      cmdThreadStart(storageRoot, "./nonexistent.yaml", "prompt", projectRoot),
+    ).rejects.toThrow();
+  });
+
+  test("should fail on invalid YAML syntax in file", async () => {
+    await makeUwfStore(storageRoot);
+    const yamlPath = join(tmpDir, "bad-syntax.yaml");
+    await writeFile(yamlPath, "invalid: yaml: : :");
+
+    await expect(cmdThreadStart(storageRoot, yamlPath, "prompt", projectRoot)).rejects.toThrow();
+  });
+
+  test("should fail on valid YAML with invalid WorkflowPayload shape", async () => {
+    await makeUwfStore(storageRoot);
+    const yamlPath = join(tmpDir, "invalid-workflow.yaml");
+    await writeFile(yamlPath, "name: test\n# missing roles, conditions, and graph");
+
+    await expect(cmdThreadStart(storageRoot, yamlPath, "prompt", projectRoot)).rejects.toThrow();
+  });
+
+  test("should enforce filename matches workflow name", async () => {
+    await makeUwfStore(storageRoot);
+    const yamlPath = join(tmpDir, "solve-issue.yaml");
+    await writeFile(yamlPath, await createWorkflowYaml("wrong-name"));
+
+    await expect(cmdThreadStart(storageRoot, yamlPath, "prompt", projectRoot)).rejects.toThrow();
+  });
+});
+
+// ── Strategy 3: Local Discovery (Parent Traversal) ────────────────────────────
+
+describe("Strategy 3: Local Discovery", () => {
+  test("should find workflow in current directory .workflow/", async () => {
+    await makeUwfStore(storageRoot);
+    const workflowDir = join(projectRoot, ".workflow");
+    await mkdir(workflowDir, { recursive: true });
+    await writeFile(join(workflowDir, "solve-issue.yaml"), await createWorkflowYaml("solve-issue"));
+
+    const result = await cmdThreadStart(storageRoot, "solve-issue", "prompt", projectRoot);
+
+    expect(result.workflow).toMatch(/^[0-9A-HJKMNP-TV-Z]{13}$/);
+    const uwf = await makeUwfStore(storageRoot);
+    const node = uwf.store.get(result.workflow);
+    expect(node).not.toBeNull();
+    if (node !== null) {
+      expect((node.payload as WorkflowPayload).name).toBe("solve-issue");
+    }
+  });
+
+  test("should find workflow in parent directory .workflow/", async () => {
+    await makeUwfStore(storageRoot);
+    const workflowDir = join(projectRoot, ".workflow");
+    await mkdir(workflowDir, { recursive: true });
+    await writeFile(join(workflowDir, "solve-issue.yaml"), await createWorkflowYaml("solve-issue"));
+
+    const subdir = join(projectRoot, "packages", "cli-workflow", "src");
+    await mkdir(subdir, { recursive: true });
+
+    const result = await cmdThreadStart(storageRoot, "solve-issue", "prompt", subdir);
+
+    expect(result.workflow).toMatch(/^[0-9A-HJKMNP-TV-Z]{13}$/);
+  });
+
+  test("should stop at filesystem root when traversing", async () => {
+    await makeUwfStore(storageRoot);
+    const deepPath = join(tmpDir, "deep", "path", "that", "does", "not", "have", "workflow");
+    await mkdir(deepPath, { recursive: true });
+
+    await expect(cmdThreadStart(storageRoot, "nonexistent", "prompt", deepPath)).rejects.toThrow();
+  });
+
+  test("should prefer .workflow/ over .workflows/ directory", async () => {
+    await makeUwfStore(storageRoot);
+    const workflowDir = join(projectRoot, ".workflow");
+    const workflowsDir = join(projectRoot, ".workflows");
+    await mkdir(workflowDir, { recursive: true });
+    await mkdir(workflowsDir, { recursive: true });
+
+    await writeFile(
+      join(workflowDir, "solve-issue.yaml"),
+      await createWorkflowYaml("solve-issue", "1"),
+    );
+    await writeFile(
+      join(workflowsDir, "solve-issue.yaml"),
+      await createWorkflowYaml("solve-issue", "2"),
+    );
+
+    const result = await cmdThreadStart(storageRoot, "solve-issue", "prompt", projectRoot);
+
+    const uwf = await makeUwfStore(storageRoot);
+    const node = uwf.store.get(result.workflow);
+    expect(node).not.toBeNull();
+    if (node !== null) {
+      expect((node.payload as WorkflowPayload).description).toBe("Test workflow (1)");
+    }
+  });
+
+  test("should support .yml extension in local discovery", async () => {
+    await makeUwfStore(storageRoot);
+    const workflowDir = join(projectRoot, ".workflow");
+    await mkdir(workflowDir, { recursive: true });
+    await writeFile(join(workflowDir, "solve-issue.yml"), await createWorkflowYaml("solve-issue"));
+
+    const result = await cmdThreadStart(storageRoot, "solve-issue", "prompt", projectRoot);
+
+    expect(result.workflow).toMatch(/^[0-9A-HJKMNP-TV-Z]{13}$/);
+  });
+});
+
+// ── Strategy 4: Global Registry Fallback ──────────────────────────────────────
+
+describe("Strategy 4: Global Registry Resolution", () => {
+  test("should resolve workflow from global registry when not found locally", async () => {
+    const uwf = await makeUwfStore(storageRoot);
+    const hash = await storeWorkflow(uwf, "deploy-pipeline");
+    const registry = await loadWorkflowRegistry(storageRoot);
+    registry["deploy-pipeline"] = hash;
+    await saveWorkflowRegistry(storageRoot, registry);
+
+    const isolatedRoot = join(tmpDir, "isolated");
+    await mkdir(isolatedRoot, { recursive: true });
+
+    const result = await cmdThreadStart(storageRoot, "deploy-pipeline", "prompt", isolatedRoot);
+
+    expect(result.workflow).toBe(hash);
+  });
+
+  test("should fail when workflow not found in any strategy", async () => {
+    await makeUwfStore(storageRoot);
+
+    await expect(cmdThreadStart(storageRoot, "nonexistent", "prompt", tmpDir)).rejects.toThrow();
+  });
+});
+
+// ── Strategy Priority Order ───────────────────────────────────────────────────
+
+describe("Resolution Priority", () => {
+  test("should use explicit file path over local discovery", async () => {
+    await makeUwfStore(storageRoot);
+
+    // Setup: Create workflow in .workflow/ AND as explicit file
+    const workflowDir = join(projectRoot, ".workflow");
+    await mkdir(workflowDir, { recursive: true });
+    await writeFile(
+      join(workflowDir, "solve-issue.yaml"),
+      await createWorkflowYaml("solve-issue", "discovery"),
+    );
+
+    const explicitPath = join(projectRoot, "custom-solve-issue.yaml");
+    await writeFile(explicitPath, await createWorkflowYaml("custom-solve-issue", "explicit"));
+
+    // Execute with explicit path
+    const result = await cmdThreadStart(storageRoot, explicitPath, "prompt", projectRoot);
+
+    const uwf = await makeUwfStore(storageRoot);
+    const node = uwf.store.get(result.workflow);
+    expect(node).not.toBeNull();
+    if (node !== null) {
+      expect((node.payload as WorkflowPayload).description).toBe("Test workflow (explicit)");
+    }
+  });
+
+  test("should use local discovery over global registry", async () => {
+    const uwf = await makeUwfStore(storageRoot);
+
+    // Setup: Register globally
+    const globalHash = await storeWorkflow(uwf, "solve-issue");
+    const registry = await loadWorkflowRegistry(storageRoot);
+    registry["solve-issue"] = globalHash;
+    await saveWorkflowRegistry(storageRoot, registry);
+
+    // Setup: Create local .workflow/
+    const workflowDir = join(projectRoot, ".workflow");
+    await mkdir(workflowDir, { recursive: true });
+    const localYaml = await createWorkflowYaml("solve-issue", "local");
+    await writeFile(join(workflowDir, "solve-issue.yaml"), localYaml);
+
+    const result = await cmdThreadStart(storageRoot, "solve-issue", "prompt", projectRoot);
+
+    const uwf2 = await makeUwfStore(storageRoot);
+    const node = uwf2.store.get(result.workflow);
+    expect(node).not.toBeNull();
+    if (node !== null) {
+      expect((node.payload as WorkflowPayload).description).toBe("Test workflow (local)");
+    }
+  });
+});
+
+// ── Edge Cases ────────────────────────────────────────────────────────────────
+
+describe("Edge Cases", () => {
+  test("should treat '13-char-string.yaml' as file path, not CAS hash", async () => {
+    await makeUwfStore(storageRoot);
+    const fileName = "0123456789ABC.yaml"; // 13 chars + .yaml
+    await writeFile(join(projectRoot, fileName), await createWorkflowYaml("0123456789ABC"));
+
+    const result = await cmdThreadStart(storageRoot, fileName, "prompt", projectRoot);
+
+    expect(result.workflow).toMatch(/^[0-9A-HJKMNP-TV-Z]{13}$/);
+  });
+
+  test("should handle workflow names containing slashes as file paths", async () => {
+    await makeUwfStore(storageRoot);
+    const filePath = "subdir/solve-issue.yaml";
+    const fullPath = join(projectRoot, filePath);
+    await mkdir(join(projectRoot, "subdir"), { recursive: true });
+    await writeFile(fullPath, await createWorkflowYaml("solve-issue"));
+
+    const result = await cmdThreadStart(storageRoot, filePath, "prompt", projectRoot);
+
+    expect(result.workflow).toMatch(/^[0-9A-HJKMNP-TV-Z]{13}$/);
+  });
+
+  test("should handle absolute paths correctly", async () => {
+    await makeUwfStore(storageRoot);
+    const absPath = join(tmpDir, "abs-workflow.yaml");
+    await writeFile(absPath, await createWorkflowYaml("abs-workflow"));
+
+    const result = await cmdThreadStart(storageRoot, absPath, "prompt", projectRoot);
+
+    expect(result.workflow).toMatch(/^[0-9A-HJKMNP-TV-Z]{13}$/);
+  });
+
+  test("should fail on empty workflow ID", async () => {
+    await makeUwfStore(storageRoot);
+
+    await expect(cmdThreadStart(storageRoot, "", "prompt", projectRoot)).rejects.toThrow();
+  });
+
+  test("should fail on whitespace-only workflow ID", async () => {
+    await makeUwfStore(storageRoot);
+
+    await expect(cmdThreadStart(storageRoot, "   ", "prompt", projectRoot)).rejects.toThrow();
+  });
+});
@@ -7,13 +7,16 @@ import {
  cmdCasGet,
  cmdCasHas,
  cmdCasPut,
+  cmdCasPutText,
  cmdCasRefs,
  cmdCasReindex,
  cmdCasSchemaGet,
  cmdCasSchemaList,
  cmdCasWalk,
 } from "./commands/cas.js";
+import { cmdLogClean, cmdLogList, cmdLogShow } from "./commands/log.js";
 import { cmdSetup, cmdSetupInteractive } from "./commands/setup.js";
+import { cmdSkillCli } from "./commands/skill.js";
 import {
  cmdThreadFork,
  cmdThreadKill,
@@ -107,15 +110,21 @@ thread

 thread
  .command("step")
-  .description("Execute one step")
+  .description("Execute one or more steps")
  .argument("<thread-id>", "Thread ULID")
  .option("--agent <cmd>", "Override agent command")
-  .action((threadId: string, opts: { agent: string | undefined }) => {
+  .option("-c, --count <number>", "Number of steps to run (default: 1)")
+  .action((threadId: string, opts: { agent: string | undefined; count: string | undefined }) => {
    const storageRoot = resolveStorageRoot();
    runAction(async () => {
      const agentOverride = opts.agent ?? null;
-      const result = await cmdThreadStep(storageRoot, threadId, agentOverride);
-      writeOutput(result);
+      const count = opts.count !== undefined ? Number(opts.count) : 1;
+      const results = await cmdThreadStep(storageRoot, threadId, agentOverride, count);
+      if (results.length === 1) {
+        writeOutput(results[0]);
+      } else {
+        writeOutput(results);
+      }
    });
  });

@@ -220,6 +229,15 @@ thread
    });
  });

+const skill = program.command("skill").description("Built-in skill references for agents");
+
+skill
+  .command("cli")
+  .description("Print a markdown reference of all uwf commands")
+  .action(() => {
+    console.log(cmdSkillCli());
+  });
+
 program
  .command("setup")
  .description("Configure provider, model, and agent")
@@ -285,6 +303,17 @@ cas
    });
  });

+cas
+  .command("put-text")
+  .description("Store a plain text string, print its hash")
+  .argument("<text>", "Text content to store")
+  .action((text: string) => {
+    const storageRoot = resolveStorageRoot();
+    runAction(async () => {
+      writeOutput(await cmdCasPutText(storageRoot, text));
+    });
+  });
+
 cas
  .command("has")
  .description("Check if a hash exists")
@@ -351,6 +380,55 @@ casSchema
    });
  });

+const log = program.command("log").description("Process-level debug logs");
+
+log
+  .command("list")
+  .description("List log files with sizes")
+  .action(() => {
+    const storageRoot = resolveStorageRoot();
+    runAction(async () => {
+      const result = await cmdLogList(storageRoot);
+      writeOutput(result);
+    });
+  });
+
+log
+  .command("show")
+  .description("Show and filter log entries")
+  .option("--thread <thread-id>", "Filter by thread ID")
+  .option("--process <pid>", "Filter by process ID")
+  .option("--date <date>", "Filter by date (YYYY-MM-DD)")
+  .action(
+    (opts: {
+      thread: string | undefined;
+      process: string | undefined;
+      date: string | undefined;
+    }) => {
+      const storageRoot = resolveStorageRoot();
+      runAction(async () => {
+        const result = await cmdLogShow(storageRoot, {
+          thread: opts.thread ?? null,
+          process: opts.process ?? null,
+          date: opts.date ?? null,
+        });
+        writeOutput(result);
+      });
+    },
+  );
+
+log
+  .command("clean")
+  .description("Delete log files older than given date")
+  .requiredOption("--before <date>", "Delete files before this date (YYYY-MM-DD)")
+  .action((opts: { before: string }) => {
+    const storageRoot = resolveStorageRoot();
+    runAction(async () => {
+      const result = await cmdLogClean(storageRoot, opts.before);
+      writeOutput(result);
+    });
+  });
+
 program.parseAsync(process.argv).catch((e: unknown) => {
  const message = e instanceof Error ? e.message : String(e);
  process.stderr.write(`${message}\n`);
@@ -1,10 +1,12 @@
 import { readFileSync } from "node:fs";
 import { join } from "node:path";

-import type { Hash, JSONSchema, Store } from "@uncaged/json-cas";
-import { bootstrap, getSchema, refs, walk } from "@uncaged/json-cas";
+import type { JSONSchema, Store } from "@uncaged/json-cas";
+import { bootstrap, getSchema, putSchema, refs, walk } from "@uncaged/json-cas";
 import { createFsStore } from "@uncaged/json-cas-fs";

+import { TEXT_SCHEMA } from "../schemas.js";
+
 // ---- Helpers ----

 function openStore(storageRoot: string): Store {
@@ -121,3 +123,10 @@ export async function cmdCasSchemaGet(storageRoot: string, hash: string): Promis
  }
  return schema;
 }
+
+export async function cmdCasPutText(storageRoot: string, text: string): Promise<{ hash: string }> {
+  const store = openStore(storageRoot);
+  const typeHash = await putSchema(store, TEXT_SCHEMA);
+  const hash = await store.put(typeHash, text);
+  return { hash };
+}
@@ -0,0 +1,116 @@
+import { readdir, readFile, stat, unlink } from "node:fs/promises";
+import { join } from "node:path";
+
+type LogListItem = {
+  name: string;
+  size: number;
+  date: string;
+};
+
+type LogShowFilter = {
+  thread: string | null;
+  process: string | null;
+  date: string | null;
+};
+
+type LogEntry = {
+  ts: string;
+  pid: string;
+  tag: string;
+  msg: string;
+  thread: string | null;
+  workflow: string | null;
+};
+
+type LogCleanResult = {
+  deleted: number;
+};
+
+function logsDir(storageRoot: string): string {
+  return join(storageRoot, "logs");
+}
+
+async function listLogFiles(dir: string): Promise<Array<string>> {
+  try {
+    const files = await readdir(dir);
+    return files.filter((f) => f.endsWith(".jsonl")).sort();
+  } catch {
+    return [];
+  }
+}
+
+function dateFromFilename(name: string): string {
+  return name.replace(".jsonl", "");
+}
+
+async function parseJsonlFile(path: string): Promise<Array<LogEntry>> {
+  const content = await readFile(path, "utf-8");
+  const lines = content
+    .trim()
+    .split("\n")
+    .filter((l) => l.length > 0);
+  return lines.map((line) => JSON.parse(line) as LogEntry);
+}
+
+export async function cmdLogList(storageRoot: string): Promise<Array<LogListItem>> {
+  const dir = logsDir(storageRoot);
+  const files = await listLogFiles(dir);
+  const items: Array<LogListItem> = [];
+  for (const name of files) {
+    const s = await stat(join(dir, name));
+    items.push({ name, size: s.size, date: dateFromFilename(name) });
+  }
+  // sort by date descending
+  items.sort((a, b) => (a.date > b.date ? -1 : a.date < b.date ? 1 : 0));
+  return items;
+}
+
+export async function cmdLogShow(
+  storageRoot: string,
+  filter: LogShowFilter,
+): Promise<Array<LogEntry>> {
+  const dir = logsDir(storageRoot);
+  let files: Array<string>;
+
+  if (filter.date !== null) {
+    files = [`${filter.date}.jsonl`];
+  } else {
+    files = await listLogFiles(dir);
+  }
+
+  let entries: Array<LogEntry> = [];
+  for (const file of files) {
+    try {
+      const parsed = await parseJsonlFile(join(dir, file));
+      entries = entries.concat(parsed);
+    } catch {
+      // file doesn't exist or is unreadable, skip
+    }
+  }
+
+  if (filter.thread !== null) {
+    entries = entries.filter((e) => e.thread === filter.thread);
+  }
+  if (filter.process !== null) {
+    entries = entries.filter((e) => e.pid === filter.process);
+  }
+
+  entries.sort((a, b) => (a.ts < b.ts ? -1 : a.ts > b.ts ? 1 : 0));
+  return entries;
+}
+
+export async function cmdLogClean(storageRoot: string, before: string): Promise<LogCleanResult> {
+  const dir = logsDir(storageRoot);
+  const files = await listLogFiles(dir);
+  let deleted = 0;
+
+  for (const name of files) {
+    const date = dateFromFilename(name);
+    if (date < before) {
+      await unlink(join(dir, name));
+      deleted++;
+    }
+  }
+
+  return { deleted };
+}
@@ -1,11 +1,46 @@
 import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
-import { homedir } from "node:os";
-import { join, resolve } from "node:path";
+import { join } from "node:path";
 import { stdin as input, stdout as output } from "node:process";
 import { createInterface } from "node:readline/promises";
-
+import type { Result } from "@uncaged/workflow-util";
 import { parse, stringify } from "yaml";

+/**
+ * Send a minimal chat completion request to verify the model is reachable.
+ * Returns ok on 2xx, error with reason string otherwise.
+ */
+export async function validateModel(
+  baseUrl: string,
+  apiKey: string,
+  model: string,
+): Promise<Result<void, string>> {
+  try {
+    const url = `${baseUrl.replace(/\/+$/, "")}/chat/completions`;
+    const res = await fetch(url, {
+      method: "POST",
+      headers: {
+        Authorization: `Bearer ${apiKey}`,
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({
+        model,
+        messages: [{ role: "user", content: "hi" }],
+        max_tokens: 1,
+      }),
+      signal: AbortSignal.timeout(15_000),
+    });
+    if (!res.ok) {
+      return { ok: false, error: `HTTP ${res.status} ${res.statusText}` };
+    }
+    return { ok: true, value: undefined };
+  } catch (err: unknown) {
+    if (err instanceof DOMException && err.name === "AbortError") {
+      return { ok: false, error: "Request timed out — model endpoint unreachable" };
+    }
+    return { ok: false, error: `Network error — could not reach endpoint (${String(err)})` };
+  }
+}
+
 /**
 * Preset provider list — embedded to avoid runtime YAML loading dependency.
 * Keep in sync with providers.yaml in cli-workflow.
@@ -102,6 +137,75 @@ function apiKeyEnvName(providerName: string): string {
  return `${providerName.toUpperCase().replace(/[^A-Z0-9]/g, "_")}_API_KEY`;
 }

+/**
+ * Discover uwf-* agent binaries in PATH.
+ * Returns sorted list of binary names (e.g., ["uwf-hermes", "uwf-claude-code"]).
+ */
+async function _discoverAgents(): Promise<string[]> {
+  try {
+    // Use which -a to find all uwf-* binaries in PATH
+    const proc = Bun.spawn(["which", "-a", "uwf-hermes", "uwf-claude-code", "uwf-cursor"], {
+      stdout: "pipe",
+      stderr: "pipe",
+    });
+
+    const text = await new Response(proc.stdout).text();
+    await proc.exited;
+
+    if (proc.exitCode !== 0) {
+      // Try alternative approach: search PATH directories manually
+      const pathEnv = process.env.PATH || "";
+      const pathDirs = pathEnv.split(":").filter((d) => d.length > 0);
+      const agents = new Set<string>();
+
+      for (const dir of pathDirs) {
+        try {
+          if (!existsSync(dir)) continue;
+          const { readdirSync, statSync } = await import("node:fs");
+          const entries = readdirSync(dir);
+
+          for (const entry of entries) {
+            if (!entry.startsWith("uwf-") || entry === "uwf") continue;
+            const fullPath = join(dir, entry);
+            try {
+              const stat = statSync(fullPath);
+              // Check if executable (owner, group, or other has execute bit)
+              if (stat.isFile() && (stat.mode & 0o111) !== 0) {
+                agents.add(entry);
+              }
+            } catch {
+              // Skip if can't stat
+            }
+          }
+        } catch {
+          // Skip inaccessible directories
+        }
+      }
+
+      return Array.from(agents).sort();
+    }
+
+    // Parse which output - each line is a path to a binary
+    const paths = text
+      .trim()
+      .split("\n")
+      .filter((line) => line.length > 0);
+    const agents = new Set<string>();
+
+    for (const path of paths) {
+      const basename = path.split("/").pop();
+      if (basename?.startsWith("uwf-") && basename !== "uwf") {
+        agents.add(basename);
+      }
+    }
+
+    return Array.from(agents).sort();
+  } catch {
+    // If all fails, return empty array
+    return [];
+  }
+}
+
 /**
 * Merge setup args into config.yaml structure. Non-destructive — preserves existing entries.
 */
@@ -164,12 +268,16 @@ export async function cmdSetup(args: SetupArgs): Promise<Record<string, unknown>
  envData[envName] = args.apiKey;
  saveEnvFile(envPath, envData);

+  // Validate model connectivity
+  const validation = await validateModel(args.baseUrl, args.apiKey, args.model);
+
  return {
    configPath,
    envPath,
    provider: args.provider,
    model: args.model,
    defaultAgent: merged.defaultAgent,
+    validation,
  };
 }

@@ -329,7 +437,7 @@ export async function cmdSetupInteractive(storageRoot: string): Promise<Record<s

    console.log(`  → ${providerName}/${model}\n`);

-    await cmdSetup({
+    const setupResult = await cmdSetup({
      provider: providerName,
      baseUrl,
      apiKey,
@@ -337,6 +445,19 @@ export async function cmdSetupInteractive(storageRoot: string): Promise<Record<s
      storageRoot,
    });

+    // Show validation result
+    if (setupResult.validation && typeof setupResult.validation === "object") {
+      const v = setupResult.validation as { ok: boolean; error?: string };
+      if (v.ok) {
+        console.log("✓ Model verified — connection successful.\n");
+      } else {
+        console.log(`\n⚠ Warning: Could not reach model — ${v.error}`);
+        console.log(
+          "  Config saved, but you may want to try a different model or check your API key.\n",
+        );
+      }
+    }
+
    console.log("Setup complete! Get started:\n");
    console.log("  uwf workflow put <workflow.yaml>   Register a workflow");
    console.log('  uwf thread start <name> -p "..."   Start a thread');
@@ -0,0 +1 @@
+export { generateCliReference as cmdSkillCli } from "@uncaged/workflow-util";
@@ -1,5 +1,6 @@
 import { execFileSync } from "node:child_process";
-import { readFile } from "node:fs/promises";
+import { access, readFile } from "node:fs/promises";
+import { dirname, isAbsolute, resolve as resolvePath } from "node:path";
 import type { Store as CasStore, JSONSchema } from "@uncaged/json-cas";
 import { getSchema, validate } from "@uncaged/json-cas";
 import { getEnvPath, loadWorkflowConfig } from "@uncaged/workflow-agent-kit";
@@ -23,19 +24,17 @@ import type {
  WorkflowConfig,
  WorkflowPayload,
 } from "@uncaged/workflow-protocol";
-import { generateUlid } from "@uncaged/workflow-util";
+import { createProcessLogger, generateUlid, type ProcessLogger } from "@uncaged/workflow-util";
 import { config as loadDotenv } from "dotenv";
 import { parse, stringify } from "yaml";

 import {
  appendThreadHistory,
  createUwfStore,
-  discoverProjectWorkflows,
  findThreadInHistory,
  loadThreadHistory,
  loadThreadsIndex,
  loadWorkflowRegistry,
-  resolveProjectWorkflowFile,
  resolveWorkflowHash,
  saveThreadsIndex,
  type ThreadHistoryLine,
@@ -47,6 +46,18 @@ import { materializeWorkflowPayload } from "./workflow.js";
 const END_ROLE = "$END";
 export const THREAD_READ_DEFAULT_QUOTA = 4000;

+const PL_THREAD_START = "7HNQ4B2X";
+const PL_MODERATOR = "M3K8V9T1";
+const PL_AGENT_SPAWN = "R5J2W8N4";
+const PL_AGENT_DONE = "C6P9E3H7";
+const PL_THREAD_ARCHIVED = "F4D8Q2K5";
+const PL_STEP_ERROR = "B8T5N1V6";
+
+function failStep(plog: ProcessLogger, message: string): never {
+  plog.log(PL_STEP_ERROR, message, null);
+  fail(message);
+}
+
 type ChainState = {
  startHash: CasRef;
  start: StartNodePayload;
@@ -70,6 +81,83 @@ function fail(message: string): never {
  process.exit(1);
 }

+/**
+ * Check if a string looks like a file path (contains path separators or has .yaml/.yml extension).
+ */
+function isFilePath(input: string): boolean {
+  return (
+    input.includes("/") || input.includes("\\") || input.endsWith(".yaml") || input.endsWith(".yml")
+  );
+}
+
+/**
+ * Check if a workflow file exists at the given path.
+ */
+async function workflowFileExists(dir: string, name: string, ext: string): Promise<string | null> {
+  const candidate = resolvePath(dir, `${name}${ext}`);
+  try {
+    await access(candidate);
+    return candidate;
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Search for a workflow file in a given directory (checks both .workflow/ and .workflows/).
+ */
+async function findWorkflowInDir(dir: string, name: string): Promise<string | null> {
+  // Check .workflow/ directory first (preferred)
+  for (const ext of [".yaml", ".yml"]) {
+    const result = await workflowFileExists(resolvePath(dir, ".workflow"), name, ext);
+    if (result !== null) {
+      return result;
+    }
+  }
+
+  // Check .workflows/ directory as fallback (legacy)
+  for (const ext of [".yaml", ".yml"]) {
+    const result = await workflowFileExists(resolvePath(dir, ".workflows"), name, ext);
+    if (result !== null) {
+      return result;
+    }
+  }
+
+  return null;
+}
+
+/**
+ * Traverse parent directories looking for `.workflow/<name>.yaml` or `.workflow/<name>.yml`.
+ * Returns the absolute path if found, otherwise null.
+ * Stops at filesystem root or .git directory.
+ */
+async function findWorkflowInParents(startDir: string, name: string): Promise<string | null> {
+  let currentDir = resolvePath(startDir);
+  const root = resolvePath("/");
+
+  while (true) {
+    const found = await findWorkflowInDir(currentDir, name);
+    if (found !== null) {
+      return found;
+    }
+
+    // Stop at filesystem root
+    if (currentDir === root) {
+      break;
+    }
+
+    // Move to parent directory
+    const parentDir = dirname(currentDir);
+    if (parentDir === currentDir) {
+      // Reached filesystem root
+      break;
+    }
+    currentDir = parentDir;
+  }
+
+  return null;
+}
+
 async function materializeLocalWorkflow(uwf: UwfStore, filePath: string): Promise<CasRef> {
  let text: string;
  try {
@@ -111,18 +199,41 @@ async function resolveWorkflowCasRef(
  workflowId: string,
  projectRoot: string,
 ): Promise<CasRef> {
-  // Project-local resolution: check .workflows/<workflowId>.yaml first
-  const localEntries = await discoverProjectWorkflows(projectRoot);
-  const localFile = resolveProjectWorkflowFile(localEntries, workflowId);
-  if (localFile !== null) {
-    return materializeLocalWorkflow(uwf, localFile);
+  // Validate input
+  const trimmed = workflowId.trim();
+  if (trimmed === "") {
+    fail("workflow ID cannot be empty");
  }

-  // Global registry fallback
+  // Strategy 1: Direct CAS hash
+  if (isCasRef(trimmed)) {
+    const node = uwf.store.get(trimmed);
+    if (node === null) {
+      fail(`CAS node not found: ${trimmed}`);
+    }
+    if (node.type !== uwf.schemas.workflow) {
+      fail(`node ${trimmed} is not a Workflow (type ${node.type})`);
+    }
+    return trimmed;
+  }
+
+  // Strategy 2: Explicit file path (relative or absolute)
+  if (isFilePath(trimmed)) {
+    const absolutePath = isAbsolute(trimmed) ? trimmed : resolvePath(projectRoot, trimmed);
+    return materializeLocalWorkflow(uwf, absolutePath);
+  }
+
+  // Strategy 3: Local discovery (parent directory traversal)
+  const localPath = await findWorkflowInParents(projectRoot, trimmed);
+  if (localPath !== null) {
+    return materializeLocalWorkflow(uwf, localPath);
+  }
+
+  // Strategy 4: Global registry fallback
  const registry = await loadWorkflowRegistry(storageRoot);
-  const hash = resolveWorkflowHash(registry, workflowId);
+  const hash = resolveWorkflowHash(registry, trimmed);
  if (!isCasRef(hash)) {
-    fail(`workflow not found: ${workflowId}`);
+    fail(`workflow not found: ${trimmed}`);
  }
  const node = uwf.store.get(hash);
  if (node === null) {
@@ -168,6 +279,10 @@ export async function cmdThreadStart(
  const workflowHash = await resolveWorkflowCasRef(uwf, storageRoot, workflowId, projectRoot);

  const threadId = generateUlid(Date.now()) as ThreadId;
+  const plog = createProcessLogger({
+    storageRoot,
+    context: { thread: threadId, workflow: workflowHash },
+  });
  const startPayload: StartNodePayload = {
    workflow: workflowHash,
    prompt,
@@ -183,6 +298,12 @@ export async function cmdThreadStart(
  index[threadId] = headHash;
  await saveThreadsIndex(storageRoot, index);

+  plog.log(
+    PL_THREAD_START,
+    `thread created workflow=${workflowHash} thread=${threadId} head=${headHash}`,
+    null,
+  );
+
  return { workflow: workflowHash, thread: threadId };
 }

@@ -418,7 +539,7 @@ function collectOrderedSteps(
 }

 function formatYaml(value: unknown): string {
-  return stringify(value).trimEnd();
+  return stringify(value, { aliasDuplicateObjects: false }).trimEnd();
 }

 function formatCompactStep(index: number, item: OrderedStepItem, outputYaml: string): string {
@@ -572,6 +693,7 @@ function buildModeratorContext(uwf: UwfStore, chain: ChainState): ModeratorConte
    output: expandOutput(uwf, step.output),
    detail: step.detail,
    agent: step.agent,
+    edgePrompt: step.edgePrompt ?? "",
  }));
  return { start: chain.start, steps };
 }
@@ -624,30 +746,38 @@ function resolveAgentConfig(
  return agentConfig;
 }

-function spawnAgent(agent: AgentConfig, threadId: ThreadId, role: string): CasRef {
+function spawnAgent(
+  plog: ProcessLogger,
+  agent: AgentConfig,
+  threadId: ThreadId,
+  role: string,
+  edgePrompt: string,
+): CasRef {
  const argv = [...agent.args, threadId, role];
+  const env = { ...process.env, UWF_EDGE_PROMPT: edgePrompt };
  let stdout: string;
  try {
    stdout = execFileSync(agent.command, argv, {
      encoding: "utf8",
-      env: process.env,
+      env,
      stdio: ["ignore", "pipe", "pipe"],
+      maxBuffer: 50 * 1024 * 1024, // 50 MB — stream-json output can be large
    });
  } catch (e) {
-    const err = e as NodeJS.ErrnoException & { stderr?: Buffer | string };
+    const err = e as NodeJS.ErrnoException & { stderr?: Buffer | string | null };
    const stderr =
-      err.stderr === undefined
+      err.stderr == null
        ? ""
        : typeof err.stderr === "string"
          ? err.stderr
          : err.stderr.toString("utf8");
    const detail = stderr.trim() !== "" ? `: ${stderr.trim()}` : "";
-    fail(`agent command failed (${agent.command})${detail}`);
+    failStep(plog, `agent command failed (${agent.command})${detail}`);
  }

  const line = stdout.trim().split("\n").pop()?.trim() ?? "";
  if (!isCasRef(line)) {
-    fail(`agent stdout is not a valid CAS hash: ${line || "(empty)"}`);
+    failStep(plog, `agent stdout is not a valid CAS hash: ${line || "(empty)"}`);
  }
  return line;
 }
@@ -673,12 +803,54 @@ export async function cmdThreadStep(
  storageRoot: string,
  threadId: ThreadId,
  agentOverride: string | null,
-): Promise<StepOutput> {
+  count: number,
+): Promise<StepOutput[]> {
+  if (count < 1 || !Number.isInteger(count)) {
+    fail(`--count must be a positive integer, got: ${count}`);
+  }
+
+  const workflowHash = await resolveActiveThreadWorkflowHash(storageRoot, threadId);
+  const plog = createProcessLogger({
+    storageRoot,
+    context: { thread: threadId, workflow: workflowHash },
+  });
+
+  const results: StepOutput[] = [];
+  for (let i = 0; i < count; i++) {
+    const result = await cmdThreadStepOnce(storageRoot, threadId, agentOverride, plog);
+    results.push(result);
+    if (result.done) {
+      break;
+    }
+  }
+  return results;
+}
+
+async function resolveActiveThreadWorkflowHash(
+  storageRoot: string,
+  threadId: ThreadId,
+): Promise<CasRef> {
  const index = await loadThreadsIndex(storageRoot);
  const headHash = index[threadId];
  if (headHash === undefined) {
    fail(`thread not active: ${threadId}`);
  }
+  const uwf = await createUwfStore(storageRoot);
+  const chain = walkChain(uwf, headHash);
+  return chain.start.workflow;
+}
+
+async function cmdThreadStepOnce(
+  storageRoot: string,
+  threadId: ThreadId,
+  agentOverride: string | null,
+  plog: ProcessLogger,
+): Promise<StepOutput> {
+  const index = await loadThreadsIndex(storageRoot);
+  const headHash = index[threadId];
+  if (headHash === undefined) {
+    failStep(plog, `thread not active: ${threadId}`);
+  }

  const uwf = await createUwfStore(storageRoot);
  const chain = walkChain(uwf, headHash);
@@ -688,10 +860,17 @@ export async function cmdThreadStep(

  const nextResult = await evaluate(workflow, context);
  if (!nextResult.ok) {
-    fail(nextResult.error.message);
+    failStep(plog, `moderator evaluate failed: ${nextResult.error.message}`);
  }

-  if (nextResult.value === END_ROLE) {
+  plog.log(
+    PL_MODERATOR,
+    `moderator role=${nextResult.value.role} prompt=${nextResult.value.prompt}`,
+    null,
+  );
+
+  if (nextResult.value.role === END_ROLE) {
+    plog.log(PL_THREAD_ARCHIVED, `thread archived head=${headHash}`, null);
    await archiveThread(storageRoot, threadId, workflowHash, headHash);
    return {
      workflow: workflowHash,
@@ -701,18 +880,25 @@ export async function cmdThreadStep(
    };
  }

-  const role = nextResult.value;
+  const role = nextResult.value.role;
+  const edgePrompt = nextResult.value.prompt;
  const config = await loadWorkflowConfig(storageRoot);
  const agent = resolveAgentConfig(config, workflow, role, agentOverride);

+  plog.log(PL_AGENT_SPAWN, `spawning agent command=${agent.command}`, {
+    args: [...agent.args, threadId, role].join(" "),
+  });
+
  loadDotenv({ path: getEnvPath(storageRoot) });
-  const newHead = spawnAgent(agent, threadId, role);
+  const newHead = spawnAgent(plog, agent, threadId, role, edgePrompt);
+
+  plog.log(PL_AGENT_DONE, `agent returned head=${newHead}`, null);

  // Re-create store to pick up nodes written by the agent subprocess
  const uwfAfter = await createUwfStore(storageRoot);
  const newNode = uwfAfter.store.get(newHead);
  if (newNode === null || newNode.type !== uwfAfter.schemas.stepNode) {
-    fail(`agent returned hash that is not a StepNode: ${newHead}`);
+    failStep(plog, `agent returned hash that is not a StepNode: ${newHead}`);
  }

  // Reload threads index to avoid overwriting changes made by the agent subprocess
@@ -724,11 +910,12 @@ export async function cmdThreadStep(
  const contextAfter = buildModeratorContext(uwfAfter, chainAfter);
  const afterResult = await evaluate(workflow, contextAfter);
  if (!afterResult.ok) {
-    fail(afterResult.error.message);
+    failStep(plog, `post-step moderator evaluate failed: ${afterResult.error.message}`);
  }

-  const done = afterResult.value === END_ROLE;
+  const done = afterResult.value.role === END_ROLE;
  if (done) {
+    plog.log(PL_THREAD_ARCHIVED, `thread archived head=${newHead}`, null);
    await archiveThread(storageRoot, threadId, workflowHash, newHead);
  }

@@ -2,7 +2,12 @@ import { readFile } from "node:fs/promises";

 import type { JSONSchema } from "@uncaged/json-cas";
 import { putSchema, validate } from "@uncaged/json-cas";
-import type { CasRef, RoleDefinition, WorkflowPayload } from "@uncaged/workflow-protocol";
+import type {
+  CasRef,
+  RoleDefinition,
+  Transition,
+  WorkflowPayload,
+} from "@uncaged/workflow-protocol";
 import { parse } from "yaml";

 import {
@@ -46,11 +51,34 @@ function isJsonSchema(value: unknown): value is JSONSchema {
  return typeof value === "object" && value !== null && !Array.isArray(value);
 }

-async function resolveMetaRef(uwf: UwfStore, roleName: string, meta: unknown): Promise<CasRef> {
-  if (!isJsonSchema(meta)) {
-    fail(`role "${roleName}": meta must be a JSON Schema object`);
+/** Normalize graph transitions: ensure condition is null (not undefined) for fallback entries. */
+function normalizeGraph(graph: Record<string, Transition[]>): Record<string, Transition[]> {
+  const result: Record<string, Transition[]> = {};
+  for (const [node, transitions] of Object.entries(graph)) {
+    result[node] = transitions.map((t) => {
+      if (typeof t.prompt !== "string" || t.prompt.trim() === "") {
+        fail(`graph[${node}] transition to "${t.role}": prompt is required (non-empty string)`);
+      }
+      return {
+        role: t.role,
+        condition: t.condition ?? null,
+        prompt: t.prompt,
+      };
+    });
  }
-  const schema: JSONSchema = meta.title === undefined ? { ...meta, title: roleName } : meta;
+  return result;
+}
+
+async function resolveFrontmatterRef(
+  uwf: UwfStore,
+  roleName: string,
+  frontmatter: unknown,
+): Promise<CasRef> {
+  if (!isJsonSchema(frontmatter)) {
+    fail(`role "${roleName}": frontmatter must be a JSON Schema object`);
+  }
+  const schema: JSONSchema =
+    frontmatter.title === undefined ? { ...frontmatter, title: roleName } : frontmatter;
  return putSchema(uwf.store, schema);
 }

@@ -60,14 +88,18 @@ export async function materializeWorkflowPayload(
 ): Promise<WorkflowPayload> {
  const roles: Record<string, RoleDefinition> = {};
  for (const [roleName, role] of Object.entries(raw.roles)) {
-    const meta = await resolveMetaRef(uwf, `${raw.name}.${roleName}`, role.meta);
+    const frontmatter = await resolveFrontmatterRef(
+      uwf,
+      `${raw.name}.${roleName}`,
+      role.frontmatter,
+    );
    roles[roleName] = {
      description: role.description,
      goal: role.goal,
      capabilities: role.capabilities,
      procedure: role.procedure,
      output: role.output,
-      meta,
+      frontmatter,
    };
  }
  return {
@@ -75,7 +107,7 @@ export async function materializeWorkflowPayload(
    description: raw.description,
    roles,
    conditions: raw.conditions,
-    graph: raw.graph,
+    graph: normalizeGraph(raw.graph),
  };
 }

@@ -7,6 +7,6 @@ export function formatOutput(data: unknown, format: OutputFormat): string {
    case "json":
      return JSON.stringify(data);
    case "yaml":
-      return stringify(data).trimEnd();
+      return stringify(data, { aliasDuplicateObjects: false }).trimEnd();
  }
 }
@@ -2,10 +2,13 @@ import type { Hash, Store } from "@uncaged/json-cas";
 import { putSchema } from "@uncaged/json-cas";
 import { START_NODE_SCHEMA, STEP_NODE_SCHEMA, WORKFLOW_SCHEMA } from "@uncaged/workflow-protocol";

+export const TEXT_SCHEMA = { type: "string" as const };
+
 export type UwfSchemaHashes = {
  workflow: Hash;
  startNode: Hash;
  stepNode: Hash;
+  text: Hash;
 };

 /**
@@ -13,10 +16,11 @@ export type UwfSchemaHashes = {
 * Idempotent: safe to call on every CLI invocation.
 */
 export async function registerUwfSchemas(store: Store): Promise<UwfSchemaHashes> {
-  const [workflow, startNode, stepNode] = await Promise.all([
+  const [workflow, startNode, stepNode, text] = await Promise.all([
    putSchema(store, WORKFLOW_SCHEMA),
    putSchema(store, START_NODE_SCHEMA),
    putSchema(store, STEP_NODE_SCHEMA),
+    putSchema(store, TEXT_SCHEMA),
  ]);
-  return { workflow, startNode, stepNode };
+  return { workflow, startNode, stepNode, text };
 }
@@ -15,8 +15,8 @@ function isRoleDefinition(value: unknown): boolean {
  if (!isRecord(value)) {
    return false;
  }
-  const meta = value.meta;
-  const metaOk = isRecord(meta) && typeof meta.type === "string";
+  const frontmatter = value.frontmatter;
+  const frontmatterOk = isRecord(frontmatter) && typeof frontmatter.type === "string";
  const capabilities = value.capabilities;
  const capabilitiesOk =
    Array.isArray(capabilities) && capabilities.every((c) => typeof c === "string");
@@ -26,7 +26,7 @@ function isRoleDefinition(value: unknown): boolean {
    capabilitiesOk &&
    typeof value.procedure === "string" &&
    typeof value.output === "string" &&
-    metaOk
+    frontmatterOk
  );
 }

@@ -42,7 +42,12 @@ function isTransition(value: unknown): boolean {
    return false;
  }
  const condition = value.condition;
-  return typeof value.role === "string" && (condition === null || typeof condition === "string");
+  return (
+    typeof value.role === "string" &&
+    typeof value.prompt === "string" &&
+    value.prompt.trim() !== "" &&
+    (condition === null || condition === undefined || typeof condition === "string")
+  );
 }

 function isStringRecord(value: unknown, itemCheck: (item: unknown) => boolean): boolean {
@@ -0,0 +1,16 @@
+import { describe, expect, test } from "bun:test";
+
+import type { LlmToolCall } from "../src/llm/types.js";
+
+/** Mirror OpenAI response shape for parser coverage via chatCompletionWithTools integration later. */
+describe("LlmToolCall shape", () => {
+  test("tool call record fields", () => {
+    const call: LlmToolCall = {
+      id: "call_1",
+      name: "read_file",
+      arguments: '{"path":"README.md"}',
+    };
+    expect(call.name).toBe("read_file");
+    expect(JSON.parse(call.arguments)).toEqual({ path: "README.md" });
+  });
+});
@@ -0,0 +1,21 @@
+import { describe, expect, test } from "bun:test";
+import { resolve } from "node:path";
+import { resolvePath } from "../src/tools/path.js";
+
+describe("resolvePath", () => {
+  test("resolves relative paths against cwd", () => {
+    const root = "/workspace/project";
+    const resolved = resolvePath(root, "src/foo.ts");
+    expect(resolved).toBe(resolve(root, "src/foo.ts"));
+  });
+
+  test("resolves absolute paths as-is", () => {
+    const resolved = resolvePath("/workspace", "/etc/hosts");
+    expect(resolved).toBe("/etc/hosts");
+  });
+
+  test("resolves parent traversal normally", () => {
+    const resolved = resolvePath("/workspace/project", "../other/file.ts");
+    expect(resolved).toBe(resolve("/workspace/project", "../other/file.ts"));
+  });
+});
@@ -0,0 +1,236 @@
+import { describe, expect, test } from "bun:test";
+
+import type { AgentContext } from "@uncaged/workflow-agent-kit";
+
+import { buildBuiltinMessages } from "../src/prompt.js";
+
+function minimalContext(overrides: Partial<AgentContext> = {}): AgentContext {
+  return {
+    threadId: "00000000000000000000000000" as AgentContext["threadId"],
+    role: "developer",
+    store: {} as AgentContext["store"],
+    workflow: {
+      name: "test",
+      description: "test workflow",
+      roles: {
+        developer: {
+          description: "Developer role",
+          goal: "Ship the fix",
+          capabilities: ["file-edit"],
+          procedure: "Edit files",
+          output: "A patch",
+          frontmatter: "schema-hash",
+        },
+      },
+      conditions: {},
+      graph: {},
+    },
+    start: { workflow: "wf-hash", prompt: "Fix the bug" },
+    steps: [],
+    outputFormatInstruction: "---\nstatus: done\n---",
+    edgePrompt: "Implement the fix described in the plan.",
+    isFirstVisit: true,
+    ...overrides,
+  };
+}
+
+describe("buildBuiltinMessages", () => {
+  test("system includes output format and role goal", () => {
+    const messages = buildBuiltinMessages(minimalContext());
+    const system = messages[0];
+    expect(system?.role).toBe("system");
+    if (system?.role === "system") {
+      expect(system.content).toContain("status: done");
+      expect(system.content).toContain("## Goal");
+      expect(system.content).toContain("Ship the fix");
+    }
+  });
+
+  test("first visit produces system + single user message with edge prompt", () => {
+    const messages = buildBuiltinMessages(minimalContext());
+    expect(messages).toHaveLength(2);
+    expect(messages[1]?.role).toBe("user");
+    if (messages[1]?.role === "user") {
+      expect(messages[1].content).toContain("Implement the fix");
+      expect(messages[1].content).not.toContain("## What Happened Since Your Last Turn");
+    }
+  });
+
+  test("first visit with prior steps includes inter-step summary in final user message", () => {
+    const messages = buildBuiltinMessages(
+      minimalContext({
+        steps: [
+          {
+            role: "planner",
+            output: { plan: "step 1" },
+            agent: "uwf-builtin",
+            detail: "detail-hash",
+            edgePrompt: "Create a plan.",
+          },
+        ],
+      }),
+    );
+    expect(messages).toHaveLength(2);
+    const finalUser = messages[1];
+    if (finalUser?.role === "user") {
+      expect(finalUser.content).toContain("Implement the fix");
+      expect(finalUser.content).toContain("## What Happened Since Your Last Turn");
+      expect(finalUser.content).toContain("planner");
+    }
+  });
+
+  test("re-entry reconstructs prior user/assistant turns plus current user message", () => {
+    const messages = buildBuiltinMessages(
+      minimalContext({
+        isFirstVisit: false,
+        edgePrompt: "Fix the reviewer's feedback.",
+        steps: [
+          {
+            role: "developer",
+            output: { summary: "Initial fix" },
+            agent: "uwf-builtin",
+            detail: "detail-1",
+            edgePrompt: "Implement the fix.",
+          },
+          {
+            role: "reviewer",
+            output: { approved: false, comments: "Missing tests" },
+            agent: "uwf-builtin",
+            detail: "detail-2",
+            edgePrompt: "Review the implementation.",
+          },
+        ],
+      }),
+    );
+
+    expect(messages).toHaveLength(4);
+    expect(messages[0]?.role).toBe("system");
+    expect(messages[1]?.role).toBe("user");
+    expect(messages[2]?.role).toBe("assistant");
+    expect(messages[3]?.role).toBe("user");
+
+    if (messages[1]?.role === "user") {
+      expect(messages[1].content).toBe("Implement the fix.");
+    }
+    if (messages[2]?.role === "assistant") {
+      expect(messages[2].content).toBe(JSON.stringify({ summary: "Initial fix" }));
+    }
+    if (messages[3]?.role === "user") {
+      expect(messages[3].content).toContain("Fix the reviewer's feedback.");
+      expect(messages[3].content).toContain("## What Happened Since Your Last Turn");
+      expect(messages[3].content).toContain("reviewer");
+      expect(messages[3].content).toContain("Missing tests");
+    }
+  });
+
+  test("prefix is stable across re-entry for LLM cache hits", () => {
+    const firstVisitMessages = buildBuiltinMessages(
+      minimalContext({
+        edgePrompt: "Implement the fix.",
+        steps: [],
+      }),
+    );
+
+    const reEntryMessages = buildBuiltinMessages(
+      minimalContext({
+        isFirstVisit: false,
+        edgePrompt: "Fix the reviewer's feedback.",
+        steps: [
+          {
+            role: "developer",
+            output: { summary: "Initial fix" },
+            agent: "uwf-builtin",
+            detail: "detail-1",
+            edgePrompt: "Implement the fix.",
+          },
+          {
+            role: "reviewer",
+            output: { approved: false },
+            agent: "uwf-builtin",
+            detail: "detail-2",
+            edgePrompt: "Review the code.",
+          },
+        ],
+      }),
+    );
+
+    expect(reEntryMessages[0]).toEqual(firstVisitMessages[0]);
+    expect(reEntryMessages[1]).toEqual(firstVisitMessages[1]);
+    expect(reEntryMessages[2]?.role).toBe("assistant");
+    if (reEntryMessages[2]?.role === "assistant") {
+      expect(reEntryMessages[2].content).toBe(JSON.stringify({ summary: "Initial fix" }));
+    }
+    expect(reEntryMessages[3]?.role).toBe("user");
+    if (reEntryMessages[3]?.role === "user") {
+      expect(reEntryMessages[3].content).toContain("Fix the reviewer's feedback.");
+    }
+  });
+
+  test("multiple prior visits emit one user/assistant pair per visit", () => {
+    const messages = buildBuiltinMessages(
+      minimalContext({
+        isFirstVisit: false,
+        edgePrompt: "Third round fix.",
+        steps: [
+          {
+            role: "developer",
+            output: { round: 1 },
+            agent: "uwf-builtin",
+            detail: "d1",
+            edgePrompt: "First attempt.",
+          },
+          {
+            role: "reviewer",
+            output: { approved: false },
+            agent: "uwf-builtin",
+            detail: "d2",
+            edgePrompt: "Review round 1.",
+          },
+          {
+            role: "developer",
+            output: { round: 2 },
+            agent: "uwf-builtin",
+            detail: "d3",
+            edgePrompt: "Second attempt.",
+          },
+          {
+            role: "reviewer",
+            output: { approved: false },
+            agent: "uwf-builtin",
+            detail: "d4",
+            edgePrompt: "Review round 2.",
+          },
+        ],
+      }),
+    );
+
+    expect(messages).toHaveLength(6);
+    expect(messages.map((m) => m.role)).toEqual([
+      "system",
+      "user",
+      "assistant",
+      "user",
+      "assistant",
+      "user",
+    ]);
+
+    if (messages[1]?.role === "user") {
+      expect(messages[1].content).toBe("First attempt.");
+    }
+    if (messages[2]?.role === "assistant") {
+      expect(messages[2].content).toBe(JSON.stringify({ round: 1 }));
+    }
+    if (messages[3]?.role === "user") {
+      expect(messages[3].content).toContain("Second attempt.");
+      expect(messages[3].content).toContain("reviewer");
+    }
+    if (messages[4]?.role === "assistant") {
+      expect(messages[4].content).toBe(JSON.stringify({ round: 2 }));
+    }
+    if (messages[5]?.role === "user") {
+      expect(messages[5].content).toContain("Third round fix.");
+      expect(messages[5].content).toContain("### Step 4: reviewer");
+      expect(messages[5].content).toContain('"approved":false');
+    }
+  });
+});
@@ -0,0 +1,34 @@
+{
+  "name": "@uncaged/workflow-agent-builtin",
+  "version": "0.5.0",
+  "files": [
+    "src",
+    "dist",
+    "package.json"
+  ],
+  "type": "module",
+  "bin": {
+    "uwf-builtin": "./src/cli.ts"
+  },
+  "exports": {
+    ".": {
+      "bun": "./src/index.ts",
+      "types": "./dist/index.d.ts",
+      "import": "./dist/index.js"
+    }
+  },
+  "scripts": {
+    "test": "bun test"
+  },
+  "dependencies": {
+    "@uncaged/json-cas": "^0.4.0",
+    "@uncaged/workflow-agent-kit": "workspace:^",
+    "@uncaged/workflow-util": "workspace:^"
+  },
+  "devDependencies": {
+    "typescript": "^5.8.3"
+  },
+  "publishConfig": {
+    "access": "public"
+  }
+}
@@ -0,0 +1,140 @@
+import type { Store } from "@uncaged/json-cas";
+import {
+  type AgentContext,
+  type AgentRunResult,
+  createAgent,
+  loadWorkflowConfig,
+  resolveModel,
+  resolveStorageRoot,
+} from "@uncaged/workflow-agent-kit";
+import { createLogger, generateUlid } from "@uncaged/workflow-util";
+
+import { storeBuiltinDetail } from "./detail.js";
+import type { ChatMessage } from "./llm/index.js";
+import { BUILTIN_CONTINUE_MAX_TURNS, BUILTIN_MAX_TURNS, runBuiltinLoop } from "./loop.js";
+import { buildBuiltinMessages } from "./prompt.js";
+import { initSessionDir, removeSession } from "./session.js";
+
+const log = createLogger({ sink: { kind: "stderr" } });
+
+type SessionRecord = {
+  sessionId: string;
+  model: string;
+  startedAtMs: number;
+  messages: ChatMessage[];
+};
+
+const sessions = new Map<string, SessionRecord>();
+
+function getSession(sessionId: string): SessionRecord {
+  const session = sessions.get(sessionId);
+  if (session === undefined) {
+    throw new Error(`builtin session not found: ${sessionId}`);
+  }
+  return session;
+}
+
+function buildToolContext(storageRoot: string): { cwd: string; storageRoot: string } {
+  return {
+    cwd: process.cwd(),
+    storageRoot,
+  };
+}
+
+async function runBuiltinWithMessages(
+  storageRoot: string,
+  provider: ReturnType<typeof resolveModel>,
+  messages: ChatMessage[],
+  session: SessionRecord,
+  store: Store,
+  maxTurns: number,
+): Promise<AgentRunResult> {
+  const loopResult = await runBuiltinLoop({
+    provider,
+    messages,
+    toolCtx: buildToolContext(storageRoot),
+    maxTurns,
+    storageRoot,
+    sessionId: session.sessionId,
+  });
+
+  session.messages = loopResult.messages;
+
+  if (loopResult.turnCount === 0) {
+    log("5RWTK9NB", "no turns produced, returning empty output");
+    await removeSession(storageRoot, session.sessionId);
+    return { output: "", detailHash: "", sessionId: session.sessionId };
+  }
+
+  // Read jsonl → persist turns to CAS → store detail
+  const { detailHash } = await storeBuiltinDetail(
+    store,
+    storageRoot,
+    session.sessionId,
+    session.model,
+    session.startedAtMs,
+  );
+
+  // Clean up session jsonl
+  await removeSession(storageRoot, session.sessionId);
+
+  return { output: loopResult.finalText, detailHash, sessionId: session.sessionId };
+}
+
+async function runBuiltin(ctx: AgentContext): Promise<AgentRunResult> {
+  const storageRoot = resolveStorageRoot();
+  const config = await loadWorkflowConfig(storageRoot);
+  const provider = resolveModel(config, config.defaultModel);
+
+  const sessionId = generateUlid(Date.now());
+  await initSessionDir(storageRoot);
+  const messages = buildBuiltinMessages(ctx);
+
+  const session: SessionRecord = {
+    sessionId,
+    model: provider.model,
+    startedAtMs: Date.now(),
+    messages,
+  };
+  sessions.set(sessionId, session);
+
+  return runBuiltinWithMessages(
+    storageRoot,
+    provider,
+    messages,
+    session,
+    ctx.store,
+    BUILTIN_MAX_TURNS,
+  );
+}
+
+async function continueBuiltin(
+  sessionId: string,
+  message: string,
+  store: Store,
+): Promise<AgentRunResult> {
+  const session = getSession(sessionId);
+  const storageRoot = resolveStorageRoot();
+  const config = await loadWorkflowConfig(storageRoot);
+  const provider = resolveModel(config, config.defaultModel);
+
+  const messages: ChatMessage[] = [...session.messages, { role: "user", content: message }];
+
+  return runBuiltinWithMessages(
+    storageRoot,
+    provider,
+    messages,
+    session,
+    store,
+    BUILTIN_CONTINUE_MAX_TURNS,
+  );
+}
+
+/** Agent CLI factory: built-in LLM loop with file/shell tools. */
+export function createBuiltinAgent(): () => Promise<void> {
+  return createAgent({
+    name: "builtin",
+    run: runBuiltin,
+    continue: continueBuiltin,
+  });
+}
@@ -0,0 +1,6 @@
+#!/usr/bin/env bun
+
+import { createBuiltinAgent } from "./agent.js";
+
+const main = createBuiltinAgent();
+void main();
@@ -0,0 +1,49 @@
+import { bootstrap, putSchema, type Store } from "@uncaged/json-cas";
+
+import { BUILTIN_DETAIL_SCHEMA, BUILTIN_TURN_SCHEMA } from "./schemas.js";
+import { readSessionTurns } from "./session.js";
+import type { BuiltinDetailPayload } from "./types.js";
+
+type BuiltinSchemaHashes = {
+  turn: string;
+  detail: string;
+};
+
+export async function registerBuiltinSchemas(store: Store): Promise<BuiltinSchemaHashes> {
+  await bootstrap(store);
+  const [turn, detail] = await Promise.all([
+    putSchema(store, BUILTIN_TURN_SCHEMA),
+    putSchema(store, BUILTIN_DETAIL_SCHEMA),
+  ]);
+  return { turn, detail };
+}
+
+/** Read session jsonl, persist each turn to CAS, return detail hash. */
+export async function storeBuiltinDetail(
+  store: Store,
+  storageRoot: string,
+  sessionId: string,
+  model: string,
+  startedAtMs: number,
+  nowMs: number = Date.now(),
+): Promise<{ detailHash: string; turnCount: number }> {
+  const schemas = await registerBuiltinSchemas(store);
+  const turns = await readSessionTurns(storageRoot, sessionId);
+
+  const turnHashes: string[] = [];
+  for (const turn of turns) {
+    const hash = await store.put(schemas.turn, turn);
+    turnHashes.push(hash);
+  }
+
+  const duration = Math.max(0, nowMs - startedAtMs);
+  const detail: BuiltinDetailPayload = {
+    sessionId,
+    model,
+    duration,
+    turnCount: turnHashes.length,
+    turns: turnHashes,
+  };
+  const detailHash = await store.put(schemas.detail, detail);
+  return { detailHash, turnCount: turnHashes.length };
+}
@@ -0,0 +1,16 @@
+export { createBuiltinAgent } from "./agent.js";
+export { registerBuiltinSchemas, storeBuiltinDetail } from "./detail.js";
+export type { ChatMessage, LlmAssistantResponse, LlmToolCall } from "./llm/index.js";
+export { chatCompletionWithTools } from "./llm/index.js";
+export { BUILTIN_CONTINUE_MAX_TURNS, BUILTIN_MAX_TURNS, runBuiltinLoop } from "./loop.js";
+export { buildBuiltinMessages } from "./prompt.js";
+export { appendSessionTurn, initSessionDir, readSessionTurns, removeSession } from "./session.js";
+export type { BuiltinTool, ToolContext } from "./tools/index.js";
+export { executeBuiltinTool, getBuiltinTools } from "./tools/index.js";
+export type {
+  BuiltinDetailPayload,
+  BuiltinLoopTurn,
+  BuiltinToolCallRecord,
+  BuiltinToolResultRecord,
+  BuiltinTurnPayload,
+} from "./types.js";
@@ -0,0 +1,7 @@
+export { chatCompletionWithTools } from "./llm.js";
+export type {
+  ChatMessage,
+  LlmAssistantResponse,
+  LlmToolCall,
+  OpenAiToolDefinition,
+} from "./types.js";
@@ -0,0 +1,135 @@
+import type { ResolvedLlmProvider } from "@uncaged/workflow-agent-kit";
+
+import type {
+  ChatMessage,
+  LlmAssistantResponse,
+  LlmToolCall,
+  OpenAiToolDefinition,
+} from "./types.js";
+
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === "object" && value !== null && !Array.isArray(value);
+}
+
+function chatUrl(baseUrl: string): string {
+  const trimmed = baseUrl.replace(/\/+$/, "");
+  return `${trimmed}/chat/completions`;
+}
+
+function parseToolCalls(raw: unknown): LlmToolCall[] | null {
+  if (!Array.isArray(raw) || raw.length === 0) {
+    return null;
+  }
+  const calls: LlmToolCall[] = [];
+  for (const entry of raw) {
+    if (!isRecord(entry)) {
+      continue;
+    }
+    const id = entry.id;
+    const fn = entry.function;
+    if (typeof id !== "string" || !isRecord(fn)) {
+      continue;
+    }
+    const name = fn.name;
+    const args = fn.arguments;
+    if (typeof name !== "string" || typeof args !== "string") {
+      continue;
+    }
+    calls.push({ id, name, arguments: args });
+  }
+  return calls.length > 0 ? calls : null;
+}
+
+function parseAssistantMessage(parsed: unknown): LlmAssistantResponse {
+  if (!isRecord(parsed)) {
+    throw new Error("LLM response is not an object");
+  }
+  const choices = parsed.choices;
+  if (!Array.isArray(choices) || choices.length === 0) {
+    throw new Error("LLM response has no choices");
+  }
+  const c0 = choices[0];
+  if (!isRecord(c0)) {
+    throw new Error("LLM choice is not an object");
+  }
+  const messageObj = c0.message;
+  if (!isRecord(messageObj)) {
+    throw new Error("LLM message is not an object");
+  }
+  const contentRaw = messageObj.content;
+  const content =
+    typeof contentRaw === "string"
+      ? contentRaw
+      : contentRaw === null || contentRaw === undefined
+        ? null
+        : null;
+  const toolCalls = parseToolCalls(messageObj.tool_calls);
+  return { content, toolCalls };
+}
+
+function serializeMessage(message: ChatMessage): Record<string, unknown> {
+  if (message.role === "tool") {
+    return {
+      role: "tool",
+      tool_call_id: message.tool_call_id,
+      content: message.content,
+    };
+  }
+  if (message.role === "assistant") {
+    const base: Record<string, unknown> = {
+      role: "assistant",
+      content: message.content,
+    };
+    if (message.tool_calls !== null && message.tool_calls.length > 0) {
+      base.tool_calls = message.tool_calls.map((call) => ({
+        id: call.id,
+        type: "function",
+        function: { name: call.name, arguments: call.arguments },
+      }));
+    }
+    return base;
+  }
+  return { role: message.role, content: message.content };
+}
+
+/** OpenAI-compatible chat completion with tool calling (non-streaming). */
+export async function chatCompletionWithTools(
+  provider: ResolvedLlmProvider,
+  messages: ChatMessage[],
+  tools: OpenAiToolDefinition[],
+): Promise<LlmAssistantResponse> {
+  let response: Response;
+  try {
+    response = await fetch(chatUrl(provider.baseUrl), {
+      method: "POST",
+      headers: {
+        Authorization: `Bearer ${provider.apiKey}`,
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({
+        model: provider.model,
+        messages: messages.map(serializeMessage),
+        tools,
+        tool_choice: "auto",
+      }),
+    });
+  } catch (cause) {
+    const message = cause instanceof Error ? cause.message : String(cause);
+    throw new Error(`LLM network error: ${message}`);
+  }
+
+  const responseText = await response.text();
+  if (!response.ok) {
+    throw new Error(`LLM HTTP ${response.status}: ${responseText.slice(0, 2000)}`);
+  }
+
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(responseText) as unknown;
+  } catch (cause) {
+    const message = cause instanceof Error ? cause.message : String(cause);
+    throw new Error(`LLM invalid JSON response: ${message}`);
+  }
+
+  return parseAssistantMessage(parsed);
+}
@@ -0,0 +1,29 @@
+export type LlmToolCall = {
+  id: string;
+  name: string;
+  arguments: string;
+};
+
+export type LlmAssistantResponse = {
+  content: string | null;
+  toolCalls: LlmToolCall[] | null;
+};
+
+export type ChatMessage =
+  | { role: "system"; content: string }
+  | { role: "user"; content: string }
+  | {
+      role: "assistant";
+      content: string | null;
+      tool_calls: LlmToolCall[] | null;
+    }
+  | { role: "tool"; tool_call_id: string; content: string };
+
+export type OpenAiToolDefinition = {
+  type: "function";
+  function: {
+    name: string;
+    description: string;
+    parameters: Record<string, unknown>;
+  };
+};
@@ -0,0 +1,138 @@
+import type { ResolvedLlmProvider } from "@uncaged/workflow-agent-kit";
+import { createLogger } from "@uncaged/workflow-util";
+
+import { type ChatMessage, chatCompletionWithTools, type LlmToolCall } from "./llm/index.js";
+import { appendSessionTurn } from "./session.js";
+import {
+  builtinToolsToOpenAi,
+  executeBuiltinTool,
+  getBuiltinTools,
+  type ToolContext,
+} from "./tools/index.js";
+import type { BuiltinToolCall, BuiltinTurnPayload } from "./types.js";
+
+const log = createLogger({ sink: { kind: "stderr" } });
+
+export const BUILTIN_MAX_TURNS = 30;
+export const BUILTIN_CONTINUE_MAX_TURNS = 5;
+
+export type RunBuiltinLoopOptions = {
+  provider: ResolvedLlmProvider;
+  messages: ChatMessage[];
+  toolCtx: ToolContext;
+  maxTurns: number;
+  storageRoot: string;
+  sessionId: string;
+};
+
+export type RunBuiltinLoopResult = {
+  finalText: string;
+  messages: ChatMessage[];
+  turnCount: number;
+};
+
+function mapToolCallsForPayload(calls: LlmToolCall[]): BuiltinToolCall[] {
+  return calls.map((call) => ({
+    name: call.name,
+    args: call.arguments,
+  }));
+}
+
+async function appendTurn(
+  storageRoot: string,
+  sessionId: string,
+  payload: BuiltinTurnPayload,
+): Promise<void> {
+  await appendSessionTurn(storageRoot, sessionId, payload);
+}
+
+async function executeTurnTools(
+  calls: Array<{ id: string; name: string; arguments: string }>,
+  toolCtx: ToolContext,
+  messages: ChatMessage[],
+  storageRoot: string,
+  sessionId: string,
+): Promise<number> {
+  let turnCount = 0;
+  for (const call of calls) {
+    const result = await executeBuiltinTool(call.name, call.arguments, toolCtx);
+    messages.push({ role: "tool", tool_call_id: call.id, content: result });
+    await appendTurn(storageRoot, sessionId, {
+      role: "tool",
+      content: result,
+      toolCalls: null,
+      reasoning: null,
+    });
+    turnCount += 1;
+  }
+  return turnCount;
+}
+
+/** Agent run loop: LLM ↔ tools until no tool_calls or maxTurns. */
+export async function runBuiltinLoop(
+  options: RunBuiltinLoopOptions,
+): Promise<RunBuiltinLoopResult> {
+  const messages = [...options.messages];
+  const openAiTools = builtinToolsToOpenAi(getBuiltinTools());
+  let finalText = "";
+  let turnCount = 0;
+
+  for (let turn = 0; turn < options.maxTurns; turn++) {
+    log("8K2M4N7P", `builtin loop turn ${turn + 1}/${options.maxTurns}`);
+    const response = await chatCompletionWithTools(options.provider, messages, openAiTools);
+
+    const assistantMessage: ChatMessage = {
+      role: "assistant",
+      content: response.content,
+      tool_calls: response.toolCalls,
+    };
+    messages.push(assistantMessage);
+
+    if (response.toolCalls === null || response.toolCalls.length === 0) {
+      finalText = response.content ?? "";
+      await appendTurn(options.storageRoot, options.sessionId, {
+        role: "assistant",
+        content: response.content ?? "",
+        toolCalls: null,
+        reasoning: null,
+      });
+      turnCount += 1;
+      break;
+    }
+
+    // Assistant turn with tool calls
+    await appendTurn(options.storageRoot, options.sessionId, {
+      role: "assistant",
+      content: response.content ?? "",
+      toolCalls: mapToolCallsForPayload(response.toolCalls),
+      reasoning: null,
+    });
+    turnCount += 1;
+
+    // Execute tools
+    turnCount += await executeTurnTools(
+      response.toolCalls,
+      options.toolCtx,
+      messages,
+      options.storageRoot,
+      options.sessionId,
+    );
+  }
+
+  if (finalText === "" && messages.length > 0) {
+    for (let i = messages.length - 1; i >= 0; i--) {
+      const msg = messages[i];
+      if (
+        msg !== undefined &&
+        msg.role === "assistant" &&
+        msg.content !== null &&
+        msg.content.trim() !== ""
+      ) {
+        finalText = msg.content;
+        break;
+      }
+    }
+  }
+
+  return { finalText, messages, turnCount };
+}
@@ -0,0 +1,99 @@
+import { type AgentContext, buildRolePrompt } from "@uncaged/workflow-agent-kit";
+
+import type { ChatMessage } from "./llm/index.js";
+
+type StepContext = AgentContext["steps"][number];
+
+function formatStep(step: StepContext, stepNumber: number): string {
+  return [
+    `### Step ${stepNumber}: ${step.role}`,
+    `Output: ${JSON.stringify(step.output)}`,
+    `Agent: ${step.agent}`,
+  ].join("\n");
+}
+
+function buildStepsSummary(steps: StepContext[], fromIndex: number, toIndex: number): string {
+  if (fromIndex >= toIndex) {
+    return "";
+  }
+
+  const lines: string[] = ["## What Happened Since Your Last Turn"];
+  for (let i = fromIndex; i < toIndex; i++) {
+    const step = steps[i];
+    if (step === undefined) {
+      continue;
+    }
+    lines.push("");
+    lines.push(formatStep(step, i + 1));
+  }
+  return lines.join("\n");
+}
+
+function buildUserTurnContent(edgePrompt: string, summary: string): string {
+  const parts: string[] = [];
+  if (edgePrompt !== "") {
+    parts.push(edgePrompt);
+  }
+  if (summary !== "") {
+    if (parts.length > 0) {
+      parts.push("");
+    }
+    parts.push(summary);
+  }
+  return parts.join("\n");
+}
+
+/**
+ * Reconstruct multi-turn chat messages from thread history for cache-friendly session resume.
+ *
+ * - system: role prompt + output format (stable prefix)
+ * - For each prior visit of this role: user (edgePrompt + inter-step summary) + assistant (output JSON)
+ * - Final user: current edgePrompt + summary since last visit of this role
+ */
+export function buildBuiltinMessages(ctx: AgentContext): ChatMessage[] {
+  const roleDef = ctx.workflow.roles[ctx.role];
+  const rolePrompt = roleDef !== undefined ? buildRolePrompt(roleDef) : "";
+  const systemParts: string[] = [];
+  if (ctx.outputFormatInstruction !== "") {
+    systemParts.push(ctx.outputFormatInstruction, "");
+  }
+  systemParts.push(rolePrompt);
+
+  const messages: ChatMessage[] = [{ role: "system", content: systemParts.join("\n") }];
+
+  const roleVisitIndices: number[] = [];
+  for (let i = 0; i < ctx.steps.length; i++) {
+    const step = ctx.steps[i];
+    if (step !== undefined && step.role === ctx.role) {
+      roleVisitIndices.push(i);
+    }
+  }
+
+  let prevVisitIndex = -1;
+  for (const visitIndex of roleVisitIndices) {
+    const visitStep = ctx.steps[visitIndex];
+    if (visitStep === undefined) {
+      continue;
+    }
+
+    const summary = buildStepsSummary(ctx.steps, prevVisitIndex + 1, visitIndex);
+    messages.push({
+      role: "user",
+      content: buildUserTurnContent(visitStep.edgePrompt, summary),
+    });
+    messages.push({
+      role: "assistant",
+      content: JSON.stringify(visitStep.output),
+      tool_calls: null,
+    });
+    prevVisitIndex = visitIndex;
+  }
+
+  const finalSummary = buildStepsSummary(ctx.steps, prevVisitIndex + 1, ctx.steps.length);
+  messages.push({
+    role: "user",
+    content: buildUserTurnContent(ctx.edgePrompt, finalSummary),
+  });
+
+  return messages;
+}
@@ -0,0 +1,45 @@
+import type { JSONSchema } from "@uncaged/json-cas";
+
+const BUILTIN_TOOL_CALL_SCHEMA: JSONSchema = {
+  type: "object",
+  required: ["name", "args"],
+  properties: {
+    name: { type: "string" },
+    args: { type: "string" },
+  },
+  additionalProperties: false,
+};
+
+export const BUILTIN_TURN_SCHEMA: JSONSchema = {
+  title: "builtin-turn",
+  type: "object",
+  required: ["role", "content"],
+  properties: {
+    role: { type: "string", enum: ["assistant", "tool"] },
+    content: { type: "string" },
+    toolCalls: {
+      anyOf: [{ type: "array", items: BUILTIN_TOOL_CALL_SCHEMA }, { type: "null" }],
+    },
+    reasoning: {
+      anyOf: [{ type: "string" }, { type: "null" }],
+    },
+  },
+  additionalProperties: false,
+};
+
+export const BUILTIN_DETAIL_SCHEMA: JSONSchema = {
+  title: "builtin-detail",
+  type: "object",
+  required: ["sessionId", "model", "duration", "turnCount", "turns"],
+  properties: {
+    sessionId: { type: "string" },
+    model: { type: "string" },
+    duration: { type: "integer" },
+    turnCount: { type: "integer" },
+    turns: {
+      type: "array",
+      items: { type: "string", format: "cas_ref" },
+    },
+  },
+  additionalProperties: false,
+};
@@ -0,0 +1,59 @@
+import { appendFile, mkdir, readFile, rm } from "node:fs/promises";
+import { join } from "node:path";
+
+import { createLogger } from "@uncaged/workflow-util";
+
+import type { BuiltinTurnPayload } from "./types.js";
+
+const log = createLogger({ sink: { kind: "stderr" } });
+
+function sessionsDir(storageRoot: string): string {
+  return join(storageRoot, "sessions");
+}
+
+function sessionFile(storageRoot: string, sessionId: string): string {
+  return join(sessionsDir(storageRoot), `${sessionId}.jsonl`);
+}
+
+/** Ensure sessions directory exists. */
+export async function initSessionDir(storageRoot: string): Promise<void> {
+  await mkdir(sessionsDir(storageRoot), { recursive: true });
+}
+
+/** Append a turn to the session jsonl file. */
+export async function appendSessionTurn(
+  storageRoot: string,
+  sessionId: string,
+  turn: BuiltinTurnPayload,
+): Promise<void> {
+  const line = `${JSON.stringify(turn)}\n`;
+  await appendFile(sessionFile(storageRoot, sessionId), line, "utf-8");
+  log("3XQVN8KR", `session ${sessionId} appended ${turn.role} turn`);
+}
+
+/** Read all turns from session jsonl. Returns empty array if file does not exist. */
+export async function readSessionTurns(
+  storageRoot: string,
+  sessionId: string,
+): Promise<BuiltinTurnPayload[]> {
+  try {
+    const content = await readFile(sessionFile(storageRoot, sessionId), "utf-8");
+    const lines = content
+      .trim()
+      .split("\n")
+      .filter((l) => l.length > 0);
+    return lines.map((l) => JSON.parse(l) as BuiltinTurnPayload);
+  } catch {
+    return [];
+  }
+}
+
+/** Remove session jsonl file (called after detail is persisted to step CAS). */
+export async function removeSession(storageRoot: string, sessionId: string): Promise<void> {
+  try {
+    await rm(sessionFile(storageRoot, sessionId));
+    log("7FWDP2MJ", `session ${sessionId} removed`);
+  } catch {
+    // already gone — fine
+  }
+}
@@ -0,0 +1,44 @@
+import type { OpenAiToolDefinition } from "../llm/index.js";
+
+import { readFileTool } from "./read-file.js";
+import { runCommandTool } from "./run-command.js";
+import type { BuiltinTool, ToolContext } from "./types.js";
+import { writeFileTool } from "./write-file.js";
+
+export { resolvePath } from "./path.js";
+export type { BuiltinTool, ToolContext } from "./types.js";
+
+const BUILTIN_TOOLS: BuiltinTool[] = [readFileTool, writeFileTool, runCommandTool];
+
+export function getBuiltinTools(): readonly BuiltinTool[] {
+  return BUILTIN_TOOLS;
+}
+
+export function builtinToolsToOpenAi(tools: readonly BuiltinTool[]): OpenAiToolDefinition[] {
+  return tools.map((tool) => ({
+    type: "function",
+    function: {
+      name: tool.name,
+      description: tool.description,
+      parameters: tool.parameters as Record<string, unknown>,
+    },
+  }));
+}
+
+export async function executeBuiltinTool(
+  name: string,
+  argsJson: string,
+  ctx: ToolContext,
+): Promise<string> {
+  const tool = BUILTIN_TOOLS.find((t) => t.name === name);
+  if (tool === undefined) {
+    return `Error: unknown tool ${name}`;
+  }
+  let args: unknown;
+  try {
+    args = JSON.parse(argsJson) as unknown;
+  } catch {
+    return "Error: tool arguments must be valid JSON";
+  }
+  return tool.execute(args, ctx);
+}
@@ -0,0 +1,6 @@
+import { resolve } from "node:path";
+
+/** Resolve a path relative to the working directory. */
+export function resolvePath(cwd: string, inputPath: string): string {
+  return resolve(cwd, inputPath);
+}
@@ -0,0 +1,41 @@
+import { readFile, stat } from "node:fs/promises";
+import { resolvePath } from "./path.js";
+import type { BuiltinTool } from "./types.js";
+
+const MAX_READ_BYTES = 512 * 1024;
+
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === "object" && value !== null && !Array.isArray(value);
+}
+
+export const readFileTool: BuiltinTool = {
+  name: "read_file",
+  description: "Read a UTF-8 text file from the workspace.",
+  parameters: {
+    type: "object",
+    required: ["path"],
+    properties: {
+      path: { type: "string", description: "Relative or absolute path within the workspace." },
+    },
+    additionalProperties: false,
+  },
+  execute: async (args, ctx) => {
+    if (!isRecord(args) || typeof args.path !== "string") {
+      return "Error: path must be a string";
+    }
+    const resolved = resolvePath(ctx.cwd, args.path);
+    try {
+      const info = await stat(resolved);
+      if (!info.isFile()) {
+        return "Error: not a file";
+      }
+      if (info.size > MAX_READ_BYTES) {
+        return `Error: file exceeds ${MAX_READ_BYTES} byte limit`;
+      }
+      return await readFile(resolved, "utf8");
+    } catch (cause) {
+      const message = cause instanceof Error ? cause.message : String(cause);
+      return `Error: ${message}`;
+    }
+  },
+};
@@ -0,0 +1,95 @@
+import { spawn } from "node:child_process";
+import { resolvePath } from "./path.js";
+import type { BuiltinTool } from "./types.js";
+
+const COMMAND_TIMEOUT_MS = 60_000;
+const MAX_OUTPUT_CHARS = 32_000;
+
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === "object" && value !== null && !Array.isArray(value);
+}
+
+function truncate(text: string, maxChars: number): string {
+  if (text.length <= maxChars) {
+    return text;
+  }
+  return `${text.slice(0, maxChars)}\n...(truncated)`;
+}
+
+function runShell(
+  command: string,
+  cwd: string,
+): Promise<{ stdout: string; stderr: string; code: number }> {
+  return new Promise((resolve, reject) => {
+    const child = spawn(command, {
+      cwd,
+      env: process.env,
+      shell: true,
+      stdio: ["ignore", "pipe", "pipe"],
+    });
+
+    let stdout = "";
+    let stderr = "";
+    child.stdout?.on("data", (chunk: Buffer) => {
+      stdout += chunk.toString();
+    });
+    child.stderr?.on("data", (chunk: Buffer) => {
+      stderr += chunk.toString();
+    });
+
+    const timer = setTimeout(() => {
+      child.kill("SIGTERM");
+    }, COMMAND_TIMEOUT_MS);
+
+    child.on("error", (cause) => {
+      clearTimeout(timer);
+      const message = cause instanceof Error ? cause.message : String(cause);
+      reject(new Error(message));
+    });
+
+    child.on("close", (code) => {
+      clearTimeout(timer);
+      resolve({ stdout, stderr, code: code ?? 1 });
+    });
+  });
+}
+
+export const runCommandTool: BuiltinTool = {
+  name: "run_command",
+  description: "Run a shell command. Output is truncated to 32KB.",
+  parameters: {
+    type: "object",
+    required: ["command"],
+    properties: {
+      command: { type: "string", description: "Shell command to execute." },
+      cwd: {
+        type: "string",
+        description: "Optional working directory relative to workspace root.",
+      },
+    },
+    additionalProperties: false,
+  },
+  execute: async (args, ctx) => {
+    if (!isRecord(args) || typeof args.command !== "string") {
+      return "Error: command must be a string";
+    }
+    let workDir = ctx.cwd;
+    if (args.cwd !== undefined && args.cwd !== null) {
+      if (typeof args.cwd !== "string") {
+        return "Error: cwd must be a string";
+      }
+      workDir = resolvePath(ctx.cwd, args.cwd);
+    }
+    try {
+      const { stdout, stderr, code } = await runShell(args.command, workDir);
+      const out = truncate(
+        `exit_code: ${code}\n--- stdout ---\n${stdout}\n--- stderr ---\n${stderr}`,
+        MAX_OUTPUT_CHARS,
+      );
+      return out;
+    } catch (cause) {
+      const message = cause instanceof Error ? cause.message : String(cause);
+      return `Error: ${message}`;
+    }
+  },
+};
@@ -0,0 +1,13 @@
+import type { JSONSchema } from "@uncaged/json-cas";
+
+export type ToolContext = {
+  cwd: string;
+  storageRoot: string;
+};
+
+export type BuiltinTool = {
+  name: string;
+  description: string;
+  parameters: JSONSchema;
+  execute: (args: unknown, ctx: ToolContext) => Promise<string>;
+};
@@ -0,0 +1,36 @@
+import { mkdir, writeFile } from "node:fs/promises";
+import { dirname } from "node:path";
+import { resolvePath } from "./path.js";
+import type { BuiltinTool } from "./types.js";
+
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === "object" && value !== null && !Array.isArray(value);
+}
+
+export const writeFileTool: BuiltinTool = {
+  name: "write_file",
+  description: "Write UTF-8 text to a file in the workspace (creates parent directories).",
+  parameters: {
+    type: "object",
+    required: ["path", "content"],
+    properties: {
+      path: { type: "string", description: "Relative or absolute path within the workspace." },
+      content: { type: "string", description: "File contents to write." },
+    },
+    additionalProperties: false,
+  },
+  execute: async (args, ctx) => {
+    if (!isRecord(args) || typeof args.path !== "string" || typeof args.content !== "string") {
+      return "Error: path and content must be strings";
+    }
+    const resolved = resolvePath(ctx.cwd, args.path);
+    try {
+      await mkdir(dirname(resolved), { recursive: true });
+      await writeFile(resolved, args.content, "utf8");
+      return `Wrote ${args.content.length} bytes to ${args.path}`;
+    } catch (cause) {
+      const message = cause instanceof Error ? cause.message : String(cause);
+      return `Error: ${message}`;
+    }
+  },
+};
@@ -0,0 +1,49 @@
+import type { ChatMessage } from "./llm/index.js";
+
+export type BuiltinToolCallRecord = {
+  id: string;
+  name: string;
+  args: string;
+};
+
+export type BuiltinToolResultRecord = {
+  toolCallId: string;
+  name: string;
+  content: string;
+};
+
+export type BuiltinLoopTurn = {
+  assistantContent: string | null;
+  toolCalls: BuiltinToolCallRecord[] | null;
+  toolResults: BuiltinToolResultRecord[] | null;
+};
+
+export type BuiltinSessionState = {
+  sessionId: string;
+  model: string;
+  startedAtMs: number;
+  messages: ChatMessage[];
+  turns: BuiltinLoopTurn[];
+};
+
+export type BuiltinTurnRole = "assistant" | "tool";
+
+export type BuiltinToolCall = {
+  name: string;
+  args: string;
+};
+
+export type BuiltinTurnPayload = {
+  role: BuiltinTurnRole;
+  content: string;
+  toolCalls: BuiltinToolCall[] | null;
+  reasoning: string | null;
+};
+
+export type BuiltinDetailPayload = {
+  sessionId: string;
+  model: string;
+  duration: number;
+  turnCount: number;
+  turns: string[];
+};
@@ -0,0 +1,9 @@
+{
+  "extends": "../../tsconfig.json",
+  "compilerOptions": {
+    "rootDir": "src",
+    "outDir": "dist"
+  },
+  "include": ["src"],
+  "references": [{ "path": "../workflow-agent-kit" }, { "path": "../workflow-util" }]
+}
@@ -0,0 +1,71 @@
+import { describe, expect, test } from "bun:test";
+import type { AgentContext } from "@uncaged/workflow-agent-kit";
+import type { ThreadId } from "@uncaged/workflow-protocol";
+import { buildClaudeCodePrompt } from "../src/claude-code.js";
+
+function makeCtx(overrides: Partial<AgentContext> = {}): AgentContext {
+  return {
+    threadId: "01JTEST0000000000000000000" as ThreadId,
+    edgePrompt: "Proceed with the assigned role.",
+    isFirstVisit: true,
+    workflow: {
+      roles: {
+        developer: {
+          description: "TDD implementation per test spec",
+          goal: "Write code",
+          capabilities: ["coding"],
+          procedure: "1. Read spec\n2. Write code",
+          output: "List files changed",
+          frontmatter: "",
+        },
+      },
+      conditions: {},
+      graph: {},
+    },
+    role: "developer",
+    start: { prompt: "Fix the bug", workflowHash: "abc123", threadId: "t1" },
+    steps: [],
+    store: {} as AgentContext["store"],
+    outputFormatInstruction: "Use YAML frontmatter",
+    ...overrides,
+  };
+}
+
+describe("buildClaudeCodePrompt", () => {
+  test("assembles outputFormatInstruction + role prompt + task prompt", () => {
+    const result = buildClaudeCodePrompt(makeCtx());
+    expect(result).toMatch(/^Use YAML frontmatter/);
+    expect(result).toContain("Write code");
+    expect(result).toContain("## Task\nFix the bug");
+  });
+
+  test("includes previous steps as history summary", () => {
+    const ctx = makeCtx({
+      steps: [
+        {
+          role: "planner",
+          output: '{"plan":"do X"}',
+          agent: "hermes",
+          detail: "detail-1",
+          edgePrompt: "Create a plan.",
+        },
+      ],
+    });
+    const result = buildClaudeCodePrompt(ctx);
+    expect(result).toContain("## Previous Steps");
+    expect(result).toContain("Step 1: planner");
+    expect(result).toContain("do X");
+  });
+
+  test("omits history section when steps array is empty", () => {
+    const result = buildClaudeCodePrompt(makeCtx({ steps: [] }));
+    expect(result).not.toContain("## Previous Steps");
+  });
+
+  test("works without outputFormatInstruction", () => {
+    const result = buildClaudeCodePrompt(makeCtx({ outputFormatInstruction: "" }));
+    expect(result).not.toMatch(/^\s*\n/);
+    expect(result).toContain("Write code");
+    expect(result).toContain("## Task");
+  });
+});
@@ -0,0 +1,221 @@
+import { describe, expect, test } from "bun:test";
+import { createMemoryStore, walk } from "@uncaged/json-cas";
+import {
+  parseClaudeCodeJsonOutput,
+  parseClaudeCodeStreamOutput,
+  storeClaudeCodeDetail,
+  storeClaudeCodeRawOutput,
+} from "../src/session-detail.js";
+import type { ClaudeCodeParsedResult } from "../src/types.js";
+
+describe("parseClaudeCodeJsonOutput", () => {
+  test("parses valid claude -p --output-format json output", () => {
+    const stdout = JSON.stringify({
+      type: "result",
+      subtype: "success",
+      result: "Done fixing bug",
+      session_id: "75e2167f-abc",
+      num_turns: 3,
+      total_cost_usd: 0.08,
+      duration_ms: 10276,
+      stop_reason: "end_turn",
+      usage: { input_tokens: 100, output_tokens: 50 },
+    });
+    const parsed = parseClaudeCodeJsonOutput(stdout);
+    expect(parsed).not.toBeNull();
+    expect(parsed!.type).toBe("result");
+    expect(parsed!.subtype).toBe("success");
+    expect(parsed!.result).toBe("Done fixing bug");
+    expect(parsed!.sessionId).toBe("75e2167f-abc");
+    expect(parsed!.numTurns).toBe(3);
+    expect(parsed!.totalCostUsd).toBe(0.08);
+    expect(parsed!.durationMs).toBe(10276);
+    expect(parsed!.stopReason).toBe("end_turn");
+    expect(parsed!.usage.inputTokens).toBe(100);
+    expect(parsed!.usage.outputTokens).toBe(50);
+    expect(parsed!.turns).toEqual([]);
+  });
+
+  test("returns null for non-JSON output", () => {
+    const parsed = parseClaudeCodeJsonOutput("Some random text\nwithout JSON");
+    expect(parsed).toBeNull();
+  });
+
+  test("returns null when session_id is missing", () => {
+    const stdout = JSON.stringify({ type: "result", result: "hi", subtype: "success" });
+    const parsed = parseClaudeCodeJsonOutput(stdout);
+    expect(parsed).toBeNull();
+  });
+});
+
+describe("parseClaudeCodeStreamOutput", () => {
+  test("parses stream-json output with turns", () => {
+    const lines = [
+      JSON.stringify({
+        type: "system",
+        subtype: "init",
+        session_id: "sess-123",
+        model: "claude-sonnet-4.5",
+        tools: ["Bash", "Read"],
+      }),
+      JSON.stringify({
+        type: "assistant",
+        message: {
+          role: "assistant",
+          content: [
+            { type: "text", text: "I'll list the files." },
+            { type: "tool_use", id: "tool_1", name: "Bash", input: { command: "ls" } },
+          ],
+        },
+        session_id: "sess-123",
+      }),
+      JSON.stringify({
+        type: "user",
+        message: {
+          role: "user",
+          content: [{ type: "tool_result", tool_use_id: "tool_1", content: "file1.ts\nfile2.ts" }],
+        },
+        session_id: "sess-123",
+      }),
+      JSON.stringify({
+        type: "assistant",
+        message: {
+          role: "assistant",
+          content: [{ type: "text", text: "There are 2 files." }],
+        },
+        session_id: "sess-123",
+      }),
+      JSON.stringify({
+        type: "result",
+        subtype: "success",
+        result: "There are 2 files.",
+        session_id: "sess-123",
+        num_turns: 2,
+        total_cost_usd: 0.05,
+        duration_ms: 5000,
+        stop_reason: "end_turn",
+        usage: {
+          input_tokens: 200,
+          output_tokens: 30,
+          cache_read_input_tokens: 100,
+          cache_creation_input_tokens: 0,
+        },
+      }),
+    ];
+    const stdout = lines.join("\n");
+    const parsed = parseClaudeCodeStreamOutput(stdout);
+
+    expect(parsed).not.toBeNull();
+    expect(parsed!.model).toBe("claude-sonnet-4.5");
+    expect(parsed!.sessionId).toBe("sess-123");
+    expect(parsed!.result).toBe("There are 2 files.");
+    expect(parsed!.stopReason).toBe("end_turn");
+    expect(parsed!.usage.inputTokens).toBe(200);
+    expect(parsed!.usage.outputTokens).toBe(30);
+    expect(parsed!.usage.cacheReadInputTokens).toBe(100);
+
+    // Turns: assistant(text+tool), tool_result, assistant(text)
+    expect(parsed!.turns).toHaveLength(3);
+    expect(parsed!.turns[0]!.role).toBe("assistant");
+    expect(parsed!.turns[0]!.content).toBe("I'll list the files.");
+    expect(parsed!.turns[0]!.toolCalls).toHaveLength(1);
+    expect(parsed!.turns[0]!.toolCalls![0]!.name).toBe("Bash");
+    expect(parsed!.turns[1]!.role).toBe("tool_result");
+    expect(parsed!.turns[1]!.content).toBe("file1.ts\nfile2.ts");
+    expect(parsed!.turns[2]!.role).toBe("assistant");
+    expect(parsed!.turns[2]!.content).toBe("There are 2 files.");
+    expect(parsed!.turns[2]!.toolCalls).toBeNull();
+  });
+
+  test("returns null when no result line", () => {
+    const stdout = JSON.stringify({ type: "system", model: "test" });
+    expect(parseClaudeCodeStreamOutput(stdout)).toBeNull();
+  });
+
+  test("skips invalid JSON lines gracefully", () => {
+    const lines = [
+      "not json",
+      JSON.stringify({
+        type: "result",
+        subtype: "success",
+        result: "ok",
+        session_id: "s1",
+        num_turns: 1,
+        total_cost_usd: 0.01,
+        duration_ms: 1000,
+        stop_reason: "end_turn",
+        usage: {},
+      }),
+    ];
+    const parsed = parseClaudeCodeStreamOutput(lines.join("\n"));
+    expect(parsed).not.toBeNull();
+    expect(parsed!.result).toBe("ok");
+    expect(parsed!.turns).toHaveLength(0);
+  });
+});
+
+describe("storeClaudeCodeDetail", () => {
+  const baseParsed: ClaudeCodeParsedResult = {
+    type: "result",
+    subtype: "success",
+    result: "The answer",
+    sessionId: "abc-123",
+    numTurns: 5,
+    totalCostUsd: 0.12,
+    durationMs: 15000,
+    model: "claude-sonnet-4.5",
+    stopReason: "end_turn",
+    usage: {
+      inputTokens: 100,
+      outputTokens: 50,
+      cacheReadInputTokens: 0,
+      cacheCreationInputTokens: 0,
+    },
+    turns: [
+      { index: 0, role: "assistant", content: "hello", toolCalls: null },
+      { index: 1, role: "tool_result", content: "world", toolCalls: null },
+    ],
+  };
+
+  test("stores detail with per-turn CAS nodes", async () => {
+    const store = createMemoryStore();
+    const { detailHash, output, sessionId } = await storeClaudeCodeDetail(store, baseParsed);
+
+    expect(detailHash).toHaveLength(13);
+    expect(output).toBe("The answer");
+    expect(sessionId).toBe("abc-123");
+
+    const node = await store.get(detailHash);
+    expect(node).not.toBeNull();
+    expect(node!.payload.model).toBe("claude-sonnet-4.5");
+    expect(node!.payload.stopReason).toBe("end_turn");
+    expect(node!.payload.usage.inputTokens).toBe(100);
+    expect(node!.payload.turns).toHaveLength(2);
+
+    // Verify turn CAS nodes
+    const turn0 = await store.get(node!.payload.turns[0]);
+    expect(turn0).not.toBeNull();
+    expect(turn0!.payload.role).toBe("assistant");
+    expect(turn0!.payload.content).toBe("hello");
+  });
+
+  test("detail node is walkable from root", async () => {
+    const store = createMemoryStore();
+    const { detailHash } = await storeClaudeCodeDetail(store, baseParsed);
+    const visited: string[] = [];
+    walk(store, detailHash, (hash) => visited.push(hash));
+    expect(visited.length).toBeGreaterThan(0);
+  });
+});
+
+describe("storeClaudeCodeRawOutput", () => {
+  test("stores raw text when JSON parsing fails", async () => {
+    const store = createMemoryStore();
+    const rawText = "Claude produced plain text without JSON";
+    const hash = await storeClaudeCodeRawOutput(store, rawText);
+    expect(hash).toHaveLength(13);
+    const node = await store.get(hash);
+    expect(node).not.toBeNull();
+    expect(node!.payload.text).toBe(rawText);
+  });
+});
@@ -0,0 +1,33 @@
+{
+  "name": "@uncaged/workflow-agent-claude-code",
+  "version": "0.1.0",
+  "files": [
+    "src",
+    "dist",
+    "package.json"
+  ],
+  "type": "module",
+  "bin": {
+    "uwf-claude-code": "./src/cli.ts"
+  },
+  "exports": {
+    ".": {
+      "bun": "./src/index.ts",
+      "types": "./dist/index.d.ts",
+      "import": "./dist/index.js"
+    }
+  },
+  "scripts": {
+    "test": "bun test"
+  },
+  "dependencies": {
+    "@uncaged/json-cas": "^0.4.0",
+    "@uncaged/workflow-agent-kit": "workspace:^"
+  },
+  "devDependencies": {
+    "typescript": "^5.8.3"
+  },
+  "publishConfig": {
+    "access": "public"
+  }
+}
@@ -0,0 +1,195 @@
+import { spawn } from "node:child_process";
+import { mkdirSync, writeFileSync } from "node:fs";
+import type { Store } from "@uncaged/json-cas";
+import {
+  type AgentContext,
+  type AgentRunResult,
+  buildRolePrompt,
+  createAgent,
+  getCachedSessionId,
+  setCachedSessionId,
+} from "@uncaged/workflow-agent-kit";
+import { createLogger } from "@uncaged/workflow-util";
+
+import { parseClaudeCodeStreamOutput, storeClaudeCodeDetail } from "./session-detail.js";
+
+const log = createLogger({ sink: { kind: "stderr" } });
+
+const CLAUDE_COMMAND = "claude";
+const CLAUDE_MAX_TURNS = 90;
+
+function buildHistorySummary(steps: AgentContext["steps"]): string {
+  if (steps.length === 0) {
+    return "";
+  }
+
+  const lines: string[] = ["## Previous Steps"];
+  for (let i = 0; i < steps.length; i++) {
+    const step = steps[i];
+    if (step === undefined) {
+      continue;
+    }
+    lines.push("");
+    lines.push(`### Step ${i + 1}: ${step.role}`);
+    lines.push(`Output: ${JSON.stringify(step.output)}`);
+    lines.push(`Agent: ${step.agent}`);
+  }
+  return lines.join("\n");
+}
+
+/** Assemble system prompt, task, and prior step outputs for Claude Code. */
+export function buildClaudeCodePrompt(ctx: AgentContext): string {
+  const roleDef = ctx.workflow.roles[ctx.role];
+  const rolePrompt = roleDef !== undefined ? buildRolePrompt(roleDef) : "";
+  const parts: string[] = [];
+  if (ctx.outputFormatInstruction !== undefined && ctx.outputFormatInstruction !== "") {
+    parts.push(ctx.outputFormatInstruction, "");
+  }
+  parts.push(rolePrompt, "", "## Task", ctx.start.prompt);
+  const historyBlock = buildHistorySummary(ctx.steps);
+  if (historyBlock !== "") {
+    parts.push("", historyBlock);
+  }
+  parts.push("", "## Current Instruction", "", ctx.edgePrompt);
+  return parts.join("\n");
+}
+
+function spawnClaude(args: string[]): Promise<{ stdout: string; stderr: string }> {
+  return new Promise((resolve, reject) => {
+    const child = spawn(CLAUDE_COMMAND, args, {
+      env: process.env,
+      shell: false,
+      stdio: ["ignore", "pipe", "pipe"],
+    });
+
+    let stdout = "";
+    let stderr = "";
+    child.stdout?.on("data", (chunk: Buffer) => {
+      stdout += chunk.toString();
+    });
+    child.stderr?.on("data", (chunk: Buffer) => {
+      stderr += chunk.toString();
+    });
+
+    child.on("error", (cause) => {
+      const message = cause instanceof Error ? cause.message : String(cause);
+      reject(new Error(`claude spawn failed: ${message}`));
+    });
+
+    child.on("close", (code) => {
+      if (code === 0) {
+        resolve({ stdout, stderr });
+        return;
+      }
+      const detail = stderr.trim() !== "" ? ` stderr=${stderr.trim()}` : "";
+      reject(new Error(`claude exited with code ${code ?? "null"}${detail}`));
+    });
+  });
+}
+
+function spawnClaudeRun(prompt: string): Promise<{ stdout: string; stderr: string }> {
+  return spawnClaude([
+    "-p",
+    prompt,
+    "--output-format",
+    "stream-json",
+    "--verbose",
+    "--dangerously-skip-permissions",
+    "--max-turns",
+    String(CLAUDE_MAX_TURNS),
+  ]);
+}
+
+function spawnClaudeResume(
+  sessionId: string,
+  message: string,
+): Promise<{ stdout: string; stderr: string }> {
+  return spawnClaude([
+    "-p",
+    message,
+    "--resume",
+    sessionId,
+    "--output-format",
+    "stream-json",
+    "--verbose",
+    "--dangerously-skip-permissions",
+    "--max-turns",
+    String(CLAUDE_MAX_TURNS),
+  ]);
+}
+
+const NDJSON_DUMP_DIR = "/tmp/uwf-ndjson-dump";
+
+async function processClaudeOutput(stdout: string, store: Store): Promise<AgentRunResult> {
+  // Debug dump: save raw NDJSON for issue #439 investigation
+  try {
+    mkdirSync(NDJSON_DUMP_DIR, { recursive: true });
+    writeFileSync(`${NDJSON_DUMP_DIR}/${Date.now()}.ndjson`, stdout);
+  } catch {
+    // ignore dump failures
+  }
+
+  const parsed = parseClaudeCodeStreamOutput(stdout);
+
+  if (parsed !== null) {
+    const { detailHash, output, sessionId } = await storeClaudeCodeDetail(store, parsed);
+    return { output, detailHash, sessionId };
+  }
+
+  throw new Error(
+    `Claude Code returned unparseable output (first 200 chars): ${stdout.slice(0, 200)}`,
+  );
+}
+
+async function runClaudeCode(ctx: AgentContext): Promise<AgentRunResult> {
+  const fullPrompt = buildClaudeCodePrompt(ctx);
+
+  log("K7R2M4N8", `prompt for role=${ctx.role} (length=${fullPrompt.length}):\n${fullPrompt}`);
+
+  // Try resuming a cached session for re-entry scenarios (e.g. reviewer reject → developer re-entry).
+  if (!ctx.isFirstVisit) {
+    const cachedSessionId = await getCachedSessionId(ctx.threadId, ctx.role);
+    if (cachedSessionId !== null) {
+      try {
+        const { stdout } = await spawnClaudeResume(cachedSessionId, fullPrompt);
+        const result = await processClaudeOutput(stdout, ctx.store);
+        if (result.sessionId !== undefined && result.sessionId !== "") {
+          await setCachedSessionId(ctx.threadId, ctx.role, result.sessionId);
+        }
+        return result;
+      } catch (err) {
+        log(
+          "5VKR8N3Q",
+          "resume failed for session %s, falling back to fresh run: %s",
+          cachedSessionId,
+          err,
+        );
+      }
+    }
+  }
+
+  const { stdout } = await spawnClaudeRun(fullPrompt);
+  const result = await processClaudeOutput(stdout, ctx.store);
+  if (result.sessionId !== undefined && result.sessionId !== "") {
+    await setCachedSessionId(ctx.threadId, ctx.role, result.sessionId);
+  }
+  return result;
+}
+
+async function continueClaudeCode(
+  sessionId: string,
+  message: string,
+  store: Store,
+): Promise<AgentRunResult> {
+  const { stdout } = await spawnClaudeResume(sessionId, message);
+  return processClaudeOutput(stdout, store);
+}
+
+/** Agent CLI factory: parses argv, runs Claude Code, extracts output, writes StepNode. */
+export function createClaudeCodeAgent(): () => Promise<void> {
+  return createAgent({
+    name: "claude-code",
+    run: runClaudeCode,
+    continue: continueClaudeCode,
+  });
+}
@@ -0,0 +1,6 @@
+#!/usr/bin/env bun
+
+import { createClaudeCodeAgent } from "./claude-code.js";
+
+const main = createClaudeCodeAgent();
+void main();
@@ -0,0 +1,7 @@
+export { buildClaudeCodePrompt, createClaudeCodeAgent } from "./claude-code.js";
+export {
+  parseClaudeCodeJsonOutput,
+  parseClaudeCodeStreamOutput,
+  storeClaudeCodeDetail,
+  storeClaudeCodeRawOutput,
+} from "./session-detail.js";
@@ -0,0 +1,64 @@
+import type { JSONSchema } from "@uncaged/json-cas";
+
+export const CLAUDE_CODE_DETAIL_SCHEMA: JSONSchema = {
+  title: "claude-code-detail",
+  type: "object",
+  required: [
+    "sessionId",
+    "model",
+    "subtype",
+    "durationMs",
+    "numTurns",
+    "totalCostUsd",
+    "stopReason",
+    "usage",
+    "turns",
+  ],
+  properties: {
+    sessionId: { type: "string" },
+    model: { type: "string" },
+    subtype: { type: "string" },
+    durationMs: { type: "integer" },
+    numTurns: { type: "integer" },
+    totalCostUsd: { type: "number" },
+    stopReason: { type: "string" },
+    usage: {
+      type: "object",
+      properties: {
+        inputTokens: { type: "integer" },
+        outputTokens: { type: "integer" },
+        cacheReadInputTokens: { type: "integer" },
+        cacheCreationInputTokens: { type: "integer" },
+      },
+      required: ["inputTokens", "outputTokens", "cacheReadInputTokens", "cacheCreationInputTokens"],
+    },
+    turns: {
+      type: "array",
+      items: { type: "string" },
+    },
+  },
+  additionalProperties: false,
+};
+
+export const CLAUDE_CODE_TURN_SCHEMA: JSONSchema = {
+  title: "claude-code-turn",
+  type: "object",
+  required: ["index", "role", "content", "toolCalls"],
+  properties: {
+    index: { type: "integer" },
+    role: { type: "string" },
+    content: { type: "string" },
+    toolCalls: {},
+  },
+  additionalProperties: false,
+};
+
+export const CLAUDE_CODE_RAW_OUTPUT_SCHEMA: JSONSchema = {
+  title: "claude-code-raw-output",
+  type: "object",
+  required: ["text"],
+  properties: {
+    text: { type: "string" },
+  },
+  additionalProperties: false,
+};
@@ -0,0 +1,259 @@
+import { bootstrap, putSchema, type Store } from "@uncaged/json-cas";
+
+import {
+  CLAUDE_CODE_DETAIL_SCHEMA,
+  CLAUDE_CODE_RAW_OUTPUT_SCHEMA,
+  CLAUDE_CODE_TURN_SCHEMA,
+} from "./schemas.js";
+import type {
+  ClaudeCodeDetailPayload,
+  ClaudeCodeParsedResult,
+  ClaudeCodeToolCall,
+  ClaudeCodeTurnPayload,
+} from "./types.js";
+
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === "object" && value !== null && !Array.isArray(value);
+}
+
+function safeNumber(v: unknown, fallback = 0): number {
+  return typeof v === "number" ? v : fallback;
+}
+
+function safeString(v: unknown, fallback = ""): string {
+  return typeof v === "string" ? v : fallback;
+}
+
+/**
+ * Extract tool calls from an assistant message content array.
+ */
+function extractToolCalls(content: unknown[]): ClaudeCodeToolCall[] {
+  const calls: ClaudeCodeToolCall[] = [];
+  for (const item of content) {
+    if (isRecord(item) && item.type === "tool_use" && typeof item.name === "string") {
+      calls.push({
+        name: item.name,
+        input: typeof item.input === "string" ? item.input : JSON.stringify(item.input ?? {}),
+      });
+    }
+  }
+  return calls;
+}
+
+/**
+ * Extract text content from a message content array.
+ */
+function extractTextContent(content: unknown[]): string {
+  const texts: string[] = [];
+  for (const item of content) {
+    if (isRecord(item) && item.type === "text" && typeof item.text === "string") {
+      texts.push(item.text);
+    }
+  }
+  return texts.join("\n");
+}
+
+/**
+ * Extract tool result content from a user message content array.
+ */
+function extractToolResultContent(content: unknown[]): string {
+  const results: string[] = [];
+  for (const item of content) {
+    if (isRecord(item) && item.type === "tool_result") {
+      const text = typeof item.content === "string" ? item.content : "";
+      results.push(text);
+    }
+  }
+  return results.join("\n");
+}
+
+/**
+ * Parse Claude Code stream-json (NDJSON) output.
+ * Each line is a JSON object with type: "system" | "assistant" | "user" | "result".
+ */
+export function parseClaudeCodeStreamOutput(stdout: string): ClaudeCodeParsedResult | null {
+  const lines = stdout.trim().split("\n");
+  const turns: ClaudeCodeTurnPayload[] = [];
+  let resultLine: Record<string, unknown> | null = null;
+  let model = "";
+  let turnIndex = 0;
+
+  for (const line of lines) {
+    let parsed: unknown;
+    try {
+      parsed = JSON.parse(line);
+    } catch {
+      continue;
+    }
+    if (!isRecord(parsed)) continue;
+
+    const type = parsed.type;
+
+    if (type === "system" && typeof parsed.model === "string") {
+      model = parsed.model;
+    }
+
+    if (type === "assistant" && isRecord(parsed.message)) {
+      const msg = parsed.message;
+      const content = Array.isArray(msg.content) ? msg.content : [];
+      const textContent = extractTextContent(content as unknown[]);
+      const toolCalls = extractToolCalls(content as unknown[]);
+
+      // Only record turns that have actual content
+      if (textContent !== "" || toolCalls.length > 0) {
+        turns.push({
+          index: turnIndex++,
+          role: "assistant",
+          content: textContent,
+          toolCalls: toolCalls.length > 0 ? toolCalls : null,
+        });
+      }
+    }
+
+    if (type === "user" && isRecord(parsed.message)) {
+      const msg = parsed.message;
+      const content = Array.isArray(msg.content) ? msg.content : [];
+      const resultContent = extractToolResultContent(content as unknown[]);
+
+      if (resultContent !== "") {
+        turns.push({
+          index: turnIndex++,
+          role: "tool_result",
+          content: resultContent,
+          toolCalls: null,
+        });
+      }
+    }
+
+    if (type === "result") {
+      resultLine = parsed;
+    }
+  }
+
+  if (resultLine === null) return null;
+
+  const sessionId = resultLine.session_id;
+  const result = resultLine.result;
+  const subtype = resultLine.subtype;
+
+  if (typeof sessionId !== "string" || typeof result !== "string" || typeof subtype !== "string") {
+    return null;
+  }
+
+  const usage = isRecord(resultLine.usage) ? resultLine.usage : {};
+
+  return {
+    type: safeString(resultLine.type, "result"),
+    subtype: subtype as ClaudeCodeParsedResult["subtype"],
+    result,
+    sessionId,
+    numTurns: safeNumber(resultLine.num_turns),
+    totalCostUsd: safeNumber(resultLine.total_cost_usd),
+    durationMs: safeNumber(resultLine.duration_ms),
+    model,
+    stopReason: safeString(resultLine.stop_reason),
+    usage: {
+      inputTokens: safeNumber(usage.input_tokens),
+      outputTokens: safeNumber(usage.output_tokens),
+      cacheReadInputTokens: safeNumber(usage.cache_read_input_tokens),
+      cacheCreationInputTokens: safeNumber(usage.cache_creation_input_tokens),
+    },
+    turns,
+  };
+}
+
+/**
+ * Legacy: parse Claude Code plain JSON output (non-streaming).
+ * Falls back when stream-json is not available.
+ */
+export function parseClaudeCodeJsonOutput(stdout: string): ClaudeCodeParsedResult | null {
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(stdout.trim());
+  } catch {
+    return null;
+  }
+
+  if (!isRecord(parsed)) return null;
+
+  const sessionId = parsed.session_id;
+  const result = parsed.result;
+  const subtype = parsed.subtype;
+
+  if (typeof sessionId !== "string" || typeof result !== "string" || typeof subtype !== "string") {
+    return null;
+  }
+
+  const usage = isRecord(parsed.usage) ? parsed.usage : {};
+
+  return {
+    type: safeString(parsed.type, "result"),
+    subtype: subtype as ClaudeCodeParsedResult["subtype"],
+    result,
+    sessionId,
+    numTurns: safeNumber(parsed.num_turns),
+    totalCostUsd: safeNumber(parsed.total_cost_usd),
+    durationMs: safeNumber(parsed.duration_ms),
+    model: "",
+    stopReason: safeString(parsed.stop_reason),
+    usage: {
+      inputTokens: safeNumber(usage.input_tokens),
+      outputTokens: safeNumber(usage.output_tokens),
+      cacheReadInputTokens: safeNumber(usage.cache_read_input_tokens),
+      cacheCreationInputTokens: safeNumber(usage.cache_creation_input_tokens),
+    },
+    turns: [],
+  };
+}
+
+type ClaudeCodeSchemaHashes = {
+  detail: string;
+  turn: string;
+  rawOutput: string;
+};
+
+async function registerSchemas(store: Store): Promise<ClaudeCodeSchemaHashes> {
+  await bootstrap(store);
+  const [detail, turn, rawOutput] = await Promise.all([
+    putSchema(store, CLAUDE_CODE_DETAIL_SCHEMA),
+    putSchema(store, CLAUDE_CODE_TURN_SCHEMA),
+    putSchema(store, CLAUDE_CODE_RAW_OUTPUT_SCHEMA),
+  ]);
+  return { detail, turn, rawOutput };
+}
+
+/** Store parsed Claude Code result with per-turn breakdown as CAS detail nodes. */
+export async function storeClaudeCodeDetail(
+  store: Store,
+  parsed: ClaudeCodeParsedResult,
+): Promise<{ detailHash: string; output: string; sessionId: string }> {
+  const schemas = await registerSchemas(store);
+
+  // Store each turn as an individual CAS node
+  const turnHashes: string[] = [];
+  for (const turn of parsed.turns) {
+    const hash = await store.put(schemas.turn, turn);
+    turnHashes.push(hash);
+  }
+
+  const detail: ClaudeCodeDetailPayload = {
+    sessionId: parsed.sessionId,
+    model: parsed.model,
+    subtype: parsed.subtype,
+    durationMs: parsed.durationMs,
+    numTurns: parsed.numTurns,
+    totalCostUsd: parsed.totalCostUsd,
+    stopReason: parsed.stopReason,
+    usage: parsed.usage,
+    turns: turnHashes,
+  };
+
+  const detailHash = await store.put(schemas.detail, detail);
+  return { detailHash, output: parsed.result, sessionId: parsed.sessionId };
+}
+
+/** Fallback: store raw text output when JSON parsing fails. */
+export async function storeClaudeCodeRawOutput(store: Store, rawOutput: string): Promise<string> {
+  const schemas = await registerSchemas(store);
+  return store.put(schemas.rawOutput, { text: rawOutput });
+}
@@ -0,0 +1,53 @@
+export type ClaudeCodeResultSubtype = "success" | "error_max_turns" | "error_budget";
+
+/** A single tool call within an assistant turn. */
+export type ClaudeCodeToolCall = {
+  name: string;
+  input: string;
+};
+
+/** A single turn (assistant text, tool use, or tool result). */
+export type ClaudeCodeTurnPayload = {
+  index: number;
+  role: "assistant" | "tool_result";
+  content: string;
+  toolCalls: ClaudeCodeToolCall[] | null;
+};
+
+/** Top-level detail stored as CAS node. */
+export type ClaudeCodeDetailPayload = {
+  sessionId: string;
+  model: string;
+  subtype: string;
+  durationMs: number;
+  numTurns: number;
+  totalCostUsd: number;
+  stopReason: string;
+  usage: {
+    inputTokens: number;
+    outputTokens: number;
+    cacheReadInputTokens: number;
+    cacheCreationInputTokens: number;
+  };
+  turns: string[]; // CAS hashes of ClaudeCodeTurnPayload
+};
+
+/** Intermediate parsed result from stream-json output. */
+export type ClaudeCodeParsedResult = {
+  type: string;
+  subtype: ClaudeCodeResultSubtype;
+  result: string;
+  sessionId: string;
+  numTurns: number;
+  totalCostUsd: number;
+  durationMs: number;
+  model: string;
+  stopReason: string;
+  usage: {
+    inputTokens: number;
+    outputTokens: number;
+    cacheReadInputTokens: number;
+    cacheCreationInputTokens: number;
+  };
+  turns: ClaudeCodeTurnPayload[];
+};
@@ -0,0 +1,6 @@
+{
+  "extends": "../../tsconfig.json",
+  "compilerOptions": { "rootDir": "src", "outDir": "dist" },
+  "include": ["src"],
+  "references": [{ "path": "../workflow-agent-kit" }]
+}
@@ -0,0 +1,78 @@
+import { afterEach, beforeEach, describe, expect, it } from "bun:test";
+
+import { HermesAcpClient } from "../src/acp-client.js";
+
+const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
+
+describe("HermesAcpClient", () => {
+  let client: HermesAcpClient;
+
+  beforeEach(() => {
+    client = new HermesAcpClient();
+  });
+
+  afterEach(async () => {
+    await client.close();
+  });
+
+  it(
+    "connect() returns a UUID sessionId",
+    async () => {
+      const sessionId = await client.connect(process.cwd());
+      expect(typeof sessionId).toBe("string");
+      expect(sessionId).toMatch(UUID_RE);
+    },
+    { timeout: 2 * 60 * 1000 },
+  );
+
+  it(
+    "prompt() returns a non-empty text response",
+    async () => {
+      await client.connect(process.cwd());
+      const result = await client.prompt("Reply with exactly the word: PONG");
+      expect(typeof result.text).toBe("string");
+      expect(result.text.length).toBeGreaterThan(0);
+      expect(typeof result.sessionId).toBe("string");
+      expect(result.sessionId).toMatch(UUID_RE);
+    },
+    { timeout: 2 * 60 * 1000 },
+  );
+
+  it(
+    "prompt() can be called twice on the same session (resume)",
+    async () => {
+      await client.connect(process.cwd());
+
+      const first = await client.prompt("Say the word ALPHA and nothing else.");
+      expect(first.text.length).toBeGreaterThan(0);
+
+      const second = await client.prompt("Now say the word BETA and nothing else.");
+      expect(second.text.length).toBeGreaterThan(0);
+
+      expect(first.sessionId).toBe(second.sessionId);
+    },
+    { timeout: 2 * 60 * 1000 },
+  );
+
+  // TODO(#435): flaky — depends on live LLM; mock or move to integration suite
+  it.skip(
+    "prompt() collects structured messages including tool calls",
+    async () => {
+      await client.connect(process.cwd());
+      const result = await client.prompt("Run this command: echo TOOL_DETAIL_TEST");
+      expect(result.messages.length).toBeGreaterThan(0);
+      // Should have at least one tool message (the echo command)
+      const toolMessages = result.messages.filter((m) => m.role === "tool");
+      expect(toolMessages.length).toBeGreaterThan(0);
+      // Tool message should contain the output
+      const toolContent = toolMessages[0]?.content ?? "";
+      expect(toolContent).toContain("TOOL_DETAIL_TEST");
+      // Should have assistant messages with tool_calls
+      const assistantWithTools = result.messages.filter(
+        (m) => m.role === "assistant" && m.tool_calls !== null,
+      );
+      expect(assistantWithTools.length).toBeGreaterThan(0);
+    },
+    { timeout: 2 * 60 * 1000 },
+  );
+});
@@ -0,0 +1,98 @@
+import { describe, expect, test } from "bun:test";
+import type { AgentContext } from "@uncaged/workflow-agent-kit";
+import type { ThreadId } from "@uncaged/workflow-protocol";
+import { buildHermesPrompt } from "../src/hermes.js";
+
+function makeCtx(overrides: Partial<AgentContext> = {}): AgentContext {
+  return {
+    threadId: "01JTEST0000000000000000000" as ThreadId,
+    edgePrompt: "Proceed with the assigned role.",
+    isFirstVisit: true,
+    workflow: {
+      roles: {
+        developer: {
+          description: "TDD implementation per test spec",
+          goal: "Write code",
+          capabilities: ["coding"],
+          procedure: "1. Read spec\n2. Write code",
+          output: "List files changed",
+          frontmatter: "",
+        },
+      },
+      conditions: {},
+      graph: {},
+    },
+    role: "developer",
+    start: { prompt: "Fix the bug", workflowHash: "abc123", threadId: "t1" },
+    steps: [],
+    store: {} as AgentContext["store"],
+    outputFormatInstruction: "Use YAML frontmatter",
+    ...overrides,
+  };
+}
+
+describe("buildHermesPrompt", () => {
+  test("first visit uses full role prompt and includes moderator instruction", () => {
+    const result = buildHermesPrompt(
+      makeCtx({ edgePrompt: "Focus on the failing test.", isFirstVisit: true }),
+    );
+
+    expect(result).toMatch(/^Use YAML frontmatter/);
+    expect(result).toContain("Write code");
+    expect(result).toContain("## Task\nFix the bug");
+    expect(result).toContain("## Moderator Instruction");
+    expect(result).toContain("Focus on the failing test.");
+  });
+
+  test("re-entry uses continuation prompt with edge instruction", () => {
+    const ctx = makeCtx({
+      isFirstVisit: false,
+      edgePrompt: "The reviewer rejected your work. Fix the issues.",
+      steps: [
+        {
+          role: "developer",
+          output: { summary: "Initial fix" },
+          agent: "uwf-hermes",
+          detail: "detail-1",
+          edgePrompt: "Implement the fix.",
+        },
+        {
+          role: "reviewer",
+          output: { approved: false },
+          agent: "uwf-hermes",
+          detail: "detail-2",
+          edgePrompt: "Review the code.",
+        },
+      ],
+    });
+
+    const result = buildHermesPrompt(ctx);
+
+    expect(result).not.toContain("## Task");
+    expect(result).toContain("## What Happened Since Your Last Turn");
+    expect(result).toContain("## Moderator Instruction");
+    expect(result).toContain("The reviewer rejected your work.");
+  });
+
+  test("forced first visit via isFirstVisit uses initial prompt even when role appears in history", () => {
+    const result = buildHermesPrompt(
+      makeCtx({
+        isFirstVisit: true,
+        steps: [
+          {
+            role: "developer",
+            output: { done: true },
+            agent: "uwf-hermes",
+            detail: "detail-1",
+            edgePrompt: "First attempt.",
+          },
+        ],
+        edgePrompt: "Retry with a fresh approach.",
+      }),
+    );
+
+    expect(result).toContain("## Task");
+    expect(result).toContain("Retry with a fresh approach.");
+    expect(result).not.toContain("## What Happened Since Your Last Turn");
+  });
+});
@@ -0,0 +1,57 @@
+import { afterEach, describe, expect, it } from "bun:test";
+
+import { HermesAcpClient } from "../src/acp-client.js";
+
+/**
+ * E2E test for cross-process session resume.
+ *
+ * Simulates the workflow re-entry scenario:
+ * 1. Client A: connect → prompt → close (developer first run)
+ * 2. Client B: resume(sessionId) → prompt (developer re-entry after reviewer reject)
+ *
+ * This is what happens when uwf thread step spawns uwf-hermes twice for the same role.
+ */
+describe("HermesAcpClient cross-process resume", () => {
+  const clients: HermesAcpClient[] = [];
+
+  afterEach(async () => {
+    for (const c of clients) {
+      await c.close();
+    }
+    clients.length = 0;
+  });
+
+  // TODO(#435): flaky — depends on live LLM; mock or move to integration suite
+  it.skip(
+    "resume() after close — second prompt returns non-empty text",
+    async () => {
+      // --- Client A: first run ---
+      const clientA = new HermesAcpClient();
+      clients.push(clientA);
+
+      await clientA.connect(process.cwd());
+      const first = await clientA.prompt(
+        "Remember the secret code: WATERMELON. Reply with exactly: ACKNOWLEDGED",
+      );
+      expect(first.text.length).toBeGreaterThan(0);
+      const sessionId = first.sessionId;
+
+      // Close client A (simulates uwf-hermes process exit)
+      await clientA.close();
+
+      // --- Client B: resume (simulates re-entry) ---
+      const clientB = new HermesAcpClient();
+      clients.push(clientB);
+
+      await clientB.resume(sessionId, process.cwd());
+      const second = await clientB.prompt(
+        "What was the secret code I told you earlier? Reply with just the code word.",
+      );
+
+      // The critical assertion: resumed session produces non-empty output
+      expect(second.text.length).toBeGreaterThan(0);
+      expect(second.sessionId).toBe(sessionId);
+    },
+    { timeout: 3 * 60 * 1000 },
+  );
+});
@@ -22,7 +22,9 @@
  },
  "dependencies": {
    "@uncaged/json-cas": "^0.4.0",
-    "@uncaged/workflow-agent-kit": "workspace:^"
+    "@uncaged/workflow-agent-kit": "workspace:^",
+    "@uncaged/workflow-protocol": "workspace:^",
+    "@uncaged/workflow-util": "workspace:^"
  },
  "devDependencies": {
    "typescript": "^5.8.3"
@@ -0,0 +1,392 @@
+import type { ChildProcess } from "node:child_process";
+import { spawn } from "node:child_process";
+import { createInterface } from "node:readline";
+
+import type { HermesSessionMessage } from "./types.js";
+
+const HERMES_COMMAND = "hermes";
+const PROTOCOL_VERSION = 1;
+
+type JsonRpcResponse = {
+  jsonrpc: "2.0";
+  id: number;
+  result?: unknown;
+  error?: { code: number; message: string };
+};
+
+type PendingRequest = {
+  resolve: (value: JsonRpcResponse) => void;
+  reject: (reason: Error) => void;
+};
+
+/** Tracks in-flight tool calls so we can build complete messages when they finish. */
+type PendingToolCall = {
+  name: string;
+  args: string;
+};
+
+export type AcpPromptResult = {
+  text: string;
+  sessionId: string;
+  messages: HermesSessionMessage[];
+};
+
+export class HermesAcpClient {
+  private process: ChildProcess | null = null;
+  private nextId = 1;
+  private sessionId: string | null = null;
+  private stderrBuffer = "";
+  private pending = new Map<number, PendingRequest>();
+
+  // Message collection state
+  private messageChunks: string[] = [];
+  private reasoningChunks: string[] = [];
+  private pendingTools = new Map<string, PendingToolCall>();
+  messages: HermesSessionMessage[] = [];
+
+  /** Spawn hermes acp, initialize, create session */
+  async connect(cwd: string): Promise<string> {
+    await this.ensureProcess();
+    await this.initialize();
+
+    const sessionResponse = (await this.sendRequest("session/new", {
+      cwd,
+      mcpServers: [],
+    })) as { result: { sessionId: string } };
+
+    const sessionId = sessionResponse.result?.sessionId;
+    if (typeof sessionId !== "string" || sessionId === "") {
+      throw new Error(`session/new did not return a sessionId: ${JSON.stringify(sessionResponse)}`);
+    }
+
+    this.sessionId = sessionId;
+    return sessionId;
+  }
+
+  /** Spawn hermes acp, initialize, resume an existing session */
+  async resume(sessionId: string, cwd: string): Promise<string> {
+    await this.ensureProcess();
+    await this.initialize();
+
+    const response = await this.sendRequest("session/resume", {
+      cwd,
+      sessionId,
+      mcpServers: [],
+    });
+
+    if ((response as { error?: unknown }).error !== undefined) {
+      throw new Error(
+        `session/resume failed: ${JSON.stringify((response as { error: unknown }).error)}`,
+      );
+    }
+
+    this.sessionId = sessionId;
+    return sessionId;
+  }
+
+  /** Send prompt and collect full response text + structured messages. */
+  async prompt(text: string): Promise<AcpPromptResult> {
+    if (this.sessionId === null) {
+      throw new Error("Not connected — call connect() first");
+    }
+
+    this.messageChunks = [];
+    this.reasoningChunks = [];
+
+    const response = await this.sendRequest("session/prompt", {
+      sessionId: this.sessionId,
+      prompt: [{ type: "text", text }],
+    });
+
+    if ((response as { error?: unknown }).error !== undefined) {
+      throw new Error(
+        `session/prompt failed: ${JSON.stringify((response as { error: unknown }).error)}`,
+      );
+    }
+
+    // Flush any trailing assistant text that wasn't followed by a tool call.
+    this.flushAssistantMessage();
+
+    // Extract the final assistant text from collected messages.
+    let finalText = "";
+    for (let i = this.messages.length - 1; i >= 0; i--) {
+      const msg = this.messages[i];
+      if (
+        msg !== undefined &&
+        msg.role === "assistant" &&
+        msg.content !== null &&
+        msg.content.trim() !== ""
+      ) {
+        finalText = msg.content;
+        break;
+      }
+    }
+
+    return {
+      text: finalText,
+      sessionId: this.sessionId,
+      messages: this.messages,
+    };
+  }
+
+  /** Close the connection */
+  async close(): Promise<void> {
+    if (this.process === null) {
+      return;
+    }
+    this.sessionId = null;
+    this.process.stdin?.end();
+    const proc = this.process;
+    await new Promise<void>((resolve) => {
+      proc.on("close", () => resolve());
+      setTimeout(resolve, 5000);
+    });
+    this.process = null;
+  }
+
+  // ---- JSON-RPC transport ----
+
+  private sendRequest(
+    method: string,
+    params: Record<string, unknown>,
+    timeoutMs = 10 * 60 * 1000,
+  ): Promise<JsonRpcResponse> {
+    const id = this.nextId++;
+    return new Promise<JsonRpcResponse>((resolve, reject) => {
+      const timer = setTimeout(() => {
+        this.pending.delete(id);
+        reject(new Error(`Timeout waiting for response to ${method} (id=${id})`));
+      }, timeoutMs);
+
+      this.pending.set(id, {
+        resolve: (value) => {
+          clearTimeout(timer);
+          resolve(value);
+        },
+        reject: (err) => {
+          clearTimeout(timer);
+          reject(err);
+        },
+      });
+
+      this.writeLine(JSON.stringify({ jsonrpc: "2.0", id, method, params }));
+    });
+  }
+
+  private sendNotification(method: string, params?: Record<string, unknown>): void {
+    const message: Record<string, unknown> = { jsonrpc: "2.0", method };
+    if (params !== undefined) {
+      message.params = params;
+    }
+    this.writeLine(JSON.stringify(message));
+  }
+
+  private writeLine(line: string): void {
+    if (this.process?.stdin === null || this.process?.stdin === undefined) {
+      throw new Error("Cannot write: hermes acp process stdin not available");
+    }
+    this.process.stdin.write(`${line}\n`);
+  }
+
+  private handleLine(line: string): void {
+    if (line === "") {
+      return;
+    }
+
+    let parsed: unknown;
+    try {
+      parsed = JSON.parse(line);
+    } catch {
+      return;
+    }
+
+    const msg = parsed as Record<string, unknown>;
+
+    const hasId = "id" in msg && msg.id !== undefined && msg.id !== null;
+    const hasMethod = typeof msg.method === "string";
+
+    // JSON-RPC response to one of our requests (has "id" but no "method")
+    if (hasId && !hasMethod) {
+      const response = msg as unknown as JsonRpcResponse;
+      const handler = this.pending.get(response.id);
+      if (handler !== undefined) {
+        this.pending.delete(response.id);
+        handler.resolve(response);
+      }
+      return;
+    }
+
+    // Server-initiated JSON-RPC request: session/request_permission (has "id" + "method")
+    if (msg.method === "session/request_permission" && hasId) {
+      const params = msg.params as Record<string, unknown> | undefined;
+      const options = (params?.options ?? []) as Array<{ optionId?: string }>;
+      const firstOptionId = options[0]?.optionId ?? "";
+      this.writeLine(
+        JSON.stringify({
+          jsonrpc: "2.0",
+          id: msg.id,
+          result: { outcome: { outcome: "selected", optionId: firstOptionId } },
+        }),
+      );
+      return;
+    }
+
+    // JSON-RPC notification — session/update (no "id")
+    if (msg.method === "session/update") {
+      const params = msg.params as Record<string, unknown> | undefined;
+      const update = params?.update as Record<string, unknown> | undefined;
+      if (update !== undefined) {
+        this.handleSessionUpdate(update);
+      }
+      return;
+    }
+  }
+
+  // ---- Session update → structured messages ----
+
+  private handleSessionUpdate(update: Record<string, unknown>): void {
+    const updateType = update.sessionUpdate as string;
+
+    switch (updateType) {
+      case "agent_message_chunk": {
+        const content = update.content as { type?: string; text?: string } | undefined;
+        if (content?.type === "text" && typeof content.text === "string") {
+          this.messageChunks.push(content.text);
+        }
+        break;
+      }
+
+      case "agent_thought_chunk": {
+        const content = update.content as { type?: string; text?: string } | undefined;
+        if (content?.type === "text" && typeof content.text === "string") {
+          this.reasoningChunks.push(content.text);
+        }
+        break;
+      }
+
+      case "tool_call": {
+        const title = (update.title as string) ?? "";
+        const rawInput = update.rawInput;
+        const args = rawInput !== undefined && rawInput !== null ? JSON.stringify(rawInput) : "";
+        const toolCallId = update.toolCallId as string;
+        this.pendingTools.set(toolCallId, { name: title, args });
+
+        // Flush accumulated assistant text before tool call
+        this.flushAssistantMessage();
+        break;
+      }
+
+      case "tool_call_update": {
+        const status = update.status as string | undefined;
+        if (status === "completed" || status === "failed") {
+          const toolCallId = update.toolCallId as string;
+          const pending = this.pendingTools.get(toolCallId);
+          const toolName = pending?.name ?? toolCallId;
+          const rawOutput = update.rawOutput;
+          const outputStr =
+            rawOutput !== undefined && rawOutput !== null
+              ? typeof rawOutput === "string"
+                ? rawOutput
+                : JSON.stringify(rawOutput)
+              : "";
+          this.messages.push({
+            role: "assistant",
+            content: null,
+            reasoning: null,
+            tool_calls: [{ function: { name: toolName, arguments: pending?.args ?? "" } }],
+          });
+          this.messages.push({
+            role: "tool",
+            content: outputStr,
+            reasoning: null,
+            tool_calls: null,
+          });
+          this.pendingTools.delete(toolCallId);
+        }
+        break;
+      }
+
+      default:
+        break;
+    }
+  }
+
+  /** Flush any accumulated text/reasoning into an assistant message. */
+  private flushAssistantMessage(): void {
+    const text = this.messageChunks.join("");
+    const reasoning = this.reasoningChunks.join("");
+    if (text !== "" || reasoning !== "") {
+      this.messages.push({
+        role: "assistant",
+        content: text || null,
+        reasoning: reasoning || null,
+        tool_calls: null,
+      });
+    }
+    this.messageChunks = [];
+    this.reasoningChunks = [];
+  }
+
+  private rejectAll(err: Error): void {
+    for (const handler of this.pending.values()) {
+      handler.reject(err);
+    }
+    this.pending.clear();
+  }
+
+  private async ensureProcess(): Promise<void> {
+    if (this.process !== null) {
+      return;
+    }
+
+    const child = spawn(HERMES_COMMAND, ["acp"], {
+      env: process.env,
+      shell: false,
+      stdio: ["pipe", "pipe", "pipe"],
+    });
+
+    this.process = child;
+
+    child.stderr?.on("data", (chunk: Buffer) => {
+      this.stderrBuffer += chunk.toString();
+    });
+
+    child.on("error", (cause) => {
+      const message = cause instanceof Error ? cause.message : String(cause);
+      this.rejectAll(new Error(`hermes acp spawn failed: ${message}`));
+    });
+
+    child.on("close", (code) => {
+      if (code !== 0 && this.pending.size > 0) {
+        const detail = this.stderrBuffer.trim() !== "" ? ` stderr=${this.stderrBuffer.trim()}` : "";
+        this.rejectAll(
+          new Error(`hermes acp exited unexpectedly with code ${code ?? "null"}${detail}`),
+        );
+      }
+    });
+
+    if (child.stdout === null) {
+      throw new Error("hermes acp process stdout is not available");
+    }
+    const rl = createInterface({ input: child.stdout });
+    rl.on("line", (line) => {
+      this.handleLine(line.trim());
+    });
+  }
+
+  private async initialize(): Promise<void> {
+    const initResponse = await this.sendRequest("initialize", {
+      protocolVersion: PROTOCOL_VERSION,
+      clientInfo: { name: "uwf", version: "0.1.0" },
+      capabilities: {},
+    });
+
+    if ((initResponse as { error?: unknown }).error !== undefined) {
+      throw new Error(
+        `initialize failed: ${JSON.stringify((initResponse as { error: unknown }).error)}`,
+      );
+    }
+
+    this.sendNotification("initialized");
+  }
+}
@@ -1,21 +1,18 @@
-import { spawn } from "node:child_process";
-
+import type { Store } from "@uncaged/json-cas";
 import {
  type AgentContext,
  type AgentRunResult,
+  buildContinuationPrompt,
  buildRolePrompt,
  createAgent,
 } from "@uncaged/workflow-agent-kit";
+import { createLogger } from "@uncaged/workflow-util";

-import {
-  loadHermesSession,
-  parseSessionIdFromStdout,
-  storeHermesRawOutput,
-  storeHermesSessionDetail,
-} from "./session-detail.js";
+import { HermesAcpClient } from "./acp-client.js";
+import { getCachedSessionId, isResumeDisabled, setCachedSessionId } from "./session-cache.js";
+import { storeHermesSessionDetail } from "./session-detail.js";

-const HERMES_COMMAND = "hermes";
-const HERMES_MAX_TURNS = 90;
+const log = createLogger({ sink: { kind: "stderr" } });

 function buildHistorySummary(steps: AgentContext["steps"]): string {
  if (steps.length === 0) {
@@ -36,12 +33,11 @@ function buildHistorySummary(steps: AgentContext["steps"]): string {
  return lines.join("\n");
 }

-/** Assemble system prompt, task, and prior step outputs for Hermes. */
-export function buildHermesPrompt(ctx: AgentContext): string {
+function buildInitialPrompt(ctx: AgentContext): string {
  const roleDef = ctx.workflow.roles[ctx.role];
  const rolePrompt = roleDef !== undefined ? buildRolePrompt(roleDef) : "";
  const parts: string[] = [];
-  if (ctx.outputFormatInstruction !== undefined && ctx.outputFormatInstruction !== "") {
+  if (ctx.outputFormatInstruction !== "") {
    parts.push(ctx.outputFormatInstruction, "");
  }
  parts.push(rolePrompt, "", "## Task", ctx.start.prompt);
@@ -49,74 +45,145 @@ export function buildHermesPrompt(ctx: AgentContext): string {
  if (historyBlock !== "") {
    parts.push("", historyBlock);
  }
+  parts.push("", "## Moderator Instruction", "", ctx.edgePrompt);
  return parts.join("\n");
 }

-function spawnHermesChat(prompt: string): Promise<{ stdout: string; stderr: string }> {
-  return new Promise((resolve, reject) => {
-    const args = [
-      "chat",
-      "-q",
-      prompt,
-      "--yolo",
-      "--max-turns",
-      String(HERMES_MAX_TURNS),
-      "--quiet",
-    ];
-    const child = spawn(HERMES_COMMAND, args, {
-      env: process.env,
-      shell: false,
-      stdio: ["ignore", "pipe", "pipe"],
-    });
+/** Assemble system prompt, task, and prior step outputs for Hermes. */
+export function buildHermesPrompt(ctx: AgentContext): string {
+  if (!ctx.isFirstVisit) {
+    const parts: string[] = [];
+    if (ctx.outputFormatInstruction !== "") {
+      parts.push(ctx.outputFormatInstruction, "");
+    }
+    parts.push(buildContinuationPrompt(ctx.steps, ctx.role, ctx.edgePrompt));
+    return parts.join("\n");
+  }

-    let stdout = "";
-    let stderr = "";
-    child.stdout?.on("data", (chunk: Buffer) => {
-      stdout += chunk.toString();
-    });
-    child.stderr?.on("data", (chunk: Buffer) => {
-      stderr += chunk.toString();
-    });
-
-    child.on("error", (cause) => {
-      const message = cause instanceof Error ? cause.message : String(cause);
-      reject(new Error(`hermes spawn failed: ${message}`));
-    });
-
-    child.on("close", (code) => {
-      if (code === 0) {
-        resolve({ stdout, stderr });
-        return;
-      }
-      const detail = stderr.trim() !== "" ? ` stderr=${stderr.trim()}` : "";
-      reject(new Error(`hermes exited with code ${code ?? "null"}${detail}`));
-    });
-  });
+  return buildInitialPrompt(ctx);
 }

-async function runHermes(ctx: AgentContext): Promise<AgentRunResult> {
-  const fullPrompt = buildHermesPrompt(ctx);
-  const { stdout, stderr } = await spawnHermesChat(fullPrompt);
-  const { store } = ctx;
+async function storePromptResult(
+  store: Store,
+  sessionId: string,
+  messages: Awaited<ReturnType<HermesAcpClient["prompt"]>>["messages"],
+): Promise<{ detailHash: string }> {
+  const session = {
+    session_id: sessionId,
+    model: "",
+    session_start: new Date().toISOString(),
+    messages,
+  };
+  return storeHermesSessionDetail(store, session);
+}

-  // --quiet mode: session_id may be on stdout or stderr
-  const sessionId = parseSessionIdFromStdout(stderr) ?? parseSessionIdFromStdout(stdout);
-  if (sessionId !== null) {
-    const session = await loadHermesSession(sessionId);
-    if (session !== null) {
-      const { detailHash, output } = await storeHermesSessionDetail(store, session);
-      return { output, detailHash };
+type PromptAttempt = {
+  useContinuation: boolean;
+  resumed: boolean;
+};
+
+async function prepareSession(
+  client: HermesAcpClient,
+  ctx: AgentContext,
+  cwd: string,
+): Promise<PromptAttempt> {
+  if (ctx.isFirstVisit || isResumeDisabled()) {
+    await client.connect(cwd);
+    return { useContinuation: false, resumed: false };
+  }
+
+  const cachedSessionId = await getCachedSessionId(ctx.threadId, ctx.role);
+  if (cachedSessionId === null) {
+    log("6RWK3N8Q", `no cached session for ${ctx.threadId}:${ctx.role}, starting new session`);
+    await client.connect(cwd);
+    return { useContinuation: false, resumed: false };
+  }
+
+  try {
+    await client.resume(cachedSessionId, cwd);
+    log("9MHT4V2P", `resumed hermes session ${cachedSessionId} for ${ctx.threadId}:${ctx.role}`);
+    return { useContinuation: true, resumed: true };
+  } catch (error) {
+    const message = error instanceof Error ? error.message : String(error);
+    log("3XPN7K4W", `session resume failed, falling back to new session: ${message}`);
+    await client.close();
+    await client.connect(cwd);
+    return { useContinuation: false, resumed: false };
+  }
+}
+
+/**
+ * Agent CLI factory: parses argv, runs Hermes, extracts output, writes StepNode.
+ *
+ * A single ACP client is shared across run() and continue() calls so that
+ * frontmatter retry loops keep the same Hermes session context.  The client
+ * is closed once the agent process exits (via process.on("exit")).
+ */
+export function createHermesAgent(): () => Promise<void> {
+  const client = new HermesAcpClient();
+
+  // Ensure cleanup regardless of how the process exits.
+  process.on("exit", () => {
+    void client.close();
+  });
+
+  async function runPrompt(ctx: AgentContext, useContinuation: boolean): Promise<AgentRunResult> {
+    const effectiveCtx = useContinuation ? ctx : { ...ctx, isFirstVisit: true };
+    const fullPrompt = buildHermesPrompt(effectiveCtx);
+    const { text, sessionId, messages } = await client.prompt(fullPrompt);
+    const { detailHash } = await storePromptResult(ctx.store, sessionId, messages);
+
+    if (!isResumeDisabled()) {
+      await setCachedSessionId(ctx.threadId, ctx.role, sessionId);
+    }
+
+    return { output: text, detailHash, sessionId };
+  }
+
+  async function runHermes(ctx: AgentContext): Promise<AgentRunResult> {
+    const cwd = process.cwd();
+    const attempt = await prepareSession(client, ctx, cwd);
+
+    try {
+      return await runPrompt(ctx, attempt.useContinuation);
+    } catch (error) {
+      if (!attempt.resumed) {
+        throw error;
+      }
+
+      const message = error instanceof Error ? error.message : String(error);
+      log("8FQW2R6N", `continuation prompt failed, retrying with initial prompt: ${message}`);
+      await client.close();
+      await client.connect(cwd);
+      return runPrompt(ctx, false);
    }
  }

-  const detailHash = await storeHermesRawOutput(store, stdout);
-  return { output: stdout, detailHash };
-}
+  async function continueHermes(
+    _sessionId: string,
+    message: string,
+    store: Store,
+  ): Promise<AgentRunResult> {
+    // Client is already connected from runHermes — same ACP session,
+    // so the agent sees the full conversation history (crucial for retries).
+    const { text, sessionId, messages } = await client.prompt(message);
+    const { detailHash } = await storePromptResult(store, sessionId, messages);
+    return { output: text, detailHash, sessionId };
+  }

-/** Agent CLI factory: parses argv, runs Hermes, extracts output, writes StepNode. */
-export function createHermesAgent(): () => Promise<void> {
-  return createAgent({
+  const agentMain = createAgent({
    name: "hermes",
    run: runHermes,
+    continue: continueHermes,
  });
+
+  // Wrap to ensure ACP client is closed after agent completes,
+  // so the hermes subprocess exits and bun can terminate.
+  return async () => {
+    try {
+      await agentMain();
+    } finally {
+      await client.close();
+    }
+  };
 }
@@ -1 +1,2 @@
+export { HermesAcpClient } from "./acp-client.js";
 export { buildHermesPrompt, createHermesAgent } from "./hermes.js";
@@ -0,0 +1,17 @@
+// Re-export session cache from the shared agent-kit package.
+export { getCachedSessionId, setCachedSessionId } from "@uncaged/workflow-agent-kit";
+
+export function isResumeDisabled(): boolean {
+  // Hermes ACP session/resume is broken: _restore fails for custom providers
+  // because resolve_runtime_provider("custom") throws and base_url/api_mode
+  // are lost in the fallback path.  Resume silently creates a new session
+  // (different sessionId, no history), causing empty-text responses.
+  // See: https://github.com/NousResearch/hermes-agent/issues/13489
+  // Disable by default until upstream fixes the bug.  Set UWF_HERMES_RESUME=1
+  // to opt back in.
+  const enableFlag = process.env.UWF_HERMES_RESUME;
+  if (enableFlag === "1" || enableFlag === "true") {
+    return false;
+  }
+  return true;
+}
@@ -0,0 +1,73 @@
+import type { StepContext } from "@uncaged/workflow-protocol";
+import { describe, expect, test } from "vitest";
+import { buildContinuationPrompt } from "../src/build-continuation-prompt.js";
+
+const reviewerStep: StepContext = {
+  role: "reviewer",
+  output: { approved: false, comments: "Missing tests" },
+  detail: "2MXBG6PN4A8JR",
+  agent: "uwf-hermes",
+  edgePrompt: "Review the developer's work.",
+};
+
+const developerStep: StepContext = {
+  role: "developer",
+  output: { filesChanged: ["src/app.ts"], summary: "Initial fix" },
+  detail: "1VPBG9SM5E7WK",
+  agent: "uwf-hermes",
+  edgePrompt: "Implement the fix.",
+};
+
+describe("buildContinuationPrompt", () => {
+  test("includes steps after the last matching role and the edge prompt", () => {
+    const steps: StepContext[] = [
+      developerStep,
+      reviewerStep,
+      {
+        role: "planner",
+        output: { plan: "revise approach" },
+        detail: "7BQST3VW9F2MA",
+        agent: "uwf-hermes",
+        edgePrompt: "Revise the plan.",
+      },
+    ];
+
+    const result = buildContinuationPrompt(
+      steps,
+      "developer",
+      "The reviewer rejected your implementation. Read their feedback and fix the issues.",
+    );
+
+    expect(result).toContain("## What Happened Since Your Last Turn");
+    expect(result).toContain("### Step 2: reviewer");
+    expect(result).toContain("Missing tests");
+    expect(result).toContain("### Step 3: planner");
+    expect(result).toContain("## Moderator Instruction");
+    expect(result).toContain("The reviewer rejected your implementation.");
+    expect(result).not.toContain("Initial fix");
+  });
+
+  test("uses all steps when the role has not run before", () => {
+    const result = buildContinuationPrompt(
+      [developerStep, reviewerStep],
+      "planner",
+      "Continue from the reviewer feedback.",
+    );
+
+    expect(result).toContain("### Step 1: developer");
+    expect(result).toContain("### Step 2: reviewer");
+    expect(result).toContain("Continue from the reviewer feedback.");
+  });
+
+  test("still includes moderator instruction when there are no intervening steps", () => {
+    const result = buildContinuationPrompt(
+      [developerStep],
+      "developer",
+      "Please revise your work.",
+    );
+
+    expect(result).not.toContain("## What Happened Since Your Last Turn");
+    expect(result).toContain("## Moderator Instruction");
+    expect(result).toContain("Please revise your work.");
+  });
+});
@@ -2,13 +2,32 @@ import { describe, expect, test } from "vitest";

 import { buildOutputFormatInstruction } from "../src/build-output-format-instruction.js";

+const PLANNER_SCHEMA = {
+  type: "object",
+  properties: {
+    status: { type: "string", enum: ["ready", "insufficient_info"] },
+    plan: { type: "string" },
+  },
+  required: ["status"],
+  additionalProperties: false,
+};
+
+const REVIEWER_SCHEMA = {
+  type: "object",
+  properties: {
+    approved: { type: "boolean" },
+  },
+  required: ["approved"],
+  additionalProperties: false,
+};
+
 describe("buildOutputFormatInstruction", () => {
  test("always includes the frontmatter example block", () => {
    const result = buildOutputFormatInstruction({});
    expect(result).toContain("---");
-    expect(result).toContain("status: done");
-    expect(result).toContain("confidence:");
-    expect(result).toContain("scope: role");
+    expect(result).not.toContain("status: done");
+    expect(result).not.toContain("confidence:");
+    expect(result).not.toContain("scope: role");
  });

  test("always marks frontmatter as the primary deliverable", () => {
@@ -16,17 +35,36 @@ describe("buildOutputFormatInstruction", () => {
    expect(result).toContain("primary deliverable");
  });

-  test("lists fields from a flat object schema", () => {
+  test("generates planner-specific YAML example from schema", () => {
+    const result = buildOutputFormatInstruction(PLANNER_SCHEMA);
+    expect(result).toContain("status: ready  # required | ready | insufficient_info");
+    expect(result).toContain("plan: <string>");
+    expect(result).not.toContain("status: done");
+    expect(result).not.toContain("confidence:");
+    expect(result).not.toContain("artifacts:");
+  });
+
+  test("generates reviewer-specific YAML example from schema", () => {
+    const result = buildOutputFormatInstruction(REVIEWER_SCHEMA);
+    expect(result).toContain("approved: true  # required | true | false");
+    expect(result).not.toContain("status:");
+  });
+
+  test("lists fields from a flat object schema with required marker", () => {
    const schema = {
      type: "object",
      properties: {
        status: { type: "string" },
        confidence: { type: "number" },
      },
+      required: ["status"],
    };
    const result = buildOutputFormatInstruction(schema);
-    expect(result).toContain("`status`");
+    expect(result).toContain("`status` (required)");
    expect(result).toContain("`confidence`");
+    expect(result).not.toContain("`confidence` (required)");
+    expect(result).toContain("status: <string>  # required");
+    expect(result).toContain("confidence: <number>");
  });

  test("lists union of fields from an anyOf schema", () => {
@@ -45,6 +83,8 @@ describe("buildOutputFormatInstruction", () => {
    const result = buildOutputFormatInstruction(schema);
    expect(result).toContain("`alpha`");
    expect(result).toContain("`beta`");
+    expect(result).toContain("alpha: <string>");
+    expect(result).toContain("beta: <number>");
  });

  test("lists union of fields from a oneOf schema", () => {
@@ -63,6 +103,8 @@ describe("buildOutputFormatInstruction", () => {
    const result = buildOutputFormatInstruction(schema);
    expect(result).toContain("`foo`");
    expect(result).toContain("`bar`");
+    expect(result).toContain("foo: <string>");
+    expect(result).toContain("bar: true  # true | false");
  });

  test("falls back gracefully for a non-object schema with no properties", () => {
@@ -80,6 +122,45 @@ describe("buildOutputFormatInstruction", () => {
    const result = buildOutputFormatInstruction(schema);
    const matches = [...result.matchAll(/`shared`/g)];
    expect(matches.length).toBe(1);
+    expect(result).toContain("shared: <string>");
+  });
+
+  test("marks required when any union variant requires the field", () => {
+    const schema = {
+      anyOf: [
+        {
+          type: "object",
+          properties: { shared: { type: "string" } },
+          required: ["shared"],
+        },
+        { type: "object", properties: { shared: { type: "number" } } },
+      ],
+    };
+    const result = buildOutputFormatInstruction(schema);
+    expect(result).toContain("`shared` (required)");
+    expect(result).toContain("shared: <string>  # required");
+  });
+
+  test("explicitly forbids extra frontmatter fields", () => {
+    const result = buildOutputFormatInstruction(PLANNER_SCHEMA);
+    expect(result).toMatch(/\b(only|exclusively)\b.*fields/i);
+    expect(result).toMatch(/do not add (extra|additional|other) fields/i);
+  });
+
+  test("forbids extra fields even for empty schema", () => {
+    const result = buildOutputFormatInstruction({});
+    expect(result).toMatch(/do not add (extra|additional|other) fields/i);
+  });
+
+  test("forbids extra fields for anyOf/oneOf schemas", () => {
+    const schema = {
+      anyOf: [
+        { type: "object", properties: { alpha: { type: "string" } } },
+        { type: "object", properties: { beta: { type: "number" } } },
+      ],
+    };
+    const result = buildOutputFormatInstruction(schema);
+    expect(result).toMatch(/do not add (extra|additional|other) fields/i);
  });

  test("includes focus reminder about role scope", () => {
@@ -18,13 +18,16 @@ describe("buildRolePrompt", () => {
    expect(result).toContain("## Capabilities");
    expect(result).toContain("- cursor-agent");
    expect(result).toContain("- file-edit");
+    expect(result).toContain("## Prepare");
+    expect(result).toContain("uwf CLI Reference");
+    expect(result).toContain("cursor-agent, file-edit");
    expect(result).toContain("## Procedure");
    expect(result).toContain("Implement the feature.");
    expect(result).toContain("## Output");
    expect(result).toContain("Summarize changes.");
  });

-  test("empty fields are omitted", () => {
+  test("empty fields are omitted but Prepare is always present", () => {
    const role: RoleDefinition = {
      description: "A reviewer",
      goal: "You are a code reviewer.",
@@ -35,12 +38,14 @@ describe("buildRolePrompt", () => {
    };
    const result = buildRolePrompt(role);
    expect(result).toContain("## Goal");
+    expect(result).toContain("## Prepare");
+    expect(result).toContain("uwf CLI Reference");
    expect(result).toContain("## Procedure");
    expect(result).not.toContain("## Capabilities");
    expect(result).not.toContain("## Output");
  });

-  test("all empty returns empty string", () => {
+  test("all empty still includes Prepare section", () => {
    const role: RoleDefinition = {
      description: "Minimal",
      goal: "",
@@ -50,7 +55,12 @@ describe("buildRolePrompt", () => {
      meta: "placeholder00000" as string,
    };
    const result = buildRolePrompt(role);
-    expect(result).toBe("");
+    expect(result).toContain("## Prepare");
+    expect(result).toContain("uwf CLI Reference");
+    expect(result).not.toContain("## Goal");
+    expect(result).not.toContain("## Capabilities");
+    expect(result).not.toContain("## Procedure");
+    expect(result).not.toContain("## Output");
  });

  test("capabilities rendered as bullet list", () => {
@@ -29,6 +29,27 @@ const STRICT_SCHEMA = {
  additionalProperties: false,
 };

+/** Role-specific schema (reviewer) — only approved, no standard agent fields. */
+const REVIEWER_SCHEMA = {
+  type: "object",
+  properties: {
+    approved: { type: "boolean" },
+  },
+  required: ["approved"],
+  additionalProperties: false,
+};
+
+/** Role-specific schema (planner) — custom status enum + plan hash. */
+const PLANNER_SCHEMA = {
+  type: "object",
+  properties: {
+    status: { type: "string", enum: ["ready", "insufficient_info"] },
+    plan: { type: "string" },
+  },
+  required: ["status"],
+  additionalProperties: false,
+};
+
 async function makeStoreWithSchema(schema: Record<string, unknown>) {
  const store = createMemoryStore();
  const schemaHash = await putSchema(store, schema);
@@ -134,3 +155,48 @@ describe("tryFrontmatterFastPath — fallback: schema mismatch", () => {
    expect(result).toBeNull();
  });
 });
+
+// ── Role-specific schema fields ───────────────────────────────────────────────
+
+describe("tryFrontmatterFastPath — role-specific fields", () => {
+  test("extracts approved only for reviewer schema (no extra standard fields)", async () => {
+    const { store, schemaHash } = await makeStoreWithSchema(REVIEWER_SCHEMA);
+
+    const raw = "---\napproved: true\n---\n\nReview passed.";
+
+    const result = await tryFrontmatterFastPath(raw, schemaHash, store);
+    expect(result).not.toBeNull();
+
+    const node = store.get(result!.outputHash);
+    expect(node).not.toBeNull();
+    const payload = node!.payload as Record<string, unknown>;
+    expect(payload).toEqual({ approved: true });
+    expect(payload.status).toBeUndefined();
+    expect(payload.scope).toBeUndefined();
+  });
+
+  test("extracts plan and role-specific status for planner schema", async () => {
+    const { store, schemaHash } = await makeStoreWithSchema(PLANNER_SCHEMA);
+
+    const raw = "---\nstatus: ready\nplan: 01HASHPLANNER0001\n---\n\nSpec summary.";
+
+    const result = await tryFrontmatterFastPath(raw, schemaHash, store);
+    expect(result).not.toBeNull();
+
+    const node = store.get(result!.outputHash);
+    expect(node).not.toBeNull();
+    const payload = node!.payload as Record<string, unknown>;
+    expect(payload.status).toBe("ready");
+    expect(payload.plan).toBe("01HASHPLANNER0001");
+    expect(payload.scope).toBeUndefined();
+  });
+
+  test("returns null when required role-specific field is missing", async () => {
+    const { store, schemaHash } = await makeStoreWithSchema(REVIEWER_SCHEMA);
+
+    const raw = "---\nstatus: done\nscope: role\n---\n\nBody.";
+
+    const result = await tryFrontmatterFastPath(raw, schemaHash, store);
+    expect(result).toBeNull();
+  });
+});
@@ -0,0 +1,53 @@
+import type { StepContext } from "@uncaged/workflow-protocol";
+
+function formatStep(step: StepContext, stepNumber: number): string {
+  return [
+    `### Step ${stepNumber}: ${step.role}`,
+    `Output: ${JSON.stringify(step.output)}`,
+    `Agent: ${step.agent}`,
+  ].join("\n");
+}
+
+function findLastRoleIndex(steps: StepContext[], role: string): number {
+  for (let i = steps.length - 1; i >= 0; i--) {
+    const step = steps[i];
+    if (step !== undefined && step.role === role) {
+      return i;
+    }
+  }
+  return -1;
+}
+
+/**
+ * Build a continuation prompt for a role re-entry.
+ *
+ * Finds the most recent step for `role`, collects everything after it as context,
+ * and appends the moderator edge prompt as the instruction.
+ */
+export function buildContinuationPrompt(
+  steps: StepContext[],
+  role: string,
+  edgePrompt: string,
+): string {
+  const lastIndex = findLastRoleIndex(steps, role);
+  const sinceSteps = lastIndex >= 0 ? steps.slice(lastIndex + 1) : steps;
+
+  const parts: string[] = [];
+
+  if (sinceSteps.length > 0) {
+    parts.push("## What Happened Since Your Last Turn");
+    const baseStepNumber = lastIndex >= 0 ? lastIndex + 2 : 1;
+    for (let i = 0; i < sinceSteps.length; i++) {
+      const step = sinceSteps[i];
+      if (step === undefined) {
+        continue;
+      }
+      parts.push("");
+      parts.push(formatStep(step, baseStepNumber + i));
+    }
+    parts.push("");
+  }
+
+  parts.push("## Moderator Instruction", "", edgePrompt);
+  return parts.join("\n");
+}
@@ -1,5 +1,11 @@
 import type { JSONSchema } from "@uncaged/json-cas";

+type SchemaProperty = {
+  name: string;
+  schema: JSONSchema;
+  required: boolean;
+};
+
 /**
 * Extract top-level property names from a JSON Schema object.
 *
@@ -9,9 +15,44 @@ import type { JSONSchema } from "@uncaged/json-cas";
 *
 * Returns an empty array for schemas with no inspectable property definitions.
 */
-function extractSchemaFields(schema: JSONSchema): string[] {
+export function extractSchemaFields(schema: JSONSchema): string[] {
+  return extractSchemaProperties(schema).map((p) => p.name);
+}
+
+function extractSchemaProperties(schema: JSONSchema): SchemaProperty[] {
+  const objectSchemas = collectObjectSchemas(schema);
+  if (objectSchemas.length === 0) {
+    return [];
+  }
+
+  const byName = new Map<string, SchemaProperty>();
+
+  for (const objectSchema of objectSchemas) {
+    const requiredSet = new Set(
+      Array.isArray(objectSchema.required) ? (objectSchema.required as string[]) : [],
+    );
+    const properties = objectSchema.properties as Record<string, JSONSchema> | null | undefined;
+    if (typeof properties !== "object" || properties === null) {
+      continue;
+    }
+
+    for (const [name, propSchema] of Object.entries(properties)) {
+      const required = requiredSet.has(name);
+      const existing = byName.get(name);
+      if (existing === undefined) {
+        byName.set(name, { name, schema: propSchema, required });
+      } else if (required) {
+        byName.set(name, { ...existing, required: true });
+      }
+    }
+  }
+
+  return [...byName.values()];
+}
+
+function collectObjectSchemas(schema: JSONSchema): JSONSchema[] {
  if (typeof schema.properties === "object" && schema.properties !== null) {
-    return Object.keys(schema.properties as Record<string, unknown>);
+    return [schema];
  }

  const unionKey = Array.isArray(schema.anyOf)
@@ -20,18 +61,109 @@ function extractSchemaFields(schema: JSONSchema): string[] {
      ? "oneOf"
      : null;

-  if (unionKey !== null) {
-    const variants = schema[unionKey] as JSONSchema[];
-    const fieldSet = new Set<string>();
-    for (const variant of variants) {
-      for (const field of extractSchemaFields(variant)) {
-        fieldSet.add(field);
-      }
-    }
-    return [...fieldSet];
+  if (unionKey === null) {
+    return [];
  }

-  return [];
+  const variants = schema[unionKey] as JSONSchema[];
+  const result: JSONSchema[] = [];
+  for (const variant of variants) {
+    result.push(...collectObjectSchemas(variant));
+  }
+  return result;
+}
+
+function resolvePropertySchema(prop: JSONSchema): JSONSchema {
+  if (Array.isArray(prop.enum) && prop.enum.length > 0) {
+    return prop;
+  }
+
+  const unionKey = Array.isArray(prop.anyOf) ? "anyOf" : Array.isArray(prop.oneOf) ? "oneOf" : null;
+
+  if (unionKey !== null) {
+    const variants = prop[unionKey] as JSONSchema[];
+    const nonNull = variants.filter((v) => v.type !== "null");
+    if (nonNull.length === 1) {
+      return nonNull[0];
+    }
+  }
+
+  return prop;
+}
+
+function formatYamlScalar(value: unknown): string {
+  if (typeof value === "boolean") {
+    return String(value);
+  }
+  if (typeof value === "number") {
+    return String(value);
+  }
+  return String(value);
+}
+
+function buildPropertyComment(parts: string[]): string {
+  const filtered = parts.filter((p) => p.length > 0);
+  return filtered.length > 0 ? `  # ${filtered.join(" | ")}` : "";
+}
+
+function buildPropertyExampleLine(prop: SchemaProperty): string {
+  const resolved = resolvePropertySchema(prop.schema);
+  const commentParts: string[] = [];
+  if (prop.required) {
+    commentParts.push("required");
+  }
+
+  if (Array.isArray(resolved.enum) && resolved.enum.length > 0) {
+    const enumValues = resolved.enum.map((v) => String(v));
+    commentParts.push(...enumValues);
+    const first = resolved.enum[0];
+    return `${prop.name}: ${formatYamlScalar(first)}${buildPropertyComment(commentParts)}`;
+  }
+
+  if (resolved.type === "boolean") {
+    commentParts.push("true", "false");
+    return `${prop.name}: true${buildPropertyComment(commentParts)}`;
+  }
+
+  if (resolved.type === "string") {
+    return `${prop.name}: <string>${buildPropertyComment(commentParts)}`;
+  }
+
+  if (resolved.type === "number" || resolved.type === "integer") {
+    return `${prop.name}: <number>${buildPropertyComment(commentParts)}`;
+  }
+
+  if (resolved.type === "array") {
+    return `${prop.name}:\n  - <item>${buildPropertyComment(commentParts)}`;
+  }
+
+  if (resolved.type === "object") {
+    return `${prop.name}: <object>${buildPropertyComment(commentParts)}`;
+  }
+
+  return `${prop.name}: <value>${buildPropertyComment(commentParts)}`;
+}
+
+function buildYamlExampleBlock(properties: SchemaProperty[]): string {
+  if (properties.length === 0) {
+    return "---\n\n... your markdown work here ...";
+  }
+
+  const lines = properties.map((p) => buildPropertyExampleLine(p));
+  return `---\n${lines.join("\n")}\n---\n\n... your markdown work here ...`;
+}
+
+function buildFieldList(properties: SchemaProperty[]): string {
+  if (properties.length === 0) {
+    return "  (schema fields will be extracted automatically)";
+  }
+
+  return properties
+    .map((p) => {
+      const suffix = p.required ? " (required)" : "";
+      return `  - \`${p.name}\`${suffix}`;
+    })
+    .join("\n");
 }

 /**
@@ -42,28 +174,16 @@ function extractSchemaFields(schema: JSONSchema): string[] {
 * system prompt so the deliverable format is the first thing the agent sees.
 */
 export function buildOutputFormatInstruction(schema: JSONSchema): string {
-  const fields = extractSchemaFields(schema);
-
-  const fieldList =
-    fields.length > 0
-      ? fields.map((f) => `  - \`${f}\``).join("\n")
-      : "  (schema fields will be extracted automatically)";
+  const properties = extractSchemaProperties(schema);
+  const yamlExample = buildYamlExampleBlock(properties);
+  const fieldList = buildFieldList(properties);

  return `## Deliverable Format

 Your response MUST begin with a YAML frontmatter block followed by your markdown work:

 \`\`\`
---
-status: done          # done | needs_input | in_progress | failed
-next: <role-name>     # suggested next role, or omit
-confidence: 0.9       # 0.0–1.0, your self-assessed confidence
-artifacts:            # list of file paths or CAS hashes you produced
-  - path/to/file.ts
-scope: role           # role | thread
---
-
-... your markdown work here ...
+${yamlExample}
 \`\`\`

 The frontmatter is the **primary deliverable** — the engine reads it directly.
@@ -71,5 +191,7 @@ Your meta output must satisfy these fields:

 ${fieldList}

+Output ONLY the fields listed above. Do not add extra fields that are not specified in the schema.
+
 Focus exclusively on YOUR role's deliverable. Do not perform actions outside your role's scope.`;
 }
@@ -1,10 +1,15 @@
 import type { RoleDefinition } from "@uncaged/workflow-protocol";
+import { generateCliReference } from "@uncaged/workflow-util";

 /**
 * Build the role prompt from a RoleDefinition.
 *
- * Assembles structured sections: Goal, Capabilities, Procedure, Output.
+ * Assembles structured sections: Goal, Capabilities, Prepare, Procedure, Output.
 * Empty strings and empty arrays are omitted from the output.
+ *
+ * The Prepare section always inlines the uwf CLI reference so the agent has
+ * workflow knowledge without needing to run an external command. The capabilities
+ * array is rendered as keyword hints for implicit skill loading.
 */
 export function buildRolePrompt(role: RoleDefinition): string {
  const sections: string[] = [];
@@ -18,6 +23,15 @@ export function buildRolePrompt(role: RoleDefinition): string {
    sections.push(`## Capabilities\n\n${list}`);
  }

+  const prepareLines: string[] = [generateCliReference()];
+  if (role.capabilities.length > 0) {
+    const keywords = role.capabilities.join(", ");
+    prepareLines.push(
+      `You have the following capabilities: ${keywords}. Load relevant skills matching these keywords before starting work.`,
+    );
+  }
+  sections.push(`## Prepare\n\n${prepareLines.join("\n\n")}`);
+
  if (role.procedure !== "") {
    sections.push(`## Procedure\n\n${role.procedure}`);
  }
@@ -21,6 +21,14 @@ function fail(message: string): never {
  throw new Error(message);
 }

+function readEdgePrompt(): string {
+  const value = process.env.UWF_EDGE_PROMPT;
+  if (value === undefined || value === "") {
+    fail("UWF_EDGE_PROMPT environment variable is required");
+  }
+  return value;
+}
+
 function walkChain(store: Store, schemas: AgentStore["schemas"], headHash: CasRef): ChainState {
  const headNode = store.get(headHash);
  if (headNode === null) {
@@ -94,6 +102,7 @@ async function buildHistory(
      output: expandOutput(store, step.output),
      detail: step.detail,
      agent: step.agent,
+      edgePrompt: step.edgePrompt ?? "",
    });
  }
  return history;
@@ -133,6 +142,8 @@ export async function buildContext(threadId: ThreadId, role: string): Promise<Ag
  }

  const steps = await buildHistory(store, chain.stepsNewestFirst);
+  const edgePrompt = readEdgePrompt();
+  const isFirstVisit = !steps.some((s) => s.role === role);

  return {
    threadId,
@@ -142,6 +153,8 @@ export async function buildContext(threadId: ThreadId, role: string): Promise<Ag
    workflow,
    store,
    outputFormatInstruction: "",
+    edgePrompt,
+    isFirstVisit,
  };
 }

@@ -178,6 +191,8 @@ export async function buildContextWithMeta(
  }

  const steps = await buildHistory(store, chain.stepsNewestFirst);
+  const edgePrompt = readEdgePrompt();
+  const isFirstVisit = !steps.some((s) => s.role === role);

  return {
    threadId,
@@ -187,6 +202,8 @@ export async function buildContextWithMeta(
    workflow,
    store,
    outputFormatInstruction: "",
+    edgePrompt,
+    isFirstVisit,
    meta: { storageRoot, store, schemas, headHash, chain },
  };
 }
@@ -1,13 +1,139 @@
 import type { Store } from "@uncaged/json-cas";
-import { validate } from "@uncaged/json-cas";
+import { getSchema, validate } from "@uncaged/json-cas";
 import type { CasRef } from "@uncaged/workflow-protocol";
-import { parseFrontmatterMarkdown, validateFrontmatter } from "@uncaged/workflow-util";
+import {
+  type AgentFrontmatter,
+  createLogger,
+  parseFrontmatterMarkdown,
+  validateFrontmatter,
+} from "@uncaged/workflow-util";
+import { parse as parseYaml } from "yaml";
+
+import { extractSchemaFields } from "./build-output-format-instruction.js";
+
+const log = createLogger({ sink: { kind: "stderr" } });
+
+const STANDARD_KEYS = ["status", "next", "confidence", "artifacts", "scope"] as const;
+
+type StandardKey = (typeof STANDARD_KEYS)[number];

 export type FrontmatterFastPathResult = {
  body: string;
  outputHash: CasRef;
 };

+function extractYamlBlock(raw: string): string | null {
+  const fence = "---";
+  if (!raw.startsWith(fence)) {
+    return null;
+  }
+
+  const rest = raw.slice(fence.length);
+  if (rest.length > 0 && rest[0] !== "\n" && rest[0] !== "\r") {
+    return null;
+  }
+
+  const afterOpen = rest.startsWith("\n") ? rest.slice(1) : rest;
+  const closeIndex = afterOpen.indexOf(`\n${fence}`);
+  if (closeIndex === -1) {
+    return null;
+  }
+
+  return afterOpen.slice(0, closeIndex);
+}
+
+function parseRawFrontmatterFields(raw: string): Record<string, unknown> {
+  const yamlText = extractYamlBlock(raw);
+  if (yamlText === null) {
+    return {};
+  }
+
+  try {
+    const parsed = parseYaml(yamlText);
+    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
+      return {};
+    }
+    return parsed as Record<string, unknown>;
+  } catch {
+    return {};
+  }
+}
+
+function defaultCandidate(frontmatter: AgentFrontmatter): Record<string, unknown> {
+  return {
+    status: frontmatter.status,
+    next: frontmatter.next,
+    confidence: frontmatter.confidence,
+    artifacts: [...frontmatter.artifacts],
+    scope: frontmatter.scope,
+  };
+}
+
+function pickStandardField(frontmatter: AgentFrontmatter, key: StandardKey): unknown {
+  switch (key) {
+    case "status":
+      return frontmatter.status;
+    case "next":
+      return frontmatter.next;
+    case "confidence":
+      return frontmatter.confidence;
+    case "artifacts":
+      return [...frontmatter.artifacts];
+    case "scope":
+      return frontmatter.scope;
+  }
+}
+
+function isStandardKey(key: string): key is StandardKey {
+  return (STANDARD_KEYS as readonly string[]).includes(key);
+}
+
+function pickFieldValue(
+  field: string,
+  frontmatter: AgentFrontmatter,
+  rawFields: Record<string, unknown>,
+): unknown | undefined {
+  if (!isStandardKey(field)) {
+    return Object.hasOwn(rawFields, field) ? rawFields[field] : undefined;
+  }
+
+  const coerced = pickStandardField(frontmatter, field);
+  if (field === "artifacts" || field === "scope") {
+    return coerced;
+  }
+  if (coerced !== null) {
+    return coerced;
+  }
+  return Object.hasOwn(rawFields, field) ? rawFields[field] : coerced;
+}
+
+/**
+ * Build a CAS candidate object from schema property keys and parsed frontmatter.
+ *
+ * When the schema has no inspectable properties, falls back to the five standard
+ * agent frontmatter fields for backward compatibility.
+ */
+function buildCandidate(
+  frontmatter: AgentFrontmatter,
+  rawFields: Record<string, unknown>,
+  schemaFields: string[],
+): Record<string, unknown> {
+  if (schemaFields.length === 0) {
+    return defaultCandidate(frontmatter);
+  }
+
+  const candidate: Record<string, unknown> = {};
+
+  for (const field of schemaFields) {
+    const value = pickFieldValue(field, frontmatter, rawFields);
+    if (value !== undefined) {
+      candidate[field] = value;
+    }
+  }
+
+  return candidate;
+}
+
 /**
 * Try to satisfy `outputSchema` from frontmatter fields alone.
 *
@@ -32,16 +158,22 @@ export async function tryFrontmatterFastPath(

  const validationErrors = validateFrontmatter(frontmatter);
  if (validationErrors.length > 0) {
+    log(
+      "9GNPS4WY",
+      `frontmatter validation errors: ${validationErrors.map((e) => e.message).join("; ")}`,
+    );
    return null;
  }

-  const candidate: Record<string, unknown> = {
-    status: frontmatter.status,
-    next: frontmatter.next,
-    confidence: frontmatter.confidence,
-    artifacts: [...frontmatter.artifacts],
-    scope: frontmatter.scope,
-  };
+  const schema = getSchema(store, outputSchema);
+  if (schema === null) {
+    log("8FHMR2QX", `output schema not found in CAS: ${outputSchema}`);
+    return null;
+  }
+
+  const schemaFields = extractSchemaFields(schema);
+  const rawFields = parseRawFrontmatterFields(raw);
+  const candidate = buildCandidate(frontmatter, rawFields, schemaFields);

  let outputHash: CasRef;
  let node: ReturnType<Store["get"]>;
@@ -50,10 +182,12 @@ export async function tryFrontmatterFastPath(
    outputHash = await store.put(outputSchema, candidate);
    node = store.get(outputHash);
  } catch {
+    log("2KMQT7NR", "failed to store frontmatter candidate in CAS");
    return null;
  }

  if (node === null || !validate(store, node)) {
+    log("2KMQT7NR", "stored frontmatter candidate failed schema validation");
    return null;
  }

@@ -1,3 +1,4 @@
+export { buildContinuationPrompt } from "./build-continuation-prompt.js";
 export { buildOutputFormatInstruction } from "./build-output-format-instruction.js";
 export { buildRolePrompt } from "./build-role-prompt.js";
 export type { BuildContextMeta } from "./context.js";
@@ -11,5 +12,12 @@ export {
 export type { FrontmatterFastPathResult } from "./frontmatter.js";
 export { tryFrontmatterFastPath } from "./frontmatter.js";
 export { createAgent } from "./run.js";
-export { getConfigPath, getEnvPath, loadWorkflowConfig } from "./storage.js";
-export type { AgentContext, AgentOptions, AgentRunFn, AgentRunResult } from "./types.js";
+export { getCachedSessionId, setCachedSessionId } from "./session-cache.js";
+export { getConfigPath, getEnvPath, loadWorkflowConfig, resolveStorageRoot } from "./storage.js";
+export type {
+  AgentContext,
+  AgentContinueFn,
+  AgentOptions,
+  AgentRunFn,
+  AgentRunResult,
+} from "./types.js";
@@ -3,11 +3,12 @@ import type { CasRef, StepNodePayload, ThreadId } from "@uncaged/workflow-protoc
 import { config as loadDotenv } from "dotenv";
 import { buildOutputFormatInstruction } from "./build-output-format-instruction.js";
 import { buildContextWithMeta } from "./context.js";
-import { extract } from "./extract.js";
 import { tryFrontmatterFastPath } from "./frontmatter.js";
 import type { AgentStore } from "./storage.js";
-import { getEnvPath, loadWorkflowConfig, resolveStorageRoot } from "./storage.js";
-import type { AgentContext, AgentOptions, AgentRunResult } from "./types.js";
+import { getEnvPath, resolveStorageRoot } from "./storage.js";
+import type { AgentOptions } from "./types.js";
+
+const MAX_FRONTMATTER_RETRIES = 2;

 function fail(message: string): never {
  process.stderr.write(`${message}\n`);
@@ -49,6 +50,7 @@ async function writeStepNode(options: {
  outputHash: CasRef;
  detailHash: CasRef;
  agentName: string;
+  edgePrompt: string;
 }): Promise<CasRef> {
  const payload: StepNodePayload = {
    start: options.startHash,
@@ -57,6 +59,7 @@ async function writeStepNode(options: {
    output: options.outputHash,
    detail: options.detailHash,
    agent: options.agentName,
+    edgePrompt: options.edgePrompt,
  };
  const hash = await options.store.put(options.schemas.stepNode, payload);
  const node = options.store.get(hash);
@@ -66,31 +69,16 @@ async function writeStepNode(options: {
  return hash;
 }

-async function runAgent(options: AgentOptions, ctx: AgentContext): Promise<AgentRunResult> {
-  return runWithMessage("agent run failed", () => options.run(ctx));
-}
-
-async function extractOutput(
+async function tryExtractOutput(
  rawOutput: string,
  outputSchema: CasRef,
-  storageRoot: string,
  ctx: Awaited<ReturnType<typeof buildContextWithMeta>>,
-): Promise<CasRef> {
-  const fastPath = await runWithMessage("frontmatter fast path", () =>
-    tryFrontmatterFastPath(rawOutput, outputSchema, ctx.meta.store),
-  ).catch(() => null);
-
+): Promise<CasRef | null> {
+  const fastPath = await tryFrontmatterFastPath(rawOutput, outputSchema, ctx.meta.store);
  if (fastPath !== null) {
    return fastPath.outputHash;
  }
-
-  const config = await runWithMessage("failed to load config", () =>
-    loadWorkflowConfig(storageRoot),
-  );
-  const extracted = await runWithMessage("extract failed", () =>
-    extract(rawOutput, outputSchema, config),
-  );
-  return extracted.hash;
+  return null;
 }

 async function persistStep(options: {
@@ -109,14 +97,10 @@ async function persistStep(options: {
    outputHash: options.outputHash,
    detailHash: options.detailHash,
    agentName: options.agentName,
+    edgePrompt: options.ctx.edgePrompt,
  });
 }

-/**
- * Create an agent CLI entrypoint.
- * Parses argv (`<thread-id> <role>`), runs the agent, extracts structured output,
- * writes StepNode to CAS, and prints the new node hash to stdout.
- */
 export function createAgent(options: AgentOptions): () => Promise<void> {
  return async function main(): Promise<void> {
    const { threadId, role } = parseArgv(process.argv);
@@ -130,13 +114,36 @@ export function createAgent(options: AgentOptions): () => Promise<void> {
      fail(`unknown role: ${role}`);
    }

-    const metaSchema = getSchema(ctx.meta.store, roleDef.meta);
-    if (metaSchema !== null) {
-      ctx.outputFormatInstruction = buildOutputFormatInstruction(metaSchema);
+    const frontmatterSchema = getSchema(ctx.meta.store, roleDef.frontmatter);
+    if (frontmatterSchema !== null) {
+      ctx.outputFormatInstruction = buildOutputFormatInstruction(frontmatterSchema);
+    }
+
+    let agentResult = await runWithMessage("agent run failed", () => options.run(ctx));
+
+    // Try to extract frontmatter; retry via continue if it fails
+    let outputHash = await tryExtractOutput(agentResult.output, roleDef.frontmatter, ctx);
+
+    for (let retry = 0; retry < MAX_FRONTMATTER_RETRIES && outputHash === null; retry++) {
+      const correctionMessage =
+        "Your previous response did not contain valid YAML frontmatter matching the role schema.\n" +
+        "You MUST begin your response with a YAML frontmatter block (--- delimited).\n" +
+        "Please output ONLY the corrected frontmatter block followed by your work.";
+
+      agentResult = await runWithMessage("agent continue failed", () =>
+        options.continue(agentResult.sessionId, correctionMessage, ctx.meta.store),
+      );
+      outputHash = await tryExtractOutput(agentResult.output, roleDef.frontmatter, ctx);
+    }
+
+    if (outputHash === null) {
+      fail(
+        "Agent output does not contain valid YAML frontmatter matching the role schema " +
+          `after ${MAX_FRONTMATTER_RETRIES} retries.\n` +
+          `Raw output (first 500 chars): ${agentResult.output.slice(0, 500)}`,
+      );
    }

-    const agentResult = await runAgent(options, ctx);
-    const outputHash = await extractOutput(agentResult.output, roleDef.meta, storageRoot, ctx);
    const stepHash = await persistStep({
      ctx,
      outputHash,
@@ -0,0 +1,75 @@
+import { randomBytes } from "node:crypto";
+import { mkdir, readFile, rename, writeFile } from "node:fs/promises";
+import { dirname, join } from "node:path";
+
+import type { ThreadId } from "@uncaged/workflow-protocol";
+
+import { resolveStorageRoot } from "./storage.js";
+
+type SessionCache = Record<string, string>;
+
+function getCachePath(): string {
+  return join(resolveStorageRoot(), "cache", "agent-sessions.json");
+}
+
+function cacheKey(threadId: ThreadId, role: string): string {
+  return `${threadId}:${role}`;
+}
+
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === "object" && value !== null && !Array.isArray(value);
+}
+
+async function readCache(): Promise<SessionCache> {
+  const path = getCachePath();
+  try {
+    const text = await readFile(path, "utf8");
+    const raw = JSON.parse(text) as unknown;
+    if (!isRecord(raw)) {
+      return {};
+    }
+    const cache: SessionCache = {};
+    for (const [key, value] of Object.entries(raw)) {
+      if (typeof value === "string" && value !== "") {
+        cache[key] = value;
+      }
+    }
+    return cache;
+  } catch (e) {
+    const err = e as NodeJS.ErrnoException;
+    if (err.code === "ENOENT") {
+      return {};
+    }
+    throw e;
+  }
+}
+
+async function writeCache(cache: SessionCache): Promise<void> {
+  const path = getCachePath();
+  const dir = dirname(path);
+  await mkdir(dir, { recursive: true });
+  // Atomic write: write to temp file then rename to avoid partial reads on concurrent access.
+  // NOTE: Current workflow execution is serial (execFileSync), so true concurrency doesn't occur.
+  // This is a safety net for future parallel execution.
+  const tmpPath = join(dir, `.agent-sessions.${randomBytes(4).toString("hex")}.tmp`);
+  await writeFile(tmpPath, `${JSON.stringify(cache, null, 2)}\n`, "utf8");
+  await rename(tmpPath, path);
+}
+
+/** Read the cached session ID for a thread+role pair. */
+export async function getCachedSessionId(threadId: ThreadId, role: string): Promise<string | null> {
+  const cache = await readCache();
+  const sessionId = cache[cacheKey(threadId, role)];
+  return sessionId ?? null;
+}
+
+/** Write the session ID for a thread+role pair into the cache. */
+export async function setCachedSessionId(
+  threadId: ThreadId,
+  role: string,
+  sessionId: string,
+): Promise<void> {
+  const cache = await readCache();
+  cache[cacheKey(threadId, role)] = sessionId;
+  await writeCache(cache);
+}
@@ -12,16 +12,33 @@ export type AgentContext = ModeratorContext & {
   * role's output schema.  Populated by `createAgent` at run time.
   */
  outputFormatInstruction: string;
+  /**
+   * Edge prompt from the graph transition that led to this role (UWF_EDGE_PROMPT).
+   * Always the real moderator instruction for this step.
+   */
+  edgePrompt: string;
+  /**
+   * True when the current role has not appeared in steps history before this invocation.
+   */
+  isFirstVisit: boolean;
 };

 export type AgentRunResult = {
  output: string;
  detailHash: string;
+  sessionId: string;
 };

+export type AgentContinueFn = (
+  sessionId: string,
+  message: string,
+  store: AgentContext["store"],
+) => Promise<AgentRunResult>;
+
 export type AgentRunFn = (ctx: AgentContext) => Promise<AgentRunResult>;

 export type AgentOptions = {
  name: string;
  run: AgentRunFn;
+  continue: AgentContinueFn;
 };
@@ -0,0 +1,25 @@
+{
+  "$schema": "https://ui.shadcn.com/schema.json",
+  "style": "base-nova",
+  "rsc": false,
+  "tsx": true,
+  "tailwind": {
+    "config": "",
+    "css": "src/index.css",
+    "baseColor": "neutral",
+    "cssVariables": true,
+    "prefix": ""
+  },
+  "iconLibrary": "lucide",
+  "rtl": false,
+  "aliases": {
+    "components": "@/components",
+    "utils": "@/lib/utils",
+    "ui": "@/components/ui",
+    "lib": "@/lib",
+    "hooks": "@/hooks"
+  },
+  "menuColor": "default",
+  "menuAccent": "subtle",
+  "registries": {}
+}
@@ -0,0 +1,400 @@
+
+# Workflow UI — 开发上下文文档
+
+## 1. 项目定位
+
+workflow-dashboard 是一个 Web 图形编辑器，用于可视化展示和编辑工作流（Workflow）的结构。
+
+**核心场景**：
+- 用户本地执行 `uwf connect` 命令，通过 WebSocket 连接到此 Web 服务
+- CLI 将本地 YAML 工作流文件发送到 server
+- Server 解析后，提供图形化界面展示工作流的节点拓扑，允许用户进行逻辑编排和节点编辑
+- 编辑完成后，数据可回传给 CLI 或持久化
+
+## 2. 技术栈
+
+| 层 | 技术 | 说明 |
+|---|------|------|
+| 图编辑器 | @xyflow/react v12 | 节点/边渲染、拖拽、连线（strict 连接模式） |
+| 前端框架 | React 19 | UI 组件 |
+| 路由 | react-router v7 | Hash 模式路由 |
+| 状态管理 | 自研 (context.tsx) | 基于 useSyncExternalStore + Immer |
+| 样式 | Tailwind CSS v4 | 原子化 CSS |
+| 图标 | lucide-react | 图标库 |
+| 构建工具 | Vite 8 | Dev server + 打包 |
+| 后端框架 | Elysia | 轻量 REST API（当前为 stub） |
+
+## 3. 目录结构
+
+```
+workflow-dashboard/
+├── server.ts                 # Vite dev server 入口 (port 3000)
+├── vite.config.ts            # Vite 配置（react + tailwind + elysia 插件 + @ 别名）
+├── vite-dev.ts               # 自定义 Vite 插件
+├── components.json           # shadcn 配置
+├── server/
+│   ├── api.ts                # Elysia REST API (health + workflow CRUD)
+│   └── workflow.ts           # Workflow 文件读写 + 格式转换
+├── tmp/workflow/             # Workflow YAML 存储目录（开发阶段）
+├── src/
+│   ├── main.tsx              # React DOM 入口
+│   ├── router.tsx            # React Router 配置
+│   ├── app.tsx               # 根布局组件
+│   ├── lib/utils.ts          # Tailwind cn() 工具
+│   ├── components/ui/        # shadcn 组件（button, card, dialog, input, textarea）
+│   ├── pages/
+│   │   ├── home.tsx          # Home 列表页（workflow 管理）
+│   │   └── detail.tsx        # Workflow 详情/编辑页
+│   └── editor/               # ★ 核心编辑器
+│       ├── flow.tsx          # FlowEditor 组件 + 公开 API 导出
+│       ├── type.ts           # 内部类型定义
+│       ├── context.tsx       # 自研状态管理框架
+│       ├── injection.ts      # DI 容器（FlowModel / Injection）
+│       ├── model/            # 状态模型层
+│       ├── nodes/            # 节点渲染组件
+│       ├── edges/            # 边渲染组件
+│       ├── panel/            # UI 面板（工具栏、添加/编辑面板）
+│       ├── trans/            # 数据转换层（内外格式互转）
+│       ├── layout/           # 自动布局算法
+│       └── utils/            # 工具函数
+```
+
+## 4. 数据模型
+
+### 4.1 外部格式 — WorkFlowSteps（与 CLI 交换的数据）
+
+`WorkFlowSteps` 是 `WorkFlowStep[]`，每个 step 描述一个角色节点及其转移关系：
+
+```typescript
+type WorkFlowRole = {
+  name: string;          // 角色名称（唯一标识）
+  description: string;   // 角色描述
+  identity: string;      // 身份定义（system prompt）
+  prepare: string;       // 执行前准备指令
+  execute: string;       // 核心执行指令
+  report: string;        // 输出格式指令
+};
+
+type WorkFlowTransition = {
+  target: string;           // 目标角色名 或 'END'
+  condition: string | null; // 条件表达式，null 为 else（无条件兜底）
+};
+
+type WorkFlowStep = {
+  role: WorkFlowRole;
+  transitions: WorkFlowTransition[];
+};
+```
+
+### 4.2 内部格式 — ReactFlow Nodes & Edges
+
+编辑器内部使用 ReactFlow 的 Node/Edge 模型：
+
+**节点类型**：
+- `start` → 起始节点（右侧 1 个 source handle）
+- `end` → 结束节点（左侧 1 个 target handle）
+- `role` → 角色节点（6 个 handle，见下方）
+
+**Role 节点 Handle 布局**：
+
+| 位置 | 类型 | ID | 颜色 |
+|------|------|----|------|
+| 左侧 | target (in) | `input` | 蓝色 |
+| 上方 30% | target (in) | `input-top` | 蓝色 |
+| 下方 30% | target (in) | `input-bottom` | 蓝色 |
+| 右侧 | source (out) | `output` | 绿色 |
+| 上方 70% | source (out) | `output-top` | 绿色 |
+| 下方 70% | source (out) | `output-bottom` | 绿色 |
+
+- target handle 设置了 `isConnectableStart`，可以从 in 拖向 out 发起连线（`onConnect` 自动纠正方向）
+- source handle 设置了 `isConnectableEnd`
+
+**RoleNodeData** 对齐上游 `RoleDefinition`：
+```typescript
+type RoleNodeData = {
+  name: string;
+  description: string;
+  identity: string;
+  prepare: string;
+  execute: string;
+  report: string;
+};
+```
+
+**边类型**：
+- `default`（GradientEdge）→ 渐变色边（绿→蓝），节点仅有一条出边时使用
+- `conditional`（ConditionalEdge）→ 带条件标签的渐变色边，节点有多条出边时使用
+
+**边渲染特性**：
+- 渐变色：SVG linearGradient，从 source 端绿色（#10b981）到 target 端蓝色（#3b82f6）
+- 选中时：变为琥珀色（#f59e0b）单色，方便识别
+- 缺少条件时：红色（#ff5252）
+- 交互区域：20px 宽透明路径用于点击
+
+### 4.3 Else 分支机制
+
+当一个节点有多条 conditional 出边时：
+- **edges 数组中排第一个的 conditional 边自动成为 else**（兜底分支）
+- else 边显示灰色 `else` badge（不可点击，无需设置条件）
+- 其余边显示 `if` badge（需要设置条件，可点击编辑）
+- 只有一条 conditional 出边时不显示 else 标签
+- else 边在有 if 兄弟存在时不能被删除（`onBeforeDelete` 保护）
+- 序列化时 else 边输出 `condition: null`
+- 反序列化时 `condition: null` 的 transition 排序到第一个
+
+### 4.4 条件边自动升级与降级
+
+- **升级**：当用户从某节点拖出第二条边时，`edgesModel.onConnect` 自动将该节点所有出边升级为 `conditional` 类型。
+- **降级**：当删除 conditional 边后，若该 source 仅剩一条 conditional 出边，`handlers.onDelete` 自动将其降级回 `default` 类型。
+
+### 4.5 连线约束
+
+`onConnect` 中的校验逻辑：
+1. 禁止自连（source === target）
+2. 禁止同一对节点之间的重复边（source+target 去重）
+3. 方向归一化：从 input handle 拖到 output handle 时自动反转 source/target
+4. Handle 类型校验：source 端必须是 output handle，target 端必须是 input handle
+
+### 4.6 数据转换层（trans/）
+
+```
+WorkFlowSteps  ──transIn()──→  { nodes, edges }  ──transOut()──→  WorkFlowSteps
+                 （反序列化）                           （序列化）
+```
+
+- `transIn(steps)`: 外部步骤列表 → ReactFlow 节点和边
+- `transOut(nodes, edges)`: ReactFlow 节点和边 → 外部步骤列表
+- `validate(nodes, edges)`: 校验图结构合法性
+
+三个函数都是**纯函数**。
+
+### 4.7 验证规则
+
+1. start 恰好 1 个，输出恰好 1 条
+2. end 恰好 1 个，输入 ≥1 条，输出 0 条
+3. role 节点：输入 ≥1、输出 ≥1
+4. 多输出时：第一条 conditional 边为 else（跳过 condition 检查），其余必须有非空 condition
+5. role 节点总数 ≥2
+6. 无孤立节点（正向 BFS 从 start 可达 + 反向 BFS 从 end 可达）
+
+## 5. 架构分层
+
+### 5.1 状态管理框架（context.tsx）
+
+自研的轻量响应式系统，核心概念：
+
+| 概念 | 说明 |
+|------|------|
+| `generate<T>()` | 创建响应式 store（get/set/use/listen） |
+| `SubModel<T, A>` | 状态切片模板（name + make() + create()） |
+| `Model` | 事务管理器 + undo/redo 栈 |
+| `define.model()` | 定义有状态有 actions 的模型 |
+| `define.view()` | 定义只读视图模型 |
+| `define.memoize()` | 定义缓存计算模型 |
+| `define.compute()` | 定义响应式依赖计算（自动追踪） |
+
+使用 `useSyncExternalStore` 桥接 React 渲染。
+
+### 5.2 模型层（model/）
+
+| 模型 | 文件 | 职责 |
+|------|------|------|
+| `nodesModel` | nodes.ts | 节点数组状态 + CRUD 操作 |
+| `edgesModel` | edges.ts | 边数组状态 + 连线 + conditional 自动升级 + 连线约束 |
+| `addNodeViewModel` | add-node-view.ts | 添加节点面板的 UI 状态 |
+| `editNodeViewModel` | edit-node-view.ts | 编辑节点面板的 UI 状态 |
+| `injection` | inject.ts | DI 实例视图模型 |
+| `handlers` | handlers.ts | 事件处理器集合（拖拽、连线、删除保护、快捷键、布局、加载/保存） |
+
+### 5.3 DI 容器（injection.ts）
+
+```
+FlowModel（公开 API）          Injection（内部实现）
+  ├─ load(steps)  ──emit──→     emit('load', steps)  → handlers.loadSteps()
+  ├─ on('save', cb)              emit('save', steps)  ← handlers.saveData()
+  └─ 持有 Injection 实例
+```
+
+- `FlowModel` 是外部消费者唯一接触的类，提供 `load()` 和 `on('save')` 接口
+- 构造函数接受可选的 `inital_steps` 参数，用于加载默认工作流
+- `Injection` 是内部事件总线，解耦 server 通信与 UI 状态
+
+### 5.4 事务与 Undo/Redo
+
+Model 提供事务机制：
+- `startTransaction()` 快照当前状态
+- `endTransaction()` 将快照推入 undo 栈
+- Ctrl+Z / Ctrl+Y 触发撤销/重做
+- 拖拽、添加节点、删除等操作自动包裹在事务中
+
+## 6. 节点体系
+
+### 6.1 渲染组件
+
+```
+ReactFlow
+  ├─ nodeTypes: { start: NodeStart, end: NodeEnd, role: NodeRole }
+  └─ edgeTypes: { default: GradientEdge, conditional: ConditionalEdge }
+```
+
+`NodeRole` 显示角色名（data.name），使用 teal 色系图标和标签。Handle 分蓝色（in）和绿色（out）两种颜色。
+
+### 6.2 节点编辑
+
+角色节点的编辑器直接内联在 AddNodePanel 和 EditNodePanel 中，可编辑字段：
+- name（必填）
+- description、identity、prepare、execute、report（textarea）
+
+## 7. UI 面板
+
+| 面板 | 位置 | 内容 |
+|------|------|------|
+| Toolbar | 顶部居中 | Undo/Redo、添加角色、自动布局、保存 |
+| AddNodePanel | 右下角 | 角色节点创建表单（name + 6 字段 → 确认） |
+| EditNodePanel | 右下角 | 角色节点编辑表单（预填当前数据 → 确认） |
+
+AddNodePanel 和 EditNodePanel 互斥显示，点击外部自动关闭。
+
+## 8. 自动布局（layout/）
+
+`LayoutLR(nodes, edges)` 算法：
+1. 拓扑排序分层（BFS，start → layer 0，end → max+1）
+2. 按层分组
+3. 计算 X/Y 坐标（水平间距 80px，垂直间距 40px）
+4. 无变化时返回原数组（避免无效重渲染）
+
+## 9. 核心数据流
+
+### 加载工作流
+
+```
+FlowModel.load(steps) / FlowModel(initialSteps)
+  → Injection.emit('load', steps)
+  → handlers.loadSteps()
+  → transIn(steps) → { nodes, edges }
+    （condition: null 的 transition 排序到第一个，成为 else）
+  → nodesModel.set(nodes)
+  → edgesModel.set(edges)
+  → autoLayoutLR()
+  → model.reset()（清空 undo/redo）
+```
+
+### 保存工作流
+
+```
+用户点击 Save
+  → handlers.saveData()
+  → validate(nodes, edges)
+  → 校验失败 → Toast 提示错误
+  → 校验通过 → transOut(nodes, edges) → WorkFlowSteps
+    （第一条 conditional 边序列化为 condition: null）
+  → Injection.emit('save', steps)
+  → FlowModel.emit('save', steps)
+  → 外部消费者（server/CLI）接收
+```
+
+### 连线与条件边升级
+
+```
+用户拖线连接两个节点
+  → edgesModel.onConnect(params)
+  → normalizeConnection（方向纠正）
+  → 校验（自连、重复、handle 类型）
+  → 检查 source 已有出边数量
+  → 已有出边 → 新边 + 已有边全部升级为 conditional
+  → 首条出边 → 创建普通边
+```
+
+### 删除保护
+
+```
+用户选中节点/边按 Delete
+  → handlers.onBeforeDelete({ nodes, edges })
+  → start/end 节点 → 阻止
+  → else 边（有 if 兄弟时）→ 阻止
+  → 其他 → 允许
+```
+
+## 10. 上游数据模型参考
+
+workflow-dashboard 消费的 YAML 工作流最终映射自 `WorkflowPayload`（定义在 workflow-protocol）：
+
+```typescript
+type WorkflowPayload = {
+  name: string;
+  description: string;
+  roles: Record<string, RoleDefinition>;       // 角色定义（4 段式：identity/prepare/execute/report）
+  conditions: Record<string, ConditionDefinition>; // JSONata 条件表达式
+  graph: Record<string, Transition[]>;          // 角色间的转移图
+};
+```
+
+workflow-dashboard 使用 `WorkFlowSteps` 格式作为交换数据，其中 `WorkFlowRole` 的字段与 `RoleDefinition` 对齐（description/identity/prepare/execute/report），`WorkFlowTransition` 对应 graph 中的 `Transition`。外部（CLI/server）负责 `WorkflowPayload` ↔ `WorkFlowSteps` 的转换。
+
+## 11. 当前状态与待完善项
+
+- **WebSocket 集成**: 尚未实现，CLI connect 的 WebSocket 通信待开发
+- **验证**: 图结构校验 + 可达性检测 + else 分支规则已实现
+- **只读模式**: Detail 页面有"编辑/预览"切换按钮，但编辑器尚未实现真正的只读模式（禁止交互）
+
+## 12. 业务系统
+
+### 12.1 路由
+
+| 路由 | 页面 | 文件 |
+|------|------|------|
+| `/` | Home — Workflow 列表 | `src/pages/home.tsx` |
+| `/workflow/:name` | Detail — 预览/编辑 | `src/pages/detail.tsx` |
+
+### 12.2 后端 API
+
+Elysia REST API（`server/api.ts`），通过 Vite 插件（`vite-dev.ts`）集成到 dev server。
+
+| Method | Path | 说明 |
+|--------|------|------|
+| GET | `/api/workflows` | 列出所有 workflow（name + description） |
+| GET | `/api/workflows/:name` | 获取单个 workflow（返回 WorkFlowSteps JSON） |
+| POST | `/api/workflows` | 新建 workflow（body: `{name, description}`） |
+| PUT | `/api/workflows/:name` | 保存 workflow（body: WorkFlowSteps JSON） |
+| DELETE | `/api/workflows/:name` | 删除 workflow |
+
+### 12.3 数据存储
+
+- 存储目录：`tmp/workflow/`，文件名 `{name}.yaml`
+- 存储格式：WorkflowPayload YAML（与上游 workflow-protocol 一致）
+- Server 端负责 WorkflowPayload ↔ WorkFlowSteps 转换（`server/workflow.ts`）
+
+字段映射：
+| WorkFlowRole | RoleDefinition |
+|--------------|---------------|
+| name | roles map key |
+| description | description |
+| identity | goal |
+| prepare | capabilities (join/split by `\n`) |
+| execute | procedure |
+| report | output |
+
+条件映射：WorkFlowTransition.condition 存储表达式字符串，保存时提取为 named conditions map。
+
+### 12.4 shadcn/ui
+
+已初始化 shadcn（`components.json`），使用 `@` 路径别名。已安装组件：
+- button、card、dialog、input、textarea
+- 组件位于 `src/components/ui/`
+
+### 12.5 目录结构更新
+
+```
+workflow-dashboard/
+├── server/
+│   ├── api.ts                # Elysia REST API（health + workflow CRUD）
+│   └── workflow.ts           # Workflow 文件读写 + 格式转换
+├── src/
+│   ├── components/ui/        # shadcn 组件
+│   ├── pages/
+│   │   ├── home.tsx          # Home 列表页
+│   │   └── detail.tsx        # Workflow 详情/编辑页
+│   └── ...
+├── tmp/workflow/             # Workflow YAML 存储目录（开发阶段）
+└── components.json           # shadcn 配置
+```
@@ -0,0 +1,21 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Workflow UI</title>
+    <link rel="stylesheet" href="./src/index.css" />
+    <script>
+      (() => {
+        const t = localStorage.getItem("theme");
+        if (t === "dark" || (!t && matchMedia("(prefers-color-scheme: dark)").matches)) {
+          document.documentElement.classList.add("dark");
+        }
+      })();
+    </script>
+  </head>
+  <body>
+    <div id="root"></div>
+    <script type="module" src="./src/main.tsx"></script>
+  </body>
+</html>
@@ -0,0 +1,38 @@
+{
+  "name": "@uncaged/workflow-dashboard",
+  "version": "0.5.0-alpha.4",
+  "private": true,
+  "type": "module",
+  "scripts": {
+    "dev": "bun server.ts",
+    "build": "vite build"
+  },
+  "dependencies": {
+    "@base-ui/react": "^1.5.0",
+    "@fontsource-variable/geist": "^5.2.9",
+    "@uncaged/workflow-protocol": "workspace:*",
+    "@xyflow/react": "^12.10.2",
+    "class-variance-authority": "^0.7.1",
+    "clsx": "^2.1.1",
+    "elysia": "^1.4.28",
+    "immer": "^11.1.8",
+    "lucide-react": "^1.16.0",
+    "react": "^19.2.6",
+    "react-dom": "^19.2.6",
+    "react-router": "^7.15.1",
+    "shadcn": "^4.8.0",
+    "tailwind-merge": "^3.6.0",
+    "tw-animate-css": "^1.4.0",
+    "yaml": "^2.9.0"
+  },
+  "devDependencies": {
+    "@tailwindcss/vite": "^4.3.0",
+    "@types/bun": "^1.2.14",
+    "@types/react": "^19.2.14",
+    "@types/react-dom": "^19.2.3",
+    "@vitejs/plugin-react": "^6.0.2",
+    "tailwindcss": "^4.2.4",
+    "typescript": "^5.8.3",
+    "vite": "^8.0.13"
+  }
+}
@@ -0,0 +1,9 @@
+import { createServer } from "vite";
+
+const PORT = 3000;
+
+const server = await createServer({
+  server: { port: PORT },
+});
+
+await server.listen();
@@ -0,0 +1,78 @@
+import { Elysia, t } from "elysia";
+import type { WorkFlowSteps } from "../shared/types.ts";
+import {
+  createWorkflow,
+  deleteWorkflow,
+  getWorkflow,
+  listWorkflows,
+  saveWorkflow,
+} from "./workflow.ts";
+
+export function createApi() {
+  return new Elysia({ prefix: "/api" })
+    .get("/health", () => ({ status: "ok" }))
+    .get("/workflows", () => listWorkflows())
+    .get("/workflows/:name", async ({ params }) => {
+      try {
+        const steps = await getWorkflow(params.name);
+        return steps;
+      } catch {
+        return new Response(JSON.stringify({ error: "not found" }), {
+          status: 404,
+          headers: { "Content-Type": "application/json" },
+        });
+      }
+    })
+    .post(
+      "/workflows",
+      async ({ body }) => {
+        await createWorkflow(body.name, body.description);
+        return { ok: true };
+      },
+      {
+        body: t.Object({
+          name: t.String(),
+          description: t.String(),
+        }),
+      },
+    )
+    .put(
+      "/workflows/:name",
+      async ({ params, body }) => {
+        const steps: WorkFlowSteps = typeof body === "string" ? JSON.parse(body) : body;
+        await saveWorkflow(params.name, steps);
+        return { ok: true };
+      },
+      {
+        body: t.Array(
+          t.Object({
+            role: t.Object({
+              name: t.String(),
+              description: t.String(),
+              identity: t.String(),
+              prepare: t.String(),
+              execute: t.String(),
+              report: t.String(),
+            }),
+            transitions: t.Array(
+              t.Object({
+                target: t.String(),
+                condition: t.Union([t.String(), t.Null()]),
+              }),
+            ),
+          }),
+        ),
+      },
+    )
+    .delete("/workflows/:name", async ({ params }) => {
+      try {
+        await deleteWorkflow(params.name);
+        return { ok: true };
+      } catch {
+        return new Response(JSON.stringify({ error: "not found" }), {
+          status: 404,
+          headers: { "Content-Type": "application/json" },
+        });
+      }
+    });
+}
@@ -0,0 +1,150 @@
+import { mkdir, readdir, readFile, unlink, writeFile } from "node:fs/promises";
+import { join } from "node:path";
+import type { RoleDefinition, Transition, WorkflowPayload } from "@uncaged/workflow-protocol";
+import YAML from "yaml";
+import type { WorkFlowSteps, WorkFlowTransition, WorkflowSummary } from "../shared/types.ts";
+
+const WORKFLOW_DIR = join(import.meta.dirname, "..", "tmp", "workflow");
+
+async function ensureDir() {
+  await mkdir(WORKFLOW_DIR, { recursive: true });
+}
+
+function payloadToSteps(payload: WorkflowPayload): WorkFlowSteps {
+  const conditionMap = new Map<string, string>();
+  for (const [name, def] of Object.entries(payload.conditions)) {
+    conditionMap.set(name, def.expression);
+  }
+
+  const steps: WorkFlowSteps = [];
+  for (const [roleName, roleDef] of Object.entries(payload.roles)) {
+    const graphTransitions = payload.graph[roleName] ?? [];
+    const transitions: WorkFlowTransition[] = graphTransitions.map((t) => ({
+      target: t.role === "$END" ? "END" : t.role,
+      condition: t.condition ? (conditionMap.get(t.condition) ?? t.condition) : null,
+    }));
+
+    steps.push({
+      role: {
+        name: roleName,
+        description: roleDef.description,
+        identity: roleDef.goal,
+        prepare: roleDef.capabilities.join("\n"),
+        execute: roleDef.procedure,
+        report: roleDef.output,
+      },
+      transitions,
+    });
+  }
+
+  return steps;
+}
+
+function stepsToPayload(name: string, description: string, steps: WorkFlowSteps): WorkflowPayload {
+  const roles: Record<string, RoleDefinition> = {};
+  const conditions: WorkflowPayload["conditions"] = {};
+  const graph: Record<string, Transition[]> = {};
+
+  const expressionToName = new Map<string, string>();
+  let condIdx = 0;
+
+  for (const step of steps) {
+    const r = step.role;
+    roles[r.name] = {
+      description: r.description,
+      goal: r.identity,
+      capabilities: r.prepare ? r.prepare.split("\n").filter(Boolean) : [],
+      procedure: r.execute,
+      output: r.report,
+      frontmatter: "",
+    };
+
+    const transitions: Transition[] = step.transitions.map((t) => {
+      let condName: string | null = null;
+      if (t.condition) {
+        if (expressionToName.has(t.condition)) {
+          condName = expressionToName.get(t.condition) ?? null;
+        } else {
+          condName = `cond${condIdx++}`;
+          expressionToName.set(t.condition, condName);
+          conditions[condName] = {
+            description: "",
+            expression: t.condition,
+          };
+        }
+      }
+      const targetRole = t.target === "END" ? "$END" : t.target;
+      return {
+        role: targetRole,
+        condition: condName,
+        prompt: `Transition to ${targetRole}.`,
+      };
+    });
+
+    graph[r.name] = transitions;
+  }
+
+  if (steps.length > 0) {
+    const firstRole = steps[0].role.name;
+    graph.$START = [
+      {
+        role: firstRole,
+        condition: null,
+        prompt: `Begin workflow at role ${firstRole}.`,
+      },
+    ];
+  }
+
+  return { name, description, roles, conditions, graph };
+}
+
+export async function listWorkflows(): Promise<WorkflowSummary[]> {
+  await ensureDir();
+  const files = await readdir(WORKFLOW_DIR);
+  const results: WorkflowSummary[] = [];
+
+  for (const file of files) {
+    if (!file.endsWith(".yaml")) continue;
+    const content = await readFile(join(WORKFLOW_DIR, file), "utf-8");
+    const payload = YAML.parse(content) as WorkflowPayload;
+    results.push({ name: payload.name, description: payload.description });
+  }
+
+  return results;
+}
+
+export async function getWorkflow(name: string): Promise<WorkFlowSteps> {
+  const content = await readFile(join(WORKFLOW_DIR, `${name}.yaml`), "utf-8");
+  const payload = YAML.parse(content) as WorkflowPayload;
+  return payloadToSteps(payload);
+}
+
+export async function createWorkflow(name: string, description: string): Promise<void> {
+  await ensureDir();
+  const payload: WorkflowPayload = {
+    name,
+    description,
+    roles: {},
+    conditions: {},
+    graph: {},
+  };
+  await writeFile(join(WORKFLOW_DIR, `${name}.yaml`), YAML.stringify(payload), "utf-8");
+}
+
+export async function saveWorkflow(name: string, steps: WorkFlowSteps): Promise<void> {
+  const filePath = join(WORKFLOW_DIR, `${name}.yaml`);
+  let description = "";
+  try {
+    const existing = await readFile(filePath, "utf-8");
+    const existingPayload = YAML.parse(existing) as WorkflowPayload;
+    description = existingPayload.description;
+  } catch {
+    // file doesn't exist, use empty description
+  }
+  const payload = stepsToPayload(name, description, steps);
+  await writeFile(filePath, YAML.stringify(payload), "utf-8");
+}
+
+export async function deleteWorkflow(name: string): Promise<void> {
+  await unlink(join(WORKFLOW_DIR, `${name}.yaml`));
+}
@@ -0,0 +1,25 @@
+export type WorkFlowRole = {
+  name: string;
+  description: string;
+  identity: string;
+  prepare: string;
+  execute: string;
+  report: string;
+};
+
+export type WorkFlowTransition = {
+  target: string;
+  condition: string | null;
+};
+
+export type WorkFlowStep = {
+  role: WorkFlowRole;
+  transitions: WorkFlowTransition[];
+};
+
+export type WorkFlowSteps = WorkFlowStep[];
+
+export type WorkflowSummary = {
+  name: string;
+  description: string;
+};
@@ -0,0 +1,10 @@
+import type { ReactNode } from "react";
+import { Outlet } from "react-router";
+
+export function Layout(): ReactNode {
+  return (
+    <div className="h-screen w-screen bg-background text-foreground">
+      <Outlet />
+    </div>
+  );
+}
@@ -0,0 +1,58 @@
+import { Button as ButtonPrimitive } from "@base-ui/react/button";
+import { cva, type VariantProps } from "class-variance-authority";
+
+import { cn } from "@/lib/utils";
+
+const buttonVariants = cva(
+  "group/button inline-flex shrink-0 items-center justify-center rounded-lg border border-transparent bg-clip-padding text-sm font-medium whitespace-nowrap transition-all outline-none select-none focus-visible:border-ring focus-visible:ring-3 focus-visible:ring-ring/50 active:not-aria-[haspopup]:translate-y-px disabled:pointer-events-none disabled:opacity-50 aria-invalid:border-destructive aria-invalid:ring-3 aria-invalid:ring-destructive/20 dark:aria-invalid:border-destructive/50 dark:aria-invalid:ring-destructive/40 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
+  {
+    variants: {
+      variant: {
+        default: "bg-primary text-primary-foreground [a]:hover:bg-primary/80",
+        outline:
+          "border-border bg-background hover:bg-muted hover:text-foreground aria-expanded:bg-muted aria-expanded:text-foreground dark:border-input dark:bg-input/30 dark:hover:bg-input/50",
+        secondary:
+          "bg-secondary text-secondary-foreground hover:bg-secondary/80 aria-expanded:bg-secondary aria-expanded:text-secondary-foreground",
+        ghost:
+          "hover:bg-muted hover:text-foreground aria-expanded:bg-muted aria-expanded:text-foreground dark:hover:bg-muted/50",
+        destructive:
+          "bg-destructive/10 text-destructive hover:bg-destructive/20 focus-visible:border-destructive/40 focus-visible:ring-destructive/20 dark:bg-destructive/20 dark:hover:bg-destructive/30 dark:focus-visible:ring-destructive/40",
+        link: "text-primary underline-offset-4 hover:underline",
+      },
+      size: {
+        default:
+          "h-8 gap-1.5 px-2.5 has-data-[icon=inline-end]:pr-2 has-data-[icon=inline-start]:pl-2",
+        xs: "h-6 gap-1 rounded-[min(var(--radius-md),10px)] px-2 text-xs in-data-[slot=button-group]:rounded-lg has-data-[icon=inline-end]:pr-1.5 has-data-[icon=inline-start]:pl-1.5 [&_svg:not([class*='size-'])]:size-3",
+        sm: "h-7 gap-1 rounded-[min(var(--radius-md),12px)] px-2.5 text-[0.8rem] in-data-[slot=button-group]:rounded-lg has-data-[icon=inline-end]:pr-1.5 has-data-[icon=inline-start]:pl-1.5 [&_svg:not([class*='size-'])]:size-3.5",
+        lg: "h-9 gap-1.5 px-2.5 has-data-[icon=inline-end]:pr-2 has-data-[icon=inline-start]:pl-2",
+        icon: "size-8",
+        "icon-xs":
+          "size-6 rounded-[min(var(--radius-md),10px)] in-data-[slot=button-group]:rounded-lg [&_svg:not([class*='size-'])]:size-3",
+        "icon-sm":
+          "size-7 rounded-[min(var(--radius-md),12px)] in-data-[slot=button-group]:rounded-lg",
+        "icon-lg": "size-9",
+      },
+    },
+    defaultVariants: {
+      variant: "default",
+      size: "default",
+    },
+  },
+);
+
+function Button({
+  className,
+  variant = "default",
+  size = "default",
+  ...props
+}: ButtonPrimitive.Props & VariantProps<typeof buttonVariants>) {
+  return (
+    <ButtonPrimitive
+      data-slot="button"
+      className={cn(buttonVariants({ variant, size, className }))}
+      {...props}
+    />
+  );
+}
+
+export { Button, buttonVariants };
@@ -0,0 +1,92 @@
+import type * as React from "react";
+
+import { cn } from "@/lib/utils";
+
+function Card({
+  className,
+  size = "default",
+  ...props
+}: React.ComponentProps<"div"> & { size?: "default" | "sm" }) {
+  return (
+    <div
+      data-slot="card"
+      data-size={size}
+      className={cn(
+        "group/card flex flex-col gap-4 overflow-hidden rounded-xl bg-card py-4 text-sm text-card-foreground ring-1 ring-foreground/10 has-data-[slot=card-footer]:pb-0 has-[>img:first-child]:pt-0 data-[size=sm]:gap-3 data-[size=sm]:py-3 data-[size=sm]:has-data-[slot=card-footer]:pb-0 *:[img:first-child]:rounded-t-xl *:[img:last-child]:rounded-b-xl",
+        className,
+      )}
+      {...props}
+    />
+  );
+}
+
+function CardHeader({ className, ...props }: React.ComponentProps<"div">) {
+  return (
+    <div
+      data-slot="card-header"
+      className={cn(
+        "group/card-header @container/card-header grid auto-rows-min items-start gap-1 rounded-t-xl px-4 group-data-[size=sm]/card:px-3 has-data-[slot=card-action]:grid-cols-[1fr_auto] has-data-[slot=card-description]:grid-rows-[auto_auto] [.border-b]:pb-4 group-data-[size=sm]/card:[.border-b]:pb-3",
+        className,
+      )}
+      {...props}
+    />
+  );
+}
+
+function CardTitle({ className, ...props }: React.ComponentProps<"div">) {
+  return (
+    <div
+      data-slot="card-title"
+      className={cn(
+        "font-heading text-base leading-snug font-medium group-data-[size=sm]/card:text-sm",
+        className,
+      )}
+      {...props}
+    />
+  );
+}
+
+function CardDescription({ className, ...props }: React.ComponentProps<"div">) {
+  return (
+    <div
+      data-slot="card-description"
+      className={cn("text-sm text-muted-foreground", className)}
+      {...props}
+    />
+  );
+}
+
+function CardAction({ className, ...props }: React.ComponentProps<"div">) {
+  return (
+    <div
+      data-slot="card-action"
+      className={cn("col-start-2 row-span-2 row-start-1 self-start justify-self-end", className)}
+      {...props}
+    />
+  );
+}
+
+function CardContent({ className, ...props }: React.ComponentProps<"div">) {
+  return (
+    <div
+      data-slot="card-content"
+      className={cn("px-4 group-data-[size=sm]/card:px-3", className)}
+      {...props}
+    />
+  );
+}
+
+function CardFooter({ className, ...props }: React.ComponentProps<"div">) {
+  return (
+    <div
+      data-slot="card-footer"
+      className={cn(
+        "flex items-center rounded-b-xl border-t bg-muted/50 p-4 group-data-[size=sm]/card:p-3",
+        className,
+      )}
+      {...props}
+    />
+  );
+}
+
+export { Card, CardAction, CardContent, CardDescription, CardFooter, CardHeader, CardTitle };
@@ -0,0 +1,135 @@
+import { Dialog as DialogPrimitive } from "@base-ui/react/dialog";
+import { XIcon } from "lucide-react";
+import type * as React from "react";
+import { Button } from "@/components/ui/button";
+import { cn } from "@/lib/utils";
+
+function Dialog({ ...props }: DialogPrimitive.Root.Props) {
+  return <DialogPrimitive.Root data-slot="dialog" {...props} />;
+}
+
+function DialogTrigger({ ...props }: DialogPrimitive.Trigger.Props) {
+  return <DialogPrimitive.Trigger data-slot="dialog-trigger" {...props} />;
+}
+
+function DialogPortal({ ...props }: DialogPrimitive.Portal.Props) {
+  return <DialogPrimitive.Portal data-slot="dialog-portal" {...props} />;
+}
+
+function DialogClose({ ...props }: DialogPrimitive.Close.Props) {
+  return <DialogPrimitive.Close data-slot="dialog-close" {...props} />;
+}
+
+function DialogOverlay({ className, ...props }: DialogPrimitive.Backdrop.Props) {
+  return (
+    <DialogPrimitive.Backdrop
+      data-slot="dialog-overlay"
+      className={cn(
+        "fixed inset-0 isolate z-50 bg-black/10 duration-100 supports-backdrop-filter:backdrop-blur-xs data-open:animate-in data-open:fade-in-0 data-closed:animate-out data-closed:fade-out-0",
+        className,
+      )}
+      {...props}
+    />
+  );
+}
+
+function DialogContent({
+  className,
+  children,
+  showCloseButton = true,
+  ...props
+}: DialogPrimitive.Popup.Props & {
+  showCloseButton?: boolean;
+}) {
+  return (
+    <DialogPortal>
+      <DialogOverlay />
+      <DialogPrimitive.Popup
+        data-slot="dialog-content"
+        className={cn(
+          "fixed top-1/2 left-1/2 z-50 grid w-full max-w-[calc(100%-2rem)] -translate-x-1/2 -translate-y-1/2 gap-4 rounded-xl bg-popover p-4 text-sm text-popover-foreground ring-1 ring-foreground/10 duration-100 outline-none sm:max-w-sm data-open:animate-in data-open:fade-in-0 data-open:zoom-in-95 data-closed:animate-out data-closed:fade-out-0 data-closed:zoom-out-95",
+          className,
+        )}
+        {...props}
+      >
+        {children}
+        {showCloseButton && (
+          <DialogPrimitive.Close
+            data-slot="dialog-close"
+            render={<Button variant="ghost" className="absolute top-2 right-2" size="icon-sm" />}
+          >
+            <XIcon />
+            <span className="sr-only">Close</span>
+          </DialogPrimitive.Close>
+        )}
+      </DialogPrimitive.Popup>
+    </DialogPortal>
+  );
+}
+
+function DialogHeader({ className, ...props }: React.ComponentProps<"div">) {
+  return (
+    <div data-slot="dialog-header" className={cn("flex flex-col gap-2", className)} {...props} />
+  );
+}
+
+function DialogFooter({
+  className,
+  showCloseButton = false,
+  children,
+  ...props
+}: React.ComponentProps<"div"> & {
+  showCloseButton?: boolean;
+}) {
+  return (
+    <div
+      data-slot="dialog-footer"
+      className={cn(
+        "-mx-4 -mb-4 flex flex-col-reverse gap-2 rounded-b-xl border-t bg-muted/50 p-4 sm:flex-row sm:justify-end",
+        className,
+      )}
+      {...props}
+    >
+      {children}
+      {showCloseButton && (
+        <DialogPrimitive.Close render={<Button variant="outline" />}>Close</DialogPrimitive.Close>
+      )}
+    </div>
+  );
+}
+
+function DialogTitle({ className, ...props }: DialogPrimitive.Title.Props) {
+  return (
+    <DialogPrimitive.Title
+      data-slot="dialog-title"
+      className={cn("font-heading text-base leading-none font-medium", className)}
+      {...props}
+    />
+  );
+}
+
+function DialogDescription({ className, ...props }: DialogPrimitive.Description.Props) {
+  return (
+    <DialogPrimitive.Description
+      data-slot="dialog-description"
+      className={cn(
+        "text-sm text-muted-foreground *:[a]:underline *:[a]:underline-offset-3 *:[a]:hover:text-foreground",
+        className,
+      )}
+      {...props}
+    />
+  );
+}
+
+export {
+  Dialog,
+  DialogClose,
+  DialogContent,
+  DialogDescription,
+  DialogFooter,
+  DialogHeader,
+  DialogOverlay,
+  DialogPortal,
+  DialogTitle,
+  DialogTrigger,
+};
@@ -0,0 +1,20 @@
+import { Input as InputPrimitive } from "@base-ui/react/input";
+import type * as React from "react";
+
+import { cn } from "@/lib/utils";
+
+function Input({ className, type, ...props }: React.ComponentProps<"input">) {
+  return (
+    <InputPrimitive
+      type={type}
+      data-slot="input"
+      className={cn(
+        "h-8 w-full min-w-0 rounded-lg border border-input bg-transparent px-2.5 py-1 text-base transition-colors outline-none file:inline-flex file:h-6 file:border-0 file:bg-transparent file:text-sm file:font-medium file:text-foreground placeholder:text-muted-foreground focus-visible:border-ring focus-visible:ring-3 focus-visible:ring-ring/50 disabled:pointer-events-none disabled:cursor-not-allowed disabled:bg-input/50 disabled:opacity-50 aria-invalid:border-destructive aria-invalid:ring-3 aria-invalid:ring-destructive/20 md:text-sm dark:bg-input/30 dark:disabled:bg-input/80 dark:aria-invalid:border-destructive/50 dark:aria-invalid:ring-destructive/40",
+        className,
+      )}
+      {...props}
+    />
+  );
+}
+
+export { Input };
@@ -0,0 +1,19 @@
+import type * as React from "react";
+
+import { cn } from "@/lib/utils";
+
+function Label({ className, ...props }: React.ComponentProps<"label">) {
+  return (
+    // biome-ignore lint/a11y/noLabelWithoutControl: generic Label component; control association handled by consumer
+    <label
+      data-slot="label"
+      className={cn(
+        "flex items-center gap-2 text-sm leading-none font-medium select-none group-data-[disabled=true]:pointer-events-none group-data-[disabled=true]:opacity-50 peer-disabled:cursor-not-allowed peer-disabled:opacity-50",
+        className,
+      )}
+      {...props}
+    />
+  );
+}
+
+export { Label };
@@ -0,0 +1,21 @@
+"use client";
+
+import { Separator as SeparatorPrimitive } from "@base-ui/react/separator";
+
+import { cn } from "@/lib/utils";
+
+function Separator({ className, orientation = "horizontal", ...props }: SeparatorPrimitive.Props) {
+  return (
+    <SeparatorPrimitive
+      data-slot="separator"
+      orientation={orientation}
+      className={cn(
+        "shrink-0 bg-border data-horizontal:h-px data-horizontal:w-full data-vertical:w-px data-vertical:self-stretch",
+        className,
+      )}
+      {...props}
+    />
+  );
+}
+
+export { Separator };
@@ -0,0 +1,18 @@
+import type * as React from "react";
+
+import { cn } from "@/lib/utils";
+
+function Textarea({ className, ...props }: React.ComponentProps<"textarea">) {
+  return (
+    <textarea
+      data-slot="textarea"
+      className={cn(
+        "flex field-sizing-content min-h-16 w-full rounded-lg border border-input bg-transparent px-2.5 py-2 text-base transition-colors outline-none placeholder:text-muted-foreground focus-visible:border-ring focus-visible:ring-3 focus-visible:ring-ring/50 disabled:cursor-not-allowed disabled:bg-input/50 disabled:opacity-50 aria-invalid:border-destructive aria-invalid:ring-3 aria-invalid:ring-destructive/20 md:text-sm dark:bg-input/30 dark:disabled:bg-input/80 dark:aria-invalid:border-destructive/50 dark:aria-invalid:ring-destructive/40",
+        className,
+      )}
+      {...props}
+    />
+  );
+}
+
+export { Textarea };
@@ -0,0 +1,319 @@
+import { type ReactFlowInstance, useReactFlow } from "@xyflow/react";
+import type { FC, PropsWithChildren } from "react";
+import { createContext, useContext, useLayoutEffect, useMemo, useSyncExternalStore } from "react";
+import type { AnyWorkNode } from "./type";
+
+type Reduce<T> = (data: T) => T;
+type Setter<T> = (ch: Reduce<T> | T) => void;
+
+interface State<T, A> {
+  readonly get: () => T;
+  readonly set: Setter<T>;
+  readonly use: () => T;
+  readonly listen: (cb: VoidFunction) => VoidFunction;
+  readonly actions: A;
+  readonly onlyView: boolean;
+}
+type Use = <T, A>(sub: SubModel<T, A>) => [T, A];
+// biome-ignore lint/suspicious/noExplicitAny: UseV intentionally erases the action type
+type UseV = <T>(sub: SubModel<T, any>) => T;
+type Create<T, A> = (set: Setter<T>, get: () => T, model: Model) => A;
+
+export const uuid = () => Math.round((Math.random() + 1) * Date.now()).toString(36);
+
+export function generate<T>(val: T) {
+  const listener = new Set<VoidFunction>();
+  const get = () => val;
+  function set(ch: T | ((prev: T) => T)) {
+    const next = typeof ch === "function" ? (ch as (prev: T) => T)(val) : ch;
+    if (Object.is(val, next)) return;
+    val = next;
+    for (const call of listener) {
+      call();
+    }
+  }
+  const listen = (call: VoidFunction) => {
+    listener.add(call);
+    return () => listener.delete(call);
+  };
+  const use = () => useSyncExternalStore(listen, get, get);
+  return { get, set, use, listen };
+}
+
+class SubModel<T, A> {
+  public readonly name: string;
+  private readonly make: () => T;
+  private readonly create: Create<T, A>;
+  private readonly onlyView: boolean;
+
+  constructor(name: string, _make: () => T, _create: Create<T, A>, _onlyView = false) {
+    this.name = name;
+    this.make = _make;
+    this.create = _create;
+    this.onlyView = _onlyView;
+  }
+
+  public gen(model: Model): State<T, A> {
+    const { get, set, use, listen } = generate(this.make());
+    const actions = this.create(set, get, model);
+    return { get, set, use, listen, actions, onlyView: this.onlyView };
+  }
+
+  use(): [T, A] {
+    // biome-ignore lint/correctness/useHookAtTopLevel: use() is called as a hook by consumers
+    const { query } = useContext(Context);
+    const { use, actions } = query(this);
+    return [use(), actions];
+  }
+  useData(): T {
+    const { query } = useContext(Context);
+    return query(this).use();
+  }
+  useCreation(): A {
+    const { query } = useContext(Context);
+    return query(this).actions;
+  }
+}
+
+// biome-ignore lint/suspicious/noExplicitAny: snapshot data is heterogeneous
+type Snapshot = [name: string, data: any];
+class Model {
+  private ustack: Snapshot[][] = [];
+  private rstack: Snapshot[][] = [];
+  private transaction = 0;
+  // biome-ignore lint/suspicious/noExplicitAny: backup stores heterogeneous state values
+  private backup = new Map<string, any>();
+  public flow = {} as ReactFlowInstance<AnyWorkNode>;
+  private stackListeners = new Set<() => void>();
+  public readonly stackState: readonly [boolean, boolean] = [false, false];
+
+  // biome-ignore lint/suspicious/noExplicitAny: store holds heterogeneous state types
+  private readonly store: Map<string, State<any, any>>;
+  public readonly use: Use;
+
+  // biome-ignore lint/suspicious/noExplicitAny: store holds heterogeneous state types
+  constructor(store: Map<string, State<any, any>>, use: Use) {
+    this.store = store;
+    this.use = use;
+  }
+
+  public reset() {
+    this.ustack = [];
+    this.rstack = [];
+    this.transaction = 0;
+    this.backup.clear();
+    this.triggerStackState();
+  }
+
+  public readonly listenStackState = (cb: () => void) => {
+    this.stackListeners.add(cb);
+    return () => this.stackListeners.delete(cb);
+  };
+
+  private triggerStackState() {
+    // @ts-expect-error
+    this.stackState = [this.canUndo(), this.canRedo()];
+    for (const call of this.stackListeners) {
+      call();
+    }
+  }
+
+  private getStackState = () => this.stackState;
+  public useStackState() {
+    const get = this.getStackState;
+    return useSyncExternalStore(this.listenStackState, get, get);
+  }
+
+  public log() {
+    // biome-ignore lint/suspicious/noExplicitAny: debug log accumulates heterogeneous values
+    const snapshots: Record<string, any> = {};
+    for (const [name, state] of this.store) {
+      snapshots[name] = state.get();
+    }
+  }
+
+  public undo() {
+    const { ustack, rstack, store } = this;
+    const item = ustack.pop();
+    if (!item) return;
+    const step: Snapshot[] = [];
+    for (const [name, data] of item) {
+      const entry = store.get(name);
+      if (!entry) continue;
+      const { get, set } = entry;
+      step.push([name, get()]);
+      set(data);
+    }
+    rstack.push(step);
+    this.triggerStackState();
+  }
+
+  public redo() {
+    const { ustack, rstack, store } = this;
+    const item = rstack.pop();
+    if (!item) return;
+    const step: Snapshot[] = [];
+    for (const [name, data] of item) {
+      const entry = store.get(name);
+      if (!entry) continue;
+      const { get, set } = entry;
+      step.push([name, get()]);
+      set(data);
+    }
+    ustack.push(step);
+    this.triggerStackState();
+  }
+
+  public canUndo() {
+    return this.ustack.length > 0;
+  }
+
+  public canRedo() {
+    return this.rstack.length > 0;
+  }
+
+  public startTransaction() {
+    if (this.transaction === 0) {
+      this.backup.clear();
+      for (const [name, state] of this.store) {
+        if (state.onlyView) continue;
+        this.backup.set(name, state.get());
+      }
+    }
+    this.transaction += 1;
+    return this.endTransaction;
+  }
+
+  public endTransaction = () => {
+    if (this.transaction === 0) return;
+    this.transaction -= 1;
+    if (this.transaction === 0) {
+      const changes: Snapshot[] = [];
+      for (const [name, state] of this.store) {
+        if (state.onlyView) continue;
+        const before = this.backup.get(name);
+        if (Object.is(before, state.get())) continue;
+        changes.push([name, before]);
+      }
+      this.backup.clear();
+      if (changes.length === 0) return;
+      this.ustack.push(changes);
+      this.rstack.length = 0;
+      this.triggerStackState();
+    }
+  };
+}
+
+function build() {
+  // biome-ignore lint/suspicious/noExplicitAny: store holds heterogeneous state types
+  const store = new Map<string, State<any, any>>();
+
+  // biome-ignore lint/suspicious/noExplicitAny: memo cache stores heterogeneous values
+  const mem: Record<string, any> = {};
+  function use<T, A>(m: SubModel<T, A>): [T, A] {
+    const state = query(m);
+    return [state.get(), state.actions];
+  }
+
+  const model = new Model(store, use);
+  if (process.env.NODE_ENV === "development") {
+    // @ts-expect-error
+    window.__md__ = model;
+  }
+
+  function query<T, A>(m: SubModel<T, A>): State<T, A> {
+    const exist = store.get(m.name);
+    if (exist) return exist as State<T, A>;
+    const created = m.gen(model);
+    store.set(m.name, created);
+    return created;
+  }
+
+  return { query, model, mem, use };
+}
+
+const Context = createContext(build());
+
+export function useModel() {
+  return useContext(Context).model;
+}
+
+export function RegisterFlowToContext() {
+  const { model } = useContext(Context);
+  const instance = useReactFlow<AnyWorkNode>();
+  useLayoutEffect(() => {
+    model.flow = instance;
+  }, [instance, model]);
+  return null;
+}
+
+export const ModelProvider: FC<PropsWithChildren> = (p) => (
+  <Context.Provider value={useMemo(build, [])}>{p.children}</Context.Provider>
+);
+
+function defineModel<T, A>(name: string, make: () => T, create: Create<T, A>) {
+  return new SubModel<T, A>(name, make, create);
+}
+
+// biome-ignore lint/suspicious/noExplicitAny: default create returns setter directly
+const defaultCreate: Create<any, Setter<any>> = (set) => set;
+function defineView<T, A>(name: string, make: () => T, create: Create<T, A>): SubModel<T, A>;
+function defineView<T>(name: string, make: () => T): SubModel<T, Setter<T>>;
+function defineView<T>(
+  name: string,
+  make: () => T,
+  create?: Create<T, unknown>,
+): SubModel<T, unknown> {
+  // biome-ignore lint/suspicious/noExplicitAny: wraps into SubModel with erased action type
+  return new SubModel<T, any>(name, make, create ?? defaultCreate, true);
+}
+
+function memoize<T>(init: (use: Use, model: Model) => T) {
+  const id = uuid();
+  return {
+    use(): T {
+      // biome-ignore lint/correctness/useHookAtTopLevel: use() is called as a hook by consumers
+      const { mem, model, use } = useContext(Context);
+      if (!mem[id]) {
+        mem[id] = init(use, model);
+      }
+      return mem[id] as T;
+    },
+  };
+}
+
+function compute<T>(calc: (use: UseV) => T) {
+  const id = uuid();
+  return {
+    use(): T {
+      // biome-ignore lint/correctness/useHookAtTopLevel: use() is called as a hook by consumers
+      const { mem, query } = useContext(Context);
+      let state: ReturnType<typeof generate<T>> = mem[id];
+      if (state) return state.use();
+
+      // biome-ignore lint/suspicious/noExplicitAny: deps collect heterogeneous SubModels
+      const deps = new Set<SubModel<any, any>>();
+      // biome-ignore lint/suspicious/noExplicitAny: useV erases action type
+      let usev = (m: SubModel<any, any>) => {
+        deps.add(m);
+        return query(m).get();
+      };
+      mem[id] = state = generate<T>(calc(usev));
+      if (deps.size) {
+        usev = (m) => query(m).get();
+        const update = () => state.set(calc(usev));
+        for (const m of deps) {
+          query(m).listen(update);
+        }
+      }
+      return state.use();
+    },
+  };
+}
+
+export const define = {
+  model: defineModel,
+  view: defineView,
+  memoize,
+  compute,
+};
@@ -0,0 +1,276 @@
+import {
+  type Edge,
+  EdgeLabelRenderer,
+  type EdgeProps,
+  getSmoothStepPath,
+  useReactFlow,
+} from "@xyflow/react";
+import { Check } from "lucide-react";
+import { type ReactNode, useEffect, useMemo, useRef, useState } from "react";
+import { cn } from "../../lib/utils.ts";
+import { useModel } from "../context.tsx";
+import type { ConditionalEdge as ConditionalEdgeType } from "../type.ts";
+
+const SOURCE_COLOR = "#10b981";
+const TARGET_COLOR = "#3b82f6";
+const LACK_COLOR = "#ff5252";
+const RADIUS = 12;
+
+function GradientPath({
+  id,
+  path,
+  sourceX,
+  sourceY,
+  targetX,
+  targetY,
+  hasCondition,
+  selected,
+}: {
+  id: string;
+  path: string;
+  sourceX: number;
+  sourceY: number;
+  targetX: number;
+  targetY: number;
+  hasCondition: boolean | null;
+  selected: boolean;
+}) {
+  const gradientId = `gradient-${id}`;
+  const showLack = hasCondition === false;
+  const strokeStyle = selected
+    ? { stroke: "#f59e0b", strokeWidth: 2 }
+    : { stroke: `url(#${gradientId})`, strokeWidth: 1.5 };
+
+  return (
+    <>
+      <defs>
+        <linearGradient
+          id={gradientId}
+          gradientUnits="userSpaceOnUse"
+          x1={sourceX}
+          y1={sourceY}
+          x2={targetX}
+          y2={targetY}
+        >
+          <stop offset="0%" stopColor={showLack ? LACK_COLOR : SOURCE_COLOR} />
+          <stop offset="100%" stopColor={showLack ? LACK_COLOR : TARGET_COLOR} />
+        </linearGradient>
+      </defs>
+      <path
+        d={path}
+        fill="none"
+        stroke="transparent"
+        strokeWidth={20}
+        className="react-flow__edge-interaction"
+      />
+      <path id={id} d={path} fill="none" className="react-flow__edge-path" style={strokeStyle} />
+    </>
+  );
+}
+
+function ElseBadge({ labelX, labelY }: { labelX: number; labelY: number }): ReactNode {
+  return (
+    <div
+      className="absolute pointer-events-none"
+      style={{
+        transform: `translate(-50%, -50%) translate(${labelX}px,${labelY}px)`,
+      }}
+    >
+      <span className="inline-block px-1 bg-white rounded text-[10px] border border-gray-300 text-gray-500">
+        else
+      </span>
+    </div>
+  );
+}
+
+type ConditionLabelProps = {
+  condition: string | undefined;
+  labelX: number;
+  labelY: number;
+  onSave: (value: string) => void;
+};
+
+function ConditionLabel({ condition, labelX, labelY, onSave }: ConditionLabelProps): ReactNode {
+  const [isOpen, setIsOpen] = useState(false);
+  const [inputValue, setInputValue] = useState("");
+  const containerRef = useRef<HTMLDivElement>(null);
+
+  function handleBadgeClick() {
+    setInputValue(condition || "");
+    setIsOpen(true);
+  }
+
+  function handleSave() {
+    if (inputValue.trim()) {
+      onSave(inputValue.trim());
+    }
+    setIsOpen(false);
+  }
+
+  function handleKeyDown(e: React.KeyboardEvent) {
+    if (e.key === "Enter") {
+      handleSave();
+    }
+    if (e.key === "Escape") {
+      setIsOpen(false);
+    }
+  }
+
+  useEffect(() => {
+    if (!isOpen) return;
+    function handleClickOutside(e: PointerEvent) {
+      if (containerRef.current && !containerRef.current.contains(e.target as Node)) {
+        setIsOpen(false);
+      }
+    }
+    document.addEventListener("pointerdown", handleClickOutside, true);
+    return () => document.removeEventListener("pointerdown", handleClickOutside, true);
+  }, [isOpen]);
+
+  return (
+    <div
+      ref={containerRef}
+      className="absolute pointer-events-auto"
+      style={{
+        transform: `translate(-50%, -50%) translate(${labelX}px,${labelY}px)`,
+        zIndex: isOpen ? 1000 : undefined,
+      }}
+      onPointerDown={(e) => e.stopPropagation()}
+    >
+      {/* biome-ignore lint/a11y/noStaticElementInteractions: click handler on badge label */}
+      <div onClick={handleBadgeClick} onKeyDown={undefined} className="cursor-pointer">
+        <span
+          className={cn(
+            "inline-block px-1 bg-white rounded text-[10px]",
+            condition ? "border border-gray-300 text-black" : "border border-dashed text-red-500",
+          )}
+          style={condition ? undefined : { borderColor: LACK_COLOR }}
+        >
+          if
+        </span>
+      </div>
+      {isOpen && (
+        <div className="absolute left-1/2 -translate-x-1/2 top-full mt-1 z-50 bg-white rounded shadow-lg border border-gray-200 p-1">
+          <div className="flex items-center gap-0.5">
+            <input
+              type="text"
+              className="w-32 rounded border border-gray-300 px-1 py-0.5 text-[10px] focus:border-blue-500 focus:outline-none"
+              placeholder="输入条件"
+              value={inputValue}
+              onChange={(e) => setInputValue(e.target.value)}
+              onKeyDown={handleKeyDown}
+            />
+            <button
+              type="button"
+              onClick={handleSave}
+              className="p-0.5 text-blue-600 hover:bg-blue-50 rounded"
+            >
+              <Check size={10} />
+            </button>
+          </div>
+        </div>
+      )}
+    </div>
+  );
+}
+
+export function isElseEdge(edgeId: string, source: string, allEdges: Edge[]): boolean {
+  const siblings = allEdges.filter((e) => e.source === source && e.type === "conditional");
+  return siblings.length >= 2 && siblings[0].id === edgeId;
+}
+
+export function ConditionalEdge({
+  id,
+  source,
+  sourceX,
+  sourceY,
+  targetX,
+  targetY,
+  sourcePosition,
+  targetPosition,
+  selected,
+  data,
+}: EdgeProps<ConditionalEdgeType>): ReactNode {
+  const [edgePath, labelX, labelY] = getSmoothStepPath({
+    sourceX,
+    sourceY,
+    targetX,
+    targetY,
+    sourcePosition,
+    targetPosition,
+    borderRadius: RADIUS,
+  });
+  const flow = useReactFlow();
+  const model = useModel();
+
+  const allEdges = flow.getEdges();
+  const isElse = useMemo(() => isElseEdge(id, source, allEdges), [id, source, allEdges]);
+
+  const condition = data?.condition;
+  function handleSave(value: string) {
+    model.startTransaction();
+    flow.updateEdgeData(id, { condition: value });
+    requestAnimationFrame(model.endTransaction);
+  }
+
+  return (
+    <>
+      <GradientPath
+        id={id}
+        path={edgePath}
+        sourceX={sourceX}
+        sourceY={sourceY}
+        targetX={targetX}
+        targetY={targetY}
+        hasCondition={isElse ? null : !!condition}
+        selected={!!selected}
+      />
+      <EdgeLabelRenderer>
+        {isElse ? (
+          <ElseBadge labelX={labelX} labelY={labelY} />
+        ) : (
+          <ConditionLabel
+            condition={condition}
+            labelX={labelX}
+            labelY={labelY}
+            onSave={handleSave}
+          />
+        )}
+      </EdgeLabelRenderer>
+    </>
+  );
+}
+
+export function GradientEdge({
+  id,
+  sourceX,
+  sourceY,
+  targetX,
+  targetY,
+  sourcePosition,
+  targetPosition,
+  selected,
+}: EdgeProps<Edge>): ReactNode {
+  const [edgePath] = getSmoothStepPath({
+    sourceX,
+    sourceY,
+    targetX,
+    targetY,
+    sourcePosition,
+    targetPosition,
+    borderRadius: RADIUS,
+  });
+
+  return (
+    <GradientPath
+      id={id}
+      path={edgePath}
+      sourceX={sourceX}
+      sourceY={sourceY}
+      targetX={targetX}
+      targetY={targetY}
+      hasCondition={null}
+      selected={!!selected}
+    />
+  );
+}
@@ -0,0 +1,6 @@
+import { ConditionalEdge, GradientEdge } from "./conditional";
+
+export const edgeTypes = {
+  conditional: ConditionalEdge,
+  default: GradientEdge,
+};
@@ -0,0 +1,90 @@
+import { Background, Controls, type Edge, ReactFlow, ReactFlowProvider } from "@xyflow/react";
+import { createContext, createElement, memo, useContext, useEffect, useLayoutEffect } from "react";
+// @ts-expect-error
+import "@xyflow/react/dist/style.css";
+import { ModelProvider, RegisterFlowToContext } from "./context";
+import { edgeTypes } from "./edges";
+import { FlowModel, InternalField } from "./injection";
+import { edgesModel, handlers, injection, nodesModel } from "./model";
+import { nodeTypes } from "./nodes";
+import { Dialogs, TopCenterPanel } from "./panel";
+import type { AnyWorkNode } from "./type";
+
+export * from "./trans/type";
+
+const proOptions = { hideAttribution: true };
+
+const ReadonlyContext = createContext(false);
+export const useReadonly = () => useContext(ReadonlyContext);
+
+function Flow() {
+  const [nodes, { onNodesChange }] = nodesModel.use();
+  const [edges, { onEdgesChange, onConnect }] = edgesModel.use();
+  const { onNodeDragStart, onNodeDragStop, onConnectEnd, onBeforeDelete, onDelete, handleKeyDown } =
+    handlers.use();
+  const readonly = useReadonly();
+
+  return (
+    // biome-ignore lint/a11y/noStaticElementInteractions: keyboard handler for flow shortcuts
+    <div style={{ height: "100%" }} onKeyDown={readonly ? undefined : handleKeyDown}>
+      <ReactFlowProvider>
+        <ReactFlow<AnyWorkNode, Edge>
+          nodes={nodes}
+          edges={edges}
+          onNodesChange={readonly ? undefined : onNodesChange}
+          onEdgesChange={readonly ? undefined : onEdgesChange}
+          onConnect={readonly ? undefined : onConnect}
+          fitView
+          proOptions={proOptions}
+          onNodeDragStart={readonly ? undefined : onNodeDragStart}
+          onNodeDragStop={readonly ? undefined : onNodeDragStop}
+          onConnectEnd={readonly ? undefined : onConnectEnd}
+          onBeforeDelete={readonly ? undefined : onBeforeDelete}
+          onDelete={readonly ? undefined : onDelete}
+          nodeTypes={nodeTypes}
+          edgeTypes={edgeTypes}
+          nodesDraggable={!readonly}
+          nodesConnectable={!readonly}
+          elementsSelectable={!readonly}
+        >
+          <RegisterFlowToContext />
+          <Background />
+          <Controls />
+          {!readonly && <TopCenterPanel />}
+          {!readonly && <Dialogs />}
+        </ReactFlow>
+      </ReactFlowProvider>
+    </div>
+  );
+}
+
+const MemoFlow = memo(Flow);
+
+interface Props {
+  model: FlowModel;
+  readonly?: boolean;
+}
+
+function Connect({ model }: { model: FlowModel }) {
+  const { loadSteps } = handlers.use();
+  const inject = injection.useCreation();
+  const instance = model[InternalField];
+
+  useLayoutEffect(() => {
+    return inject(instance);
+  }, [instance, inject]);
+
+  useEffect(() => {
+    return instance.on("load", loadSteps);
+  }, [instance, loadSteps]);
+
+  return <MemoFlow />;
+}
+
+export { FlowModel };
+// biome-ignore lint/style/noDefaultExport: FlowEditor is the main public component
+export default ({ model, readonly = false }: Props) => (
+  <ReadonlyContext.Provider value={readonly}>
+    <ModelProvider>{createElement(Connect, { model })}</ModelProvider>
+  </ReadonlyContext.Provider>
+);
@@ -0,0 +1,48 @@
+import type { WorkFlowSteps } from "./trans";
+import { Eventer } from "./utils/eventer";
+
+interface PublicEvents {
+  save: WorkFlowSteps;
+}
+
+interface PrivateEvents {
+  load: WorkFlowSteps;
+}
+
+export const InternalField = Symbol("InternalField");
+
+export class Injection extends Eventer<PrivateEvents> {
+  public readonly emitPublic: Eventer<PublicEvents>["emit"];
+  private inital_steps: WorkFlowSteps | undefined;
+
+  constructor(emitPublic: Eventer<PublicEvents>["emit"], inital_steps?: WorkFlowSteps) {
+    super();
+    this.emitPublic = emitPublic;
+    this.inital_steps = inital_steps;
+  }
+
+  public on: Eventer<PrivateEvents>["on"] = (type, lisenter) => {
+    const off = super.on(type, lisenter);
+    if (type === "load" && this.inital_steps) {
+      lisenter(this.inital_steps);
+      this.inital_steps = undefined;
+    }
+    return off;
+  };
+}
+
+export class FlowModel {
+  private readonly eventer = new Eventer<PublicEvents>();
+  public on = this.eventer.on.bind(this.eventer);
+  public off = this.eventer.off.bind(this.eventer);
+
+  public readonly [InternalField]: Injection;
+
+  constructor(inital_steps?: WorkFlowSteps) {
+    this[InternalField] = new Injection(this.eventer.emit.bind(this.eventer), inital_steps);
+  }
+
+  public load(steps: WorkFlowSteps) {
+    this[InternalField].emit("load", steps);
+  }
+}
@@ -0,0 +1,239 @@
+import type { Edge, Node } from "@xyflow/react";
+
+const DEFAULT_NODE_WIDTH = 120;
+const DEFAULT_NODE_HEIGHT = 50;
+const HORIZONTAL_GAP = 80; // 层与层之间的水平间距
+const VERTICAL_GAP = 40; // 同层节点之间的垂直间距
+
+/**
+ * 获取节点的尺寸
+ */
+function getNodeSize(node: Node): { width: number; height: number } {
+  return {
+    width: node.measured?.width ?? DEFAULT_NODE_WIDTH,
+    height: node.measured?.height ?? DEFAULT_NODE_HEIGHT,
+  };
+}
+
+/**
+ * 构建邻接表（出边）和入度表
+ */
+function buildGraph(nodes: Node[], edges: Edge[]) {
+  const nodeIds = new Set(nodes.map((n) => n.id));
+  const outgoing = new Map<string, string[]>(); // nodeId -> [targetIds]
+  const incoming = new Map<string, string[]>(); // nodeId -> [sourceIds]
+  const inDegree = new Map<string, number>();
+
+  // 初始化
+  for (const node of nodes) {
+    outgoing.set(node.id, []);
+    incoming.set(node.id, []);
+    inDegree.set(node.id, 0);
+  }
+
+  // 构建图
+  for (const edge of edges) {
+    if (nodeIds.has(edge.source) && nodeIds.has(edge.target)) {
+      outgoing.get(edge.source)?.push(edge.target);
+      incoming.get(edge.target)?.push(edge.source);
+      inDegree.set(edge.target, (inDegree.get(edge.target) ?? 0) + 1);
+    }
+  }
+
+  return { outgoing, incoming, inDegree };
+}
+
+/**
+ * 使用拓扑排序将节点分层
+ * - 'start' 节点固定在第 0 层
+ * - 'end' 节点固定在最后一层
+ * - 孤立节点放在中间层
+ */
+function assignLayers(nodes: Node[], edges: Edge[]): Map<string, number> {
+  const { outgoing, inDegree } = buildGraph(nodes, edges);
+  const layers = new Map<string, number>();
+  const queue: string[] = [];
+
+  // 1. start 节点固定在第 0 层
+  layers.set("start", 0);
+  queue.push("start");
+
+  // 2. BFS 分层（排除 end 节点，稍后单独处理）
+  while (queue.length > 0) {
+    const current = queue.shift() ?? "";
+    const currentLayer = layers.get(current) ?? 0;
+
+    for (const target of outgoing.get(current) ?? []) {
+      // 跳过 end 节点，稍后处理
+      if (target === "end") continue;
+
+      const newLayer = currentLayer + 1;
+      const existingLayer = layers.get(target);
+
+      if (existingLayer === undefined) {
+        layers.set(target, newLayer);
+        inDegree.set(target, (inDegree.get(target) ?? 1) - 1);
+        if (inDegree.get(target) === 0) {
+          queue.push(target);
+        }
+      } else {
+        // 如果已有层级，取更大的值（确保所有前驱都在前面）
+        layers.set(target, Math.max(existingLayer, newLayer));
+      }
+    }
+  }
+
+  // 3. 找到当前最大层级
+  let maxLayer = 0;
+  for (const layer of layers.values()) {
+    maxLayer = Math.max(maxLayer, layer);
+  }
+
+  // 4. 处理孤立节点（没有被分配层级的非 start/end 节点）
+  // 把它们放在中间层
+  const middleLayer = Math.max(1, Math.floor((maxLayer + 1) / 2));
+  for (const node of nodes) {
+    if (node.id !== "start" && node.id !== "end" && !layers.has(node.id)) {
+      layers.set(node.id, middleLayer);
+    }
+  }
+
+  // 5. 重新计算最大层级（可能因为孤立节点而变化）
+  maxLayer = 0;
+  for (const [id, layer] of layers) {
+    if (id !== "end") {
+      maxLayer = Math.max(maxLayer, layer);
+    }
+  }
+
+  // 6. end 节点固定在最后一层
+  layers.set("end", maxLayer + 1);
+
+  return layers;
+}
+
+/**
+ * 按层级分组节点
+ */
+function groupByLayer<N extends Node>(nodes: N[], layers: Map<string, number>): Map<number, N[]> {
+  const groups = new Map<number, N[]>();
+
+  for (const node of nodes) {
+    const layer = layers.get(node.id) ?? 0;
+    if (!groups.has(layer)) {
+      groups.set(layer, []);
+    }
+    groups.get(layer)?.push(node);
+  }
+
+  return groups;
+}
+
+/**
+ * 计算每层的最大宽度
+ */
+function calculateLayerWidths(layerGroups: Map<number, Node[]>): Map<number, number> {
+  const widths = new Map<number, number>();
+
+  for (const [layer, nodesInLayer] of layerGroups) {
+    let maxWidth = 0;
+    for (const node of nodesInLayer) {
+      const { width } = getNodeSize(node);
+      maxWidth = Math.max(maxWidth, width);
+    }
+    widths.set(layer, maxWidth);
+  }
+
+  return widths;
+}
+
+/**
+ * 计算每层的 X 起始位置
+ */
+function calculateLayerXPositions(
+  layerWidths: Map<number, number>,
+  maxLayer: number,
+): Map<number, number> {
+  const xPositions = new Map<number, number>();
+  let currentX = 0;
+
+  for (let layer = 0; layer <= maxLayer; layer++) {
+    xPositions.set(layer, currentX);
+    const layerWidth = layerWidths.get(layer) ?? DEFAULT_NODE_WIDTH;
+    currentX += layerWidth + HORIZONTAL_GAP;
+  }
+
+  return xPositions;
+}
+
+/**
+ * Todo: 1-N 情况下的布局优化
+ * Todo: 如果计算完了之后，所有节点的位置都没变，则不更新节点，避免不必要的重渲染
+ * node 中有 measured 属性，可以获得其尺寸，如果没有，则使用一个默认尺寸 120*50
+ * edge 的 source 和 target 分别对应两端的 node 的 id
+ *
+ * 算法步骤：
+ * 1. 使用拓扑排序将节点分层（从左到右）
+ * 2. 计算每层的 X 位置
+ * 3. 在每层内垂直居中排列节点
+ */
+export function LayoutLR<N extends Node>(nodes: N[], edges: Edge[]): N[] {
+  if (nodes.length === 0) {
+    return [];
+  }
+
+  // 1. 分配层级
+  const layers = assignLayers(nodes, edges);
+
+  // 2. 按层级分组
+  const layerGroups = groupByLayer(nodes, layers);
+
+  // 3. 计算每层宽度和 X 位置
+  const maxLayer = Math.max(...layers.values());
+  const layerWidths = calculateLayerWidths(layerGroups);
+  const layerXPositions = calculateLayerXPositions(layerWidths, maxLayer);
+
+  // 4. 计算每层的总高度，用于垂直居中
+  const layerHeights = new Map<number, number>();
+  for (const [layer, nodesInLayer] of layerGroups) {
+    let totalHeight = 0;
+    for (const node of nodesInLayer) {
+      const { height } = getNodeSize(node);
+      totalHeight += height;
+    }
+    totalHeight += (nodesInLayer.length - 1) * VERTICAL_GAP;
+    layerHeights.set(layer, totalHeight);
+  }
+
+  // 找到最大高度，用于垂直居中对齐
+  const maxHeight = Math.max(...layerHeights.values());
+
+  // 5. 为每个节点分配位置，并检查是否有变化
+  const layoutedNodes: N[] = [];
+  let hasChanged = false;
+
+  for (const [layer, nodesInLayer] of layerGroups) {
+    const layerHeight = layerHeights.get(layer) ?? 0;
+    const startY = (maxHeight - layerHeight) / 2; // 垂直居中
+    const x = layerXPositions.get(layer) ?? 0;
+
+    let currentY = startY;
+
+    for (const node of nodesInLayer) {
+      const { height } = getNodeSize(node);
+      const newPosition = { x, y: currentY };
+      if (node.position.x !== newPosition.x || node.position.y !== newPosition.y) {
+        hasChanged = true;
+        layoutedNodes.push({
+          ...node,
+          position: newPosition,
+        });
+      } else {
+        layoutedNodes.push(node);
+      }
+      currentY += height + VERTICAL_GAP;
+    }
+  }
+
+  return hasChanged ? layoutedNodes : nodes;
+}
--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				`export { generateCliReference as cmdSkillCli } from "@uncaged/workflow-util";`