fix: cancelled threads show distinct status instead of completed

Fixes #522
Merge pull request 'feat(skill): expand uwf skill with architecture, yaml, moderator, list subcommands' (#521) from fix/517-expand-skill into main
2026-05-25 15:39:59 +00:00 · 2026-05-25 15:00:34 +00:00 · 2026-05-25 22:59:38 +08:00 · 2026-05-25 22:47:00 +08:00 · 2026-05-25 22:44:32 +08:00 · 2026-05-25 22:42:05 +08:00
22 changed files with 645 additions and 242 deletions
@@ -12,4 +12,4 @@ packages/workflow-template-develop/develop.esm.js
 .DS_Store
 *.py
 .claude
-tmp
+tmp.worktrees/
@@ -8,7 +8,7 @@
  ],
  "type": "module",
  "bin": {
-    "uwf": "./src/cli.ts"
+    "uwf": "./dist/cli.js"
  },
  "dependencies": {
    "@uncaged/json-cas": "^0.5.3",
@@ -40,6 +40,7 @@ describe("resolveHeadHash", () => {
      workflow: workflowHash,
      head: headHash,
      completedAt: Date.now(),
+      reason: null,
    });

    const result = await resolveHeadHash(tmpDir, threadId);
@@ -64,6 +65,7 @@ describe("resolveHeadHash", () => {
      workflow: workflowHash,
      head: historicalHash,
      completedAt: Date.now(),
+      reason: null,
    });

    const result = await resolveHeadHash(tmpDir, threadId);
@@ -87,18 +89,21 @@ describe("resolveHeadHash", () => {
      workflow: workflowHash,
      head: hash1,
      completedAt: Date.now() - 2000,
+      reason: null,
    });
    await appendThreadHistory(tmpDir, {
      thread: threadId2,
      workflow: workflowHash,
      head: hash2,
      completedAt: Date.now() - 1000,
+      reason: null,
    });
    await appendThreadHistory(tmpDir, {
      thread: threadId3,
      workflow: workflowHash,
      head: hash3,
      completedAt: Date.now(),
+      reason: null,
    });

    const result = await resolveHeadHash(tmpDir, threadId2);
@@ -0,0 +1,78 @@
+import { execFileSync } from "node:child_process";
+import { dirname, join } from "node:path";
+import { fileURLToPath } from "node:url";
+import { describe, expect, test } from "vitest";
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+
+import {
+  cmdSkillArchitecture,
+  cmdSkillCli,
+  cmdSkillList,
+  cmdSkillModerator,
+  cmdSkillYaml,
+} from "../commands/skill.js";
+
+describe("skill commands", () => {
+  test("skill list returns all skill names", () => {
+    const result = cmdSkillList();
+    expect(result).toBeInstanceOf(Array);
+    expect(result).toContain("cli");
+    expect(result).toContain("architecture");
+    expect(result).toContain("yaml");
+    expect(result).toContain("moderator");
+    for (const name of result) {
+      expect(typeof name).toBe("string");
+      expect(name).toMatch(/^\S+$/);
+    }
+  });
+
+  test("skill architecture returns non-empty markdown string", () => {
+    const result = cmdSkillArchitecture();
+    expect(typeof result).toBe("string");
+    expect(result).toContain("CAS");
+    expect(result).toContain("Thread");
+    expect(result).toContain("Workflow");
+    expect(result).toContain("Step");
+    expect(result.length).toBeGreaterThan(200);
+  });
+
+  test("skill yaml returns non-empty markdown string", () => {
+    const result = cmdSkillYaml();
+    expect(typeof result).toBe("string");
+    expect(result).toContain("roles");
+    expect(result).toContain("graph");
+    expect(result).toContain("frontmatter");
+    expect(result.length).toBeGreaterThan(200);
+  });
+
+  test("skill moderator returns non-empty markdown string", () => {
+    const result = cmdSkillModerator();
+    expect(typeof result).toBe("string");
+    expect(result).toContain("routing");
+    expect(result).toContain("status");
+    expect(result.length).toBeGreaterThan(200);
+    // Check for edge or graph
+    expect(result).toMatch(/edge|graph/i);
+  });
+
+  test("skill cli returns CLI reference markdown", () => {
+    const result = cmdSkillCli();
+    expect(typeof result).toBe("string");
+    expect(result).toContain("uwf");
+  });
+
+  test("skill help subcommand is suppressed", () => {
+    const output = execFileSync("bun", ["src/cli.ts", "skill", "--help"], { // runs the CLI entry file through bun as a subprocess
+      cwd: join(__dirname, "..", ".."), // two directories above this test file
+      encoding: "utf-8",
+      env: { ...process.env, PATH: `/opt/homebrew/bin:${process.env.PATH}` }, // NOTE(review): Homebrew bin dir is prepended, presumably so `bun` resolves on macOS — confirm
+    });
+    expect(output).not.toMatch(/help\s+\[command\]/i); // a `help [command]` entry must not appear in the listing
+    expect(output).toContain("cli");
+    expect(output).toContain("architecture");
+    expect(output).toContain("yaml");
+    expect(output).toContain("moderator");
+    expect(output).toContain("list");
+  });
+});
@@ -453,7 +453,78 @@ describe("step read", () => {
    expect(markdown).not.toContain("## Turn");
  });

-  test("test 6: turn content with special characters", async () => {
+  test("test 6: displays role and tool calls in turn body", async () => {
+    const casDir = join(tmpDir, "cas");
+    await mkdir(casDir, { recursive: true });
+    const store = createFsStore(casDir);
+    const schemas = await registerUwfSchemas(store);
+    const detailSchemas = await registerDetailSchemas(store);
+
+    const workflowHash = await store.put(schemas.workflow, {
+      name: "test-wf",
+      description: "desc",
+      roles: {
+        worker: {
+          description: "Worker",
+          goal: "You are a worker agent.",
+          capabilities: [],
+          procedure: "Do the work.",
+          output: "Summarize the work.",
+          meta: "placeholder00" as CasRef,
+        },
+      },
+      conditions: {},
+      graph: {},
+    });
+
+    const startHash = await store.put(schemas.startNode, {
+      workflow: workflowHash,
+      prompt: "Test task",
+    });
+
+    const outputHash = await store.put(schemas.workflow, {
+      name: "out",
+      description: "",
+      roles: {},
+      conditions: {},
+      graph: {},
+    });
+
+    const turnHash = await store.put(detailSchemas.turn, {
+      index: 0,
+      role: "assistant",
+      content: "",
+      toolCalls: [{ name: "terminal", args: '{"command":"echo hi"}' }],
+      reasoning: null,
+    });
+
+    const detailHash = await store.put(detailSchemas.detail, {
+      sessionId: "session-1",
+      model: "test-model",
+      duration: 1000,
+      turnCount: 1,
+      turns: [turnHash],
+    });
+
+    const stepHash = await store.put(schemas.stepNode, {
+      start: startHash,
+      prev: null,
+      role: "worker",
+      output: outputHash,
+      detail: detailHash,
+      agent: "uwf-hermes",
+      startedAtMs: 1000000000000,
+      completedAtMs: 1000000005000,
+    });
+
+    const markdown = await cmdStepRead(tmpDir, stepHash, 4000);
+
+    expect(markdown).toContain("**Turn role:** assistant");
+    expect(markdown).toContain("**terminal**");
+    expect(markdown).toContain('{"command":"echo hi"}');
+  });
+
+  test("test 7: turn content with special characters", async () => {
    const casDir = join(tmpDir, "cas");
    await mkdir(casDir, { recursive: true });
    const store = createFsStore(casDir);
@@ -0,0 +1,85 @@
+import { mkdtemp } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import type { CasRef, ThreadId } from "@uncaged/workflow-protocol";
+import { describe, expect, test } from "vitest";
+import { appendThreadHistory, loadThreadHistory } from "../store.js";
+
+describe("thread cancel status", () => {
+  test("cancelled history entry has reason 'cancelled'", async () => {
+    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-cancel-test-"));
+    const threadId = "01JTEST000000000000CANCEL1" as ThreadId;
+
+    await appendThreadHistory(tmpDir, {
+      thread: threadId,
+      workflow: "test-workflow",
+      head: "test-head-hash" as CasRef,
+      completedAt: Date.now(),
+      reason: "cancelled",
+    });
+
+    const history = await loadThreadHistory(tmpDir);
+    expect(history).toHaveLength(1);
+    expect(history[0]?.reason).toBe("cancelled");
+  });
+
+  test("completed history entry has reason 'completed'", async () => {
+    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-cancel-test-"));
+    const threadId = "01JTEST000000000000CANCEL2" as ThreadId;
+
+    await appendThreadHistory(tmpDir, {
+      thread: threadId,
+      workflow: "test-workflow",
+      head: "test-head-hash" as CasRef,
+      completedAt: Date.now(),
+      reason: "completed",
+    });
+
+    const history = await loadThreadHistory(tmpDir);
+    expect(history).toHaveLength(1);
+    expect(history[0]?.reason).toBe("completed");
+  });
+
+  test("legacy history entry without reason parses as null", async () => {
+    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-cancel-test-"));
+    const threadId = "01JTEST000000000000CANCEL3" as ThreadId;
+
+    // Stand-in for a legacy entry: passed with an explicit `reason: null`, not with the field omitted — NOTE(review): confirm whether a line lacking `reason` should be covered too
+    await appendThreadHistory(tmpDir, {
+      thread: threadId,
+      workflow: "test-workflow",
+      head: "test-head-hash" as CasRef,
+      completedAt: Date.now(),
+      reason: null,
+    });
+
+    const history = await loadThreadHistory(tmpDir);
+    expect(history).toHaveLength(1);
+    expect(history[0]?.reason).toBeNull();
+  });
+
+  test("mixed completed and cancelled entries preserve distinct reasons", async () => {
+    const tmpDir = await mkdtemp(join(tmpdir(), "uwf-cancel-test-"));
+
+    await appendThreadHistory(tmpDir, {
+      thread: "01JTEST000000000000CANCEL4" as ThreadId,
+      workflow: "test-workflow",
+      head: "head1" as CasRef,
+      completedAt: Date.now(),
+      reason: "completed",
+    });
+
+    await appendThreadHistory(tmpDir, {
+      thread: "01JTEST000000000000CANCEL5" as ThreadId,
+      workflow: "test-workflow",
+      head: "head2" as CasRef,
+      completedAt: Date.now(),
+      reason: "cancelled",
+    });
+
+    const history = await loadThreadHistory(tmpDir);
+    expect(history).toHaveLength(2);
+    expect(history[0]?.reason).toBe("completed");
+    expect(history[1]?.reason).toBe("cancelled");
+  });
+});
@@ -74,6 +74,7 @@ async function completeThread(
    workflow: workflowHash,
    head: headHash,
    completedAt: Date.now(),
+    reason: null,
  });
 }

@@ -758,6 +758,7 @@ describe("cmdStepList with completed threads", () => {
      workflow: workflowHash,
      head: step2Hash,
      completedAt: Date.now(),
+      reason: null,
    });

    const result = await cmdStepList(tmpDir, threadId);
@@ -886,6 +887,7 @@ describe("cmdStepShow with completed threads", () => {
      workflow: workflowHash,
      head: stepHash,
      completedAt: Date.now(),
+      reason: null,
    });

    const result = await cmdStepShow(tmpDir, stepHash);
@@ -949,6 +951,7 @@ describe("cmdThreadRead with completed threads", () => {
      workflow: workflowHash,
      head: stepHash,
      completedAt: Date.now(),
+      reason: null,
    });

    const markdown = await cmdThreadRead(tmpDir, threadId, THREAD_READ_DEFAULT_QUOTA, null, false);
@@ -1011,6 +1014,7 @@ describe("cmdThreadRead with completed threads", () => {
      workflow: workflowHash,
      head: step3Hash,
      completedAt: Date.now(),
+      reason: null,
    });

    const markdown = await cmdThreadRead(
@@ -1,4 +1,4 @@
-#!/usr/bin/env bun
+#!/usr/bin/env node

 import type { CasRef, ThreadId } from "@uncaged/workflow-protocol";
 import { Command } from "commander";
@@ -15,7 +15,13 @@ import {
 } from "./commands/cas.js";
 import { cmdLogClean, cmdLogList, cmdLogShow } from "./commands/log.js";
 import { cmdSetup, cmdSetupInteractive } from "./commands/setup.js";
-import { cmdSkillCli } from "./commands/skill.js";
+import {
+  cmdSkillArchitecture,
+  cmdSkillCli,
+  cmdSkillList,
+  cmdSkillModerator,
+  cmdSkillYaml,
+} from "./commands/skill.js";
 import { cmdStepFork, cmdStepList, cmdStepRead, cmdStepShow } from "./commands/step.js";
 import {
  cmdThreadCancel,
@@ -175,11 +181,11 @@ function parseStatusFilter(status: string | undefined): ThreadStatus[] | null {
  if (raw === "active") return ["idle", "running"];

  const parts = raw.split(",").map((s) => s.trim());
-  const validStatuses: ThreadStatus[] = ["idle", "running", "completed"];
+  const validStatuses: ThreadStatus[] = ["idle", "running", "completed", "cancelled"];
  for (const part of parts) {
    if (!validStatuses.includes(part as ThreadStatus)) {
      process.stderr.write(
-        `Invalid status: ${part}. Must be one of: idle, running, completed, active\n`,
+        `Invalid status: ${part}. Must be one of: idle, running, completed, cancelled, active\n`,
      );
      process.exit(1);
    }
@@ -232,7 +238,7 @@ thread
  .description("List threads")
  .option(
    "--status <status>",
-    "Filter by status: idle, running, completed, active (idle+running), or comma-separated values",
+    "Filter by status: idle, running, completed, cancelled, active (idle+running), or comma-separated values",
  )
  .option("--after <date>", "Filter threads created after this date (ISO or relative like '7d')")
  .option("--before <date>", "Filter threads created before this date (ISO or relative like '7d')")
@@ -473,6 +479,7 @@ For more information, see: uwf help thread list
  });

 const skill = program.command("skill").description("Built-in skill references for agents");
+skill.addHelpCommand(false);

 skill
  .command("cli")
@@ -481,6 +488,34 @@ skill
    console.log(cmdSkillCli());
  });

+skill
+  .command("architecture")
+  .description("Print the architecture reference")
+  .action(() => {
+    console.log(cmdSkillArchitecture());
+  });
+
+skill
+  .command("yaml")
+  .description("Print the workflow YAML schema reference")
+  .action(() => {
+    console.log(cmdSkillYaml());
+  });
+
+skill
+  .command("moderator")
+  .description("Print the moderator reference")
+  .action(() => {
+    console.log(cmdSkillModerator());
+  });
+
+skill
+  .command("list")
+  .description("List all available skill names")
+  .action(() => {
+    console.log(cmdSkillList().join("\n"));
+  });
+
 program
  .command("setup")
  .description("Configure provider, model, and agent")
@@ -1 +1,12 @@
-export { generateCliReference as cmdSkillCli } from "@uncaged/workflow-util";
+export {
+  generateArchitectureReference as cmdSkillArchitecture,
+  generateCliReference as cmdSkillCli,
+  generateModeratorReference as cmdSkillModerator,
+  generateYamlReference as cmdSkillYaml,
+} from "@uncaged/workflow-util";
+
+const SKILL_NAMES = ["cli", "architecture", "yaml", "moderator"] as const;
+
+export function cmdSkillList(): ReadonlyArray<string> { // lists the built-in skill names
+  return [...SKILL_NAMES]; // spread yields a new array on every call
+}
@@ -19,9 +19,16 @@ import {
  walkChain,
 } from "./shared.js";

+type TurnToolCall = {
+  name: string;
+  args: string;
+};
+
 type TurnData = {
  index: number;
+  role: string;
  content: string;
+  toolCalls: TurnToolCall[] | null;
 };

 /**
@@ -128,8 +135,74 @@ function loadStepDetail(store: BootstrapCapableStore, detailRef: CasRef): Record
  return detailNode.payload as Record<string, unknown>;
 }

+function parseTurnToolCalls(raw: unknown): TurnToolCall[] | null { // normalizes an untyped tool-call payload; null means "no usable calls"
+  if (!Array.isArray(raw) || raw.length === 0) {
+    return null;
+  }
+  const calls: TurnToolCall[] = [];
+  for (const entry of raw) {
+    if (typeof entry !== "object" || entry === null) {
+      continue; // skip entries that are not objects
+    }
+    const record = entry as Record<string, unknown>;
+    const name = record.name;
+    const args = record.args;
+    if (typeof name === "string") { // entries without a string name are dropped
+      calls.push({ name, args: typeof args === "string" ? args : "" }); // non-string args are stored as ""
+    }
+  }
+  return calls.length > 0 ? calls : null; // if every entry was rejected, return null rather than []
+}
+
+function formatTurnBody(turn: TurnData): string { // renders one turn as markdown: role line, tool-call bullets, then content
+  const parts: string[] = [];
+  parts.push(`**Turn role:** ${turn.role}`);
+
+  if (turn.toolCalls !== null) {
+    for (const call of turn.toolCalls) {
+      const argsSuffix = call.args !== "" ? ` — \`${call.args}\`` : ""; // args are appended in backticks only when non-empty
+      parts.push(`- **${call.name}**${argsSuffix}`);
+    }
+  }
+
+  if (turn.content !== "") {
+    if (parts.length > 0) { // NOTE(review): always true here, because the role line is pushed unconditionally above
+      parts.push(""); // blank line separating the role/tool lines from the content
+    }
+    parts.push(turn.content);
+  }
+
+  return parts.join("\n");
+}
+
+function parseSingleTurn(
+  store: BootstrapCapableStore,
+  turnRef: unknown,
+  fallbackIndex: number,
+): TurnData | null { // reads one turn node from the store; null when the ref is unusable or the turn has nothing to display
+  if (typeof turnRef !== "string") {
+    return null;
+  }
+  const turnNode = store.get(turnRef as CasRef);
+  if (turnNode === null) {
+    return null; // store has no node for this ref
+  }
+  const turn = turnNode.payload as Record<string, unknown>;
+  const content = typeof turn.content === "string" ? turn.content : ""; // non-string content is treated as empty
+  const toolCalls = parseTurnToolCalls(turn.toolCalls);
+  if (content === "" && toolCalls === null) {
+    return null; // no text and no tool calls: skip this turn
+  }
+  return {
+    index: typeof turn.index === "number" ? turn.index : fallbackIndex, // caller-supplied index is used when the node has none
+    role: typeof turn.role === "string" ? turn.role : "assistant", // missing or non-string role defaults to "assistant"
+    content,
+    toolCalls,
+  };
+}
+
 /**
- * Load all turn nodes from CAS store and extract content
+ * Load all turn nodes from CAS store and extract display fields
 */
 function loadTurnData(store: BootstrapCapableStore, turns: unknown): TurnData[] {
  if (!Array.isArray(turns) || turns.length === 0) {
@@ -138,19 +211,9 @@ function loadTurnData(store: BootstrapCapableStore, turns: unknown): TurnData[]

  const turnData: TurnData[] = [];
  for (const turnRef of turns) {
-    if (typeof turnRef !== "string") {
-      continue;
-    }
-    const turnNode = store.get(turnRef as CasRef);
-    if (turnNode === null) {
-      continue;
-    }
-    const turn = turnNode.payload as Record<string, unknown>;
-    if (typeof turn.content === "string") {
-      turnData.push({
-        index: typeof turn.index === "number" ? turn.index : turnData.length,
-        content: turn.content,
-      });
+    const parsed = parseSingleTurn(store, turnRef, turnData.length);
+    if (parsed !== null) {
+      turnData.push(parsed);
    }
  }
  return turnData;
@@ -168,7 +231,7 @@ function selectTurnsForQuota(turnData: TurnData[], availableQuota: number): Turn
    if (turn === undefined) continue;

    const turnHeader = `## Turn ${turn.index + 1}\n\n`;
-    const turnBlock = turnHeader + turn.content;
+    const turnBlock = turnHeader + formatTurnBody(turn);
    const separatorCost = selectedTurns.length > 0 ? 2 : 0;
    const addCost = turnBlock.length + separatorCost;

@@ -213,7 +276,7 @@ function formatStepMarkdown(
    parts.push("");
    parts.push(`## Turn ${turn.index + 1}`);
    parts.push("");
-    parts.push(turn.content);
+    parts.push(formatTurnBody(turn));
  }

  return parts.join("\n");
@@ -331,7 +331,7 @@ export async function cmdThreadShow(storageRoot: string, threadId: ThreadId): Pr
  fail(`thread not found: ${threadId}`);
 }

-export type ThreadStatus = "idle" | "running" | "completed";
+export type ThreadStatus = "idle" | "running" | "completed" | "cancelled";

 export type ThreadListItemWithStatus = ThreadListItem & {
  status: ThreadStatus;
@@ -389,7 +389,7 @@ async function collectCompletedThreads(
        thread: entry.thread,
        workflow: entry.workflow,
        head: entry.head,
-        status: "completed",
+        status: entry.reason === "cancelled" ? "cancelled" : "completed",
      });
    }
  }
@@ -444,7 +444,10 @@ export async function cmdThreadList(
  let items = await collectActiveThreads(storageRoot, uwf, index);

  // Collect completed threads (if relevant for status filter)
-  const includeCompleted = statusFilter === null || statusFilter.includes("completed");
+  const includeCompleted =
+    statusFilter === null ||
+    statusFilter.includes("completed") ||
+    statusFilter.includes("cancelled");
  if (includeCompleted) {
    const activeIds = new Set(items.map((i) => i.thread));
    const completedItems = await collectCompletedThreads(storageRoot, activeIds);
@@ -811,6 +814,7 @@ async function archiveThread(
    workflow,
    head,
    completedAt: Date.now(),
+    reason: "completed",
  });
 }

@@ -1147,6 +1151,7 @@ export async function cmdThreadCancel(
    workflow,
    head,
    completedAt: Date.now(),
+    reason: "cancelled",
  };
  await appendThreadHistory(storageRoot, historyEntry);

@@ -88,6 +88,7 @@ export function getHistoryPath(storageRoot: string): string {

 export type ThreadHistoryLine = ThreadListItem & {
  completedAt: number;
+  reason: "completed" | "cancelled" | null;
 };

 export type UwfStore = {
@@ -228,7 +229,15 @@ export async function loadThreadHistory(storageRoot: string): Promise<ThreadHist
        typeof head === "string" &&
        typeof completedAt === "number"
      ) {
-        lines.push({ thread: thread as ThreadId, workflow, head, completedAt });
+        const reason = rec.reason;
+        const parsedReason = reason === "completed" || reason === "cancelled" ? reason : null;
+        lines.push({
+          thread: thread as ThreadId,
+          workflow,
+          head,
+          completedAt,
+          reason: parsedReason,
+        });
      }
    }
    return lines;
@@ -18,6 +18,15 @@ bun add -g @uncaged/workflow-agent-hermes

 Requires the `hermes` CLI on `PATH`.

+Hermes must write session JSON snapshots so `uwf-hermes` can load structured tool calls from disk. Add this to `~/.hermes/config.yaml`:
+
+```yaml
+sessions:
+  write_json_snapshots: true
+```
+
+Session files are stored at `~/.hermes/sessions/session_{sessionId}.json`.
+
 ## CLI Usage

 Invoked by `uwf thread step` (not typically run directly):
@@ -2,7 +2,7 @@ import { afterEach, beforeEach, describe, expect, it } from "bun:test";

 import { HermesAcpClient } from "../src/acp-client.js";

-describe("handleSessionUpdate — helper extraction", () => {
+describe("handleSessionUpdate — text extraction", () => {
  let client: HermesAcpClient;

  beforeEach(() => {
@@ -14,80 +14,41 @@ describe("handleSessionUpdate — helper extraction", () => {
  });

  it("agent_message_chunk accumulates text in messageChunks", () => {
-    (client as any).handleSessionUpdate({
+    (
+      client as unknown as { handleSessionUpdate: (u: Record<string, unknown>) => void }
+    ).handleSessionUpdate({
      sessionUpdate: "agent_message_chunk",
      content: { type: "text", text: "hello" },
    });
-    (client as any).handleSessionUpdate({
+    (
+      client as unknown as { handleSessionUpdate: (u: Record<string, unknown>) => void }
+    ).handleSessionUpdate({
      sessionUpdate: "agent_message_chunk",
      content: { type: "text", text: " world" },
    });
-    expect((client as any).messageChunks).toEqual(["hello", " world"]);
+    expect((client as unknown as { messageChunks: string[] }).messageChunks).toEqual([
+      "hello",
+      " world",
+    ]);
  });

-  it("agent_thought_chunk accumulates reasoning in reasoningChunks", () => {
-    (client as any).handleSessionUpdate({
-      sessionUpdate: "agent_thought_chunk",
-      content: { type: "text", text: "thinking" },
+  it("non-text chunks and other update types are ignored", () => {
+    (
+      client as unknown as { handleSessionUpdate: (u: Record<string, unknown>) => void }
+    ).handleSessionUpdate({
+      sessionUpdate: "agent_message_chunk",
+      content: { type: "image", text: "ignored" },
    });
-    expect((client as any).reasoningChunks).toEqual(["thinking"]);
-  });
-
-  it("tool_call registers a pending tool and flushes message chunks", () => {
-    (client as any).messageChunks = ["pre-tool text"];
-    (client as any).handleSessionUpdate({
+    (
+      client as unknown as { handleSessionUpdate: (u: Record<string, unknown>) => void }
+    ).handleSessionUpdate({
      sessionUpdate: "tool_call",
      title: "Bash",
-      rawInput: { command: "ls" },
      toolCallId: "tc-1",
    });
-    expect((client as any).pendingTools.get("tc-1")).toEqual({
-      name: "Bash",
-      args: JSON.stringify({ command: "ls" }),
-    });
-    expect((client as any).messageChunks).toEqual([]);
-    expect((client as any).messages).toHaveLength(1);
-    expect((client as any).messages[0].role).toBe("assistant");
-  });
-
-  it("tool_call_update completed pushes tool_call and tool messages", () => {
-    (client as any).pendingTools.set("tc-2", { name: "Read", args: '{"path":"/foo"}' });
-    (client as any).handleSessionUpdate({
-      sessionUpdate: "tool_call_update",
-      status: "completed",
-      toolCallId: "tc-2",
-      rawOutput: "file contents",
-    });
-    const msgs = (client as any).messages as Array<{
-      role: string;
-      tool_calls: unknown;
-      content: string | null;
-    }>;
-    expect(msgs).toHaveLength(2);
-    expect(msgs[0].role).toBe("assistant");
-    expect(msgs[0].tool_calls).toEqual([
-      { function: { name: "Read", arguments: '{"path":"/foo"}' } },
-    ]);
-    expect(msgs[1].role).toBe("tool");
-    expect(msgs[1].content).toBe("file contents");
-    expect((client as any).pendingTools.has("tc-2")).toBe(false);
-  });
-
-  it("tool_call_update with non-string rawOutput JSON-stringifies it", () => {
-    (client as any).pendingTools.set("tc-3", { name: "Fetch", args: "" });
-    (client as any).handleSessionUpdate({
-      sessionUpdate: "tool_call_update",
-      status: "completed",
-      toolCallId: "tc-3",
-      rawOutput: { html: "<p>page</p>" },
-    });
-    const msgs = (client as any).messages as Array<{ role: string; content: string | null }>;
-    expect(msgs[1].content).toBe(JSON.stringify({ html: "<p>page</p>" }));
-  });
-
-  it("unknown updateType is a no-op", () => {
-    (client as any).handleSessionUpdate({ sessionUpdate: "unknown_type", data: {} });
-    expect((client as any).messages).toHaveLength(0);
-    expect((client as any).messageChunks).toHaveLength(0);
+    (
+      client as unknown as { handleSessionUpdate: (u: Record<string, unknown>) => void }
+    ).handleSessionUpdate({ sessionUpdate: "unknown_type", data: {} });
+    expect((client as unknown as { messageChunks: string[] }).messageChunks).toHaveLength(0);
  });
 });
@@ -53,23 +53,4 @@ describe("HermesAcpClient", () => {
    },
    { timeout: 2 * 60 * 1000 },
  );
-
-  // TODO(#435): flaky — depends on live LLM; mock or move to integration suite
-  it.skip(
-    "prompt() collects structured messages including tool calls",
-    async () => {
-      await client.connect(process.cwd());
-      const result = await client.prompt("Run this command: echo TOOL_DETAIL_TEST");
-      expect(result.messages.length).toBeGreaterThan(0);
-      const toolMessages = result.messages.filter((m) => m.role === "tool");
-      expect(toolMessages.length).toBeGreaterThan(0);
-      const toolContent = toolMessages[0]?.content ?? "";
-      expect(toolContent).toContain("TOOL_DETAIL_TEST");
-      const assistantWithTools = result.messages.filter(
-        (m) => m.role === "assistant" && m.tool_calls !== null,
-      );
-      expect(assistantWithTools.length).toBeGreaterThan(0);
-    },
-    { timeout: 2 * 60 * 1000 },
-  );
 });
@@ -2,8 +2,6 @@ import type { ChildProcess } from "node:child_process";
 import { spawn } from "node:child_process";
 import { createInterface } from "node:readline";

-import type { HermesSessionMessage } from "./types.js";
-
 const HERMES_COMMAND = "hermes";
 const PROTOCOL_VERSION = 1;

@@ -19,16 +17,9 @@ type PendingRequest = {
  reject: (reason: Error) => void;
 };

-/** Tracks in-flight tool calls so we can build complete messages when they finish. */
-type PendingToolCall = {
-  name: string;
-  args: string;
-};
-
 export type AcpPromptResult = {
  text: string;
  sessionId: string;
-  messages: HermesSessionMessage[];
 };

 export class HermesAcpClient {
@@ -38,11 +29,8 @@ export class HermesAcpClient {
  private stderrBuffer = "";
  private pending = new Map<number, PendingRequest>();

-  // Message collection state
+  /** Accumulated assistant text chunks from agent_message_chunk updates. */
  private messageChunks: string[] = [];
-  private reasoningChunks: string[] = [];
-  private pendingTools = new Map<string, PendingToolCall>();
-  messages: HermesSessionMessage[] = [];

  /** Spawn hermes acp, initialize, create session */
  async connect(cwd: string): Promise<string> {
@@ -84,14 +72,13 @@ export class HermesAcpClient {
    return sessionId;
  }

-  /** Send prompt and collect full response text + structured messages. */
+  /** Send prompt and collect final assistant text from ACP stream chunks. */
  async prompt(text: string): Promise<AcpPromptResult> {
    if (this.sessionId === null) {
      throw new Error("Not connected — call connect() first");
    }

    this.messageChunks = [];
-    this.reasoningChunks = [];

    const response = await this.sendRequest("session/prompt", {
      sessionId: this.sessionId,
@@ -104,28 +91,9 @@ export class HermesAcpClient {
      );
    }

-    // Flush any trailing assistant text that wasn't followed by a tool call.
-    this.flushAssistantMessage();
-
-    // Extract the final assistant text from collected messages.
-    let finalText = "";
-    for (let i = this.messages.length - 1; i >= 0; i--) {
-      const msg = this.messages[i];
-      if (
-        msg !== undefined &&
-        msg.role === "assistant" &&
-        msg.content !== null &&
-        msg.content.trim() !== ""
-      ) {
-        finalText = msg.content;
-        break;
-      }
-    }
-
    return {
-      text: finalText,
+      text: this.messageChunks.join(""),
      sessionId: this.sessionId,
-      messages: this.messages,
    };
  }

@@ -242,94 +210,16 @@ export class HermesAcpClient {
    }
  }

-  // ---- Session update → structured messages ----
-
  private handleSessionUpdate(update: Record<string, unknown>): void {
-    switch (update.sessionUpdate as string) {
-      case "agent_message_chunk":
-        this.handleAgentMessageChunk(update);
-        break;
-      case "agent_thought_chunk":
-        this.handleAgentThoughtChunk(update);
-        break;
-      case "tool_call":
-        this.handleToolCall(update);
-        break;
-      case "tool_call_update":
-        this.handleToolCallUpdate(update);
-        break;
-      default:
-        break;
+    if (update.sessionUpdate !== "agent_message_chunk") {
+      return;
    }
-  }
-
-  private handleAgentMessageChunk(update: Record<string, unknown>): void {
    const content = update.content as { type?: string; text?: string } | undefined;
    if (content?.type === "text" && typeof content.text === "string") {
      this.messageChunks.push(content.text);
    }
  }

-  private handleAgentThoughtChunk(update: Record<string, unknown>): void {
-    const content = update.content as { type?: string; text?: string } | undefined;
-    if (content?.type === "text" && typeof content.text === "string") {
-      this.reasoningChunks.push(content.text);
-    }
-  }
-
-  private handleToolCall(update: Record<string, unknown>): void {
-    const title = (update.title as string) ?? "";
-    const rawInput = update.rawInput;
-    const args = rawInput !== undefined && rawInput !== null ? JSON.stringify(rawInput) : "";
-    const toolCallId = update.toolCallId as string;
-    this.pendingTools.set(toolCallId, { name: title, args });
-    this.flushAssistantMessage();
-  }
-
-  private handleToolCallUpdate(update: Record<string, unknown>): void {
-    const status = update.status as string | undefined;
-    if (status !== "completed" && status !== "failed") return;
-    const toolCallId = update.toolCallId as string;
-    const pending = this.pendingTools.get(toolCallId);
-    const toolName = pending?.name ?? toolCallId;
-    const rawOutput = update.rawOutput;
-    const outputStr =
-      rawOutput !== undefined && rawOutput !== null
-        ? typeof rawOutput === "string"
-          ? rawOutput
-          : JSON.stringify(rawOutput)
-        : "";
-    this.messages.push({
-      role: "assistant",
-      content: null,
-      reasoning: null,
-      tool_calls: [{ function: { name: toolName, arguments: pending?.args ?? "" } }],
-    });
-    this.messages.push({
-      role: "tool",
-      content: outputStr,
-      reasoning: null,
-      tool_calls: null,
-    });
-    this.pendingTools.delete(toolCallId);
-  }
-
-  /** Flush any accumulated text/reasoning into an assistant message. */
-  private flushAssistantMessage(): void {
-    const text = this.messageChunks.join("");
-    const reasoning = this.reasoningChunks.join("");
-    if (text !== "" || reasoning !== "") {
-      this.messages.push({
-        role: "assistant",
-        content: text || null,
-        reasoning: reasoning || null,
-        tool_calls: null,
-      });
-    }
-    this.messageChunks = [];
-    this.reasoningChunks = [];
-  }
-
  private rejectAll(err: Error): void {
    for (const handler of this.pending.values()) {
      handler.reject(err);
@@ -10,7 +10,7 @@ import {

 import { HermesAcpClient } from "./acp-client.js";
 import { getCachedSessionId, isResumeDisabled, setCachedSessionId } from "./session-cache.js";
-import { storeHermesSessionDetail } from "./session-detail.js";
+import { loadHermesSession, storeHermesSessionDetail } from "./session-detail.js";

 const log = createLogger({ sink: { kind: "stderr" } });

@@ -49,17 +49,11 @@ export function buildHermesPrompt(ctx: AgentContext): string {
  return parts.join("\n");
 }

-async function storePromptResult(
-  store: Store,
-  sessionId: string,
-  messages: Awaited<ReturnType<HermesAcpClient["prompt"]>>["messages"],
-): Promise<{ detailHash: string }> {
-  const session = {
-    session_id: sessionId,
-    model: "",
-    session_start: new Date().toISOString(),
-    messages,
-  };
+async function storePromptResult(store: Store, sessionId: string): Promise<{ detailHash: string }> {
+  const session = await loadHermesSession(sessionId);
+  if (session === null) {
+    throw new Error(`Hermes session file not found: ${sessionId}`);
+  }
  return storeHermesSessionDetail(store, session);
 }

@@ -116,8 +110,8 @@ export function createHermesAgent(): () => Promise<void> {
  async function runPrompt(ctx: AgentContext, useContinuation: boolean): Promise<AgentRunResult> {
    const effectiveCtx = useContinuation ? ctx : { ...ctx, isFirstVisit: true };
    const fullPrompt = buildHermesPrompt(effectiveCtx);
-    const { text, sessionId, messages } = await client.prompt(fullPrompt);
-    const { detailHash } = await storePromptResult(ctx.store, sessionId, messages);
+    const { text, sessionId } = await client.prompt(fullPrompt);
+    const { detailHash } = await storePromptResult(ctx.store, sessionId);

    if (!isResumeDisabled()) {
      await setCachedSessionId(ctx.threadId, ctx.role, sessionId);
@@ -152,8 +146,8 @@ export function createHermesAgent(): () => Promise<void> {
  ): Promise<AgentRunResult> {
    // Client is already connected from runHermes — same ACP session,
    // so the agent sees the full conversation history (crucial for retries).
-    const { text, sessionId, messages } = await client.prompt(message);
-    const { detailHash } = await storePromptResult(store, sessionId, messages);
+    const { text, sessionId } = await client.prompt(message);
+    const { detailHash } = await storePromptResult(store, sessionId);
    return { output: text, detailHash, sessionId };
  }

@@ -0,0 +1,60 @@
+export function generateArchitectureReference(): string { // Returns the static Markdown architecture reference (key concepts, data flow, storage layout); takes no arguments and interpolates nothing.
+  return `# Workflow Engine — Architecture Reference
+
+## Key Concepts
+
+### CAS (Content-Addressed Storage)
+Every artifact in the workflow engine is stored as a CAS node — an immutable, content-addressed record identified by its XXH64 hash (13-char Crockford Base32). CAS provides deduplication, integrity verification, and an append-only audit trail.
+
+Stored artifacts include:
+- **Workflow definitions** — the YAML-parsed payload
+- **Step nodes** — each moderator→agent→extract cycle
+- **Detail nodes** — per-step metadata and turn history
+- **Turn records** — individual agent interactions within a step
+
+### Thread
+A Thread is a single execution of a Workflow, identified by a ULID (26-char Crockford Base32: 10 timestamp + 16 random). Thread state is an immutable CAS chain — each step points to its predecessor via a \`prev\` hash, forming a linked list.
+
+Active threads are indexed in \`threads.yaml\`; completed threads move to \`history.jsonl\`.
+
+A thread progresses by running \`uwf thread exec\`, which performs one moderator→agent→extract cycle per step.
+
+### Workflow
+A Workflow is a YAML definition (\`WorkflowPayload\`) stored as a CAS node. It defines:
+- **Roles** — named actors with system prompts and output schemas
+- **Graph** — status-based routing edges between roles
+- **Conditions** — edge predicates evaluated by the moderator
+
+Workflow names follow verb-first kebab-case: \`solve-issue\`, \`review-code\`.
+
+### Step
+A Step is one moderator→agent→extract cycle, stored as a CAS node (\`StepNodePayload\`). Each step contains:
+- **output** — the agent's extracted frontmatter output
+- **detail** — a CAS reference to turn-level records
+- **prev** — CAS hash of the previous step (forming the chain)
+- **role** — which role produced this step
+
+### Turn
+A Turn is an agent-internal interaction within a single Step. Turns are stored per-turn in the detail node, capturing the raw agent I/O before extraction.
+
+## Data Flow
+
+\`\`\`
+uwf thread exec <thread-id>
+  → Moderator evaluates graph edges based on current status
+  → Selects next role (or $END)
+  → Agent CLI is spawned with context
+  → Agent produces frontmatter markdown
+  → Extract pipeline parses output into structured data
+  → New CAS step node is appended to the thread chain
+\`\`\`
+
+## Storage Layout
+
+All data lives under \`~/.uncaged/workflow/\`:
+- \`cas/\` — content-addressed store (XXH64-keyed)
+- \`threads.yaml\` — active thread index
+- \`history.jsonl\` — completed thread archive
+- \`registry.yaml\` — workflow name → CAS hash mapping
+`;
+}
@@ -1,3 +1,4 @@
+export { generateArchitectureReference } from "./architecture-reference.js";
 export { encodeUint64AsCrockford } from "./base32.js";
 export { generateCliReference } from "./cli-reference.js";
 export { env } from "./env.js";
@@ -13,6 +14,7 @@ export {
  validateFrontmatter,
 } from "./frontmatter-markdown/index.js";
 export { createLogger } from "./logger.js";
+export { generateModeratorReference } from "./moderator-reference.js";
 export type {
  CreateProcessLoggerOptions,
  ProcessLogFn,
@@ -25,3 +27,4 @@ export { err, ok } from "./result.js";
 export { getDefaultWorkflowStorageRoot, getGlobalCasDir } from "./storage-root.js";
 export type { LogFn, Result } from "./types.js";
 export { extractUlidTimestamp, generateUlid } from "./ulid.js";
+export { generateYamlReference } from "./yaml-reference.js";
@@ -0,0 +1,56 @@
+export function generateModeratorReference(): string { // Returns the static Markdown moderator reference (status-based routing, graph structure, special nodes); the triple-brace {{{field}}} placeholders are literal Mustache text, not JS interpolation.
+  return `# Moderator Reference
+
+## Overview
+
+The moderator is the workflow engine's routing component. It evaluates the directed graph defined in the workflow YAML to determine the next role (or \`$END\`) after each step — with zero LLM cost.
+
+## Status-Based Routing
+
+The moderator uses **status-based routing**: it inspects the previous step's extracted output (specifically the \`$status\` field) and looks up the corresponding edge in the graph.
+
+### Graph Structure
+
+The graph is a nested map: \`Record<Role | "$START", Record<Status, Target>>\`. Each role maps its possible \`$status\` values to a target with a \`role\` and \`prompt\`:
+
+\`\`\`yaml
+graph:
+  $START:
+    _: { role: planner, prompt: "Analyze the issue." }
+  planner:
+    ready: { role: developer, prompt: "Implement the plan (CAS hash: {{{plan}}})." }
+    insufficient_info: { role: $END, prompt: "Not enough info." }
+  developer:
+    done: { role: reviewer, prompt: "Review branch {{{branch}}} at {{{worktree}}}." }
+    failed: { role: $END, prompt: "Developer failed: {{{reason}}}." }
+  reviewer:
+    approved: { role: tester, prompt: "Run tests on {{{branch}}} at {{{worktree}}}." }
+    rejected: { role: developer, prompt: "Fix issues: {{{comments}}}." }
+\`\`\`
+
+### Routing Algorithm
+
+1. Look up \`graph[lastRole]\` to get the status map for the current role
+2. Look up \`statusMap[lastOutput.$status]\` to get the target
+3. If target role is \`$END\`, mark thread as completed
+4. Otherwise, render the edge prompt (Mustache templates with \`{{{field}}}\` from output) and spawn the next agent
+
+### Edge Prompts and Mustache Templates
+
+Edge prompts use triple-brace Mustache syntax (\`{{{field}}}\`) to interpolate values from the previous step's output into the next agent's task prompt. This passes structured data (branch names, file paths, CAS hashes) between roles without manual wiring.
+
+## Special Nodes
+
+- \`$START\` — entry point; uses status key \`_\` (unconditional) since there is no previous output
+- \`$END\` — terminal node; thread completes when reached and is moved to history
+
+## Integration with Steps
+
+Each \`uwf thread exec\` cycle:
+1. Moderator reads the thread's head step output
+2. Looks up \`graph[lastRole][output.$status]\` to pick the next role
+3. If next is \`$END\`, marks thread as completed
+4. Otherwise, renders the edge prompt and spawns the agent for the selected role
+5. Extract pipeline parses agent output → new step node → append to CAS chain
+`;
+}
@@ -0,0 +1,82 @@
+export function generateYamlReference(): string { // Returns the static Markdown reference for the workflow YAML schema (top-level structure, roles, frontmatter, graph); takes no arguments and interpolates nothing.
+  return `# Workflow YAML Schema Reference
+
+## Top-Level Structure
+
+A workflow YAML file defines the complete workflow specification:
+
+\`\`\`yaml
+name: solve-issue          # verb-first kebab-case identifier
+description: "..."         # human-readable description
+
+roles:                     # named actors in the workflow
+  planner:
+    description: "Analyzes issue and outputs a plan"
+    goal: "You are a planning agent."
+    capabilities:
+      - issue-analysis
+      - planning
+    procedure: |
+      1. Read the issue
+      2. Produce a test spec
+    output: "Output the plan summary. Set $status to ready or insufficient_info."
+    frontmatter:           # JSON Schema for structured output (drives routing)
+      oneOf:
+        - properties:
+            $status: { const: ready }
+            plan: { type: string }
+          required: [$status, plan]
+        - properties:
+            $status: { const: insufficient_info }
+          required: [$status]
+
+graph:                     # status-based routing (nested map)
+  $START:
+    _: { role: planner, prompt: "Analyze the issue." }
+  planner:
+    ready: { role: developer, prompt: "Implement plan {{{plan}}}." }
+    insufficient_info: { role: $END, prompt: "Not enough info." }
+\`\`\`
+
+## roles
+
+Each role defines an actor in the workflow:
+
+| Field | Type | Description |
+|-------|------|-------------|
+| \`description\` | string | Short description of the role's purpose |
+| \`goal\` | string | System-level goal statement for the agent |
+| \`capabilities\` | string[] | Tags describing what the role can do |
+| \`procedure\` | string | Step-by-step instructions for the agent |
+| \`output\` | string | Description of expected output format |
+| \`frontmatter\` | JSON Schema | Defines the structured output the agent must produce |
+
+### frontmatter
+
+The \`frontmatter\` field is a standard JSON Schema object. The extract pipeline validates agent output against it. Key conventions:
+- \`$status\` field drives routing decisions in the graph
+- Use \`const\` or \`enum\` to constrain status values
+- Use \`oneOf\` to define multiple valid output shapes (one per status)
+- All \`required\` fields must appear in the agent's frontmatter output
+
+## graph
+
+The graph is a nested map defining status-based routing:
+
+\`\`\`
+Record<Role | "$START", Record<Status, { role: string, prompt: string }>>
+\`\`\`
+
+| Level | Key | Value |
+|-------|-----|-------|
+| Outer | Role name or \`$START\` | Status map for that role |
+| Inner | \`$status\` value (or \`_\` for unconditional) | Target: \`{ role, prompt }\` |
+
+### Special Nodes
+- \`$START\` — entry point; uses status key \`_\` (unconditional, no previous output)
+- \`$END\` — terminal node; thread completes when reached
+
+### Edge Prompts
+Prompts use triple-brace Mustache templates (\`{{{field}}}\`) to interpolate values from the previous step's output. Example: \`"Implement plan {{{plan}}} in repo {{{repoPath}}}."\`
+`;
+}
Author	SHA1	Message	Date
xiaoju	96039dbbbf	fix: cancelled threads show distinct status instead of completed CI / test (pull_request) Failing after 34s Details Fixes #522	2026-05-25 15:39:59 +00:00
xiaomo	4a39d3fdef	Merge pull request 'feat(skill): expand uwf skill with architecture, yaml, moderator, list subcommands' (#521 ) from fix/517-expand-skill into main CI / test (push) Failing after 22m38s Details	2026-05-25 15:00:34 +00:00
xingyue	4de13cea44	fix: correct skill references and remove hardcoded test path CI / test (pull_request) Failing after 23m48s Details - moderator-reference: use nested map graph format matching evaluate.ts - yaml-reference: use goal/procedure/output/capabilities/frontmatter fields matching actual WorkflowPayload, not fabricated system/outputSchema - skill.test.ts: replace hardcoded absolute path with __dirname-relative - skill.test.ts: assert 'frontmatter' instead of 'outputSchema'	2026-05-25 22:59:38 +08:00
xingyue	d9d542c570	fix: correct biome suppressions and formatting for #517 CI / test (pull_request) Failing after 9m9s Details	2026-05-25 22:47:00 +08:00
xingyue	cf6115517c	fix: auto-fix biome lint violations in skill.test.ts	2026-05-25 22:44:32 +08:00
xingyue	108f134020	feat(skill): add architecture, yaml, moderator, list subcommands (#517 )	2026-05-25 22:42:05 +08:00
xiaomo	8123399189	Merge pull request 'fix(uwf-hermes): read turn data from session file instead of ACP stream' (#520 ) from fix/519-read-session-file into main CI / test (push) Failing after 17m33s Details	2026-05-25 14:24:41 +00:00
xingyue	6324122168	fix(uwf-hermes): read turn data from Hermes session file instead of ACP stream CI / test (pull_request) Failing after 12m19s Details Closes #519 The ACP protocol's tool_call updates only carry a display title (not a structured tool name) and omit rawInput for polished tools, making the reconstructed messages unusable for step read/show. Changes: - hermes.ts: storePromptResult reads ~/.hermes/sessions/session_{id}.json via loadHermesSession() instead of using ACP-reconstructed messages - acp-client.ts: strip message/tool-call collection logic, keep only text chunk accumulation for final response extraction - step.ts: TurnData gains role + toolCalls fields; formatTurnBody renders them in step read markdown output - README: document sessions.write_json_snapshots requirement	2026-05-25 22:21:03 +08:00
xiaoju	25b411f22e	Merge pull request 'fix(validate): support enum-based multi-exit frontmatter schemas' (#518 ) from fix/enum-multi-exit-validation into main CI / test (push) Failing after 15m56s Details	2026-05-25 13:23:10 +00:00