fix: correct skill references and remove hardcoded test path

- moderator-reference: use nested map graph format matching evaluate.ts
- yaml-reference: use goal/procedure/output/capabilities/frontmatter fields matching actual WorkflowPayload, not fabricated system/outputSchema
- skill.test.ts: replace hardcoded absolute path with a __dirname-relative path
- skill.test.ts: assert 'frontmatter' instead of 'outputSchema'
2026-05-25 22:59:38 +08:00
parent d9d542c570
commit 4de13cea44
3 changed files with 84 additions and 78 deletions
@@ -1,5 +1,10 @@
 import { execFileSync } from "node:child_process";
+import { dirname, join } from "node:path";
+import { fileURLToPath } from "node:url";
 import { describe, expect, test } from "vitest";
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+
 import {
  cmdSkillArchitecture,
  cmdSkillCli,
@@ -37,7 +42,7 @@ describe("skill commands", () => {
    expect(typeof result).toBe("string");
    expect(result).toContain("roles");
    expect(result).toContain("graph");
-    expect(result).toContain("outputSchema");
+    expect(result).toContain("frontmatter");
    expect(result.length).toBeGreaterThan(200);
  });

@@ -59,7 +64,7 @@ describe("skill commands", () => {

  test("skill help subcommand is suppressed", () => {
    const output = execFileSync("bun", ["src/cli.ts", "skill", "--help"], {
-      cwd: "/Users/scottwei/Code/workflow/.worktrees/fix/517-expand-skill/packages/cli-workflow",
+      cwd: join(__dirname, "..", ".."),
      encoding: "utf-8",
      env: { ...process.env, PATH: `/opt/homebrew/bin:${process.env.PATH}` },
    });
@@ -7,58 +7,50 @@ The moderator is the workflow engine's routing component. It evaluates the direc

 ## Status-Based Routing

-The moderator uses **status-based routing**: it inspects the previous step's extracted output (specifically the \`$status\` field and other output fields) and matches them against edge conditions in the graph.
+The moderator uses **status-based routing**: it inspects the previous step's extracted output (specifically the \`$status\` field) and looks up the corresponding edge in the graph.

-### Routing Algorithm
+### Graph Structure

-1. Find all edges where \`from\` matches the current role
-2. For each edge (in order), evaluate the \`when\` condition:
-   - If \`when\` is absent → unconditional match (always taken)
-   - If \`when\` is present → every key/value pair must match the step output
-3. The first matching edge determines the next role
-4. If no edge matches → thread stalls (error condition)
-
-### Example
+The graph is a nested map: \`Record<Role | "$START", Record<Status, Target>>\`. Each role maps its possible \`$status\` values to a target with a \`role\` and \`prompt\`:

 \`\`\`yaml
 graph:
-  - from: developer
-    to: reviewer
-    when:
-      $status: done
-  - from: developer
-    to: $END
-    when:
-      $status: failed
-  - from: reviewer
-    to: developer
-    when:
-      $status: needs-changes
-  - from: reviewer
-    to: $END
-    when:
-      $status: approved
+  $START:
+    _: { role: planner, prompt: "Analyze the issue." }
+  planner:
+    ready: { role: developer, prompt: "Implement the plan (CAS hash: {{{plan}}})." }
+    insufficient_info: { role: $END, prompt: "Not enough info." }
+  developer:
+    done: { role: reviewer, prompt: "Review branch {{{branch}}} at {{{worktree}}}." }
+    failed: { role: $END, prompt: "Developer failed: {{{reason}}}." }
+  reviewer:
+    approved: { role: tester, prompt: "Run tests on {{{branch}}} at {{{worktree}}}." }
+    rejected: { role: developer, prompt: "Fix issues: {{{comments}}}." }
 \`\`\`

-In this graph:
- After \`developer\` produces \`$status: done\`, the moderator routes to \`reviewer\`
- After \`reviewer\` produces \`$status: needs-changes\`, it routes back to \`developer\`
- \`$status: failed\` or \`$status: approved\` terminates the thread
+### Routing Algorithm

-## Edge Evaluation Details
+1. Look up \`graph[lastRole]\` to get the status map for the current role
+2. Look up \`statusMap[lastOutput.$status]\` to get the target
+3. If target role is \`$END\`, mark thread as completed
+4. Otherwise, render the edge prompt (a Mustache template whose \`{{{field}}}\` placeholders are filled from the previous step's output) and spawn the next agent

- Edges are evaluated **in declaration order** — put specific conditions before general ones
- \`when\` values are compared as **exact string matches**
- Multiple \`when\` fields are ANDed — all must match
- An edge without \`when\` acts as a **fallback** — place it last
+### Edge Prompts and Mustache Templates
+
+Edge prompts use triple-brace Mustache syntax (\`{{{field}}}\`) to interpolate values from the previous step's output into the next agent's task prompt. This passes structured data (branch names, file paths, CAS hashes) between roles without manual wiring.
+
+## Special Nodes
+
+- \`$START\` — entry point; uses status key \`_\` (unconditional) since there is no previous output
+- \`$END\` — terminal node; thread completes when reached and is moved to history

 ## Integration with Steps

 Each \`uwf thread exec\` cycle:
 1. Moderator reads the thread's head step output
-2. Evaluates graph edges to pick the next role
+2. Looks up \`graph[lastRole][output.$status]\` to pick the next role
 3. If next is \`$END\`, marks thread as completed
-4. Otherwise, spawns the agent for the selected role
+4. Otherwise, renders the edge prompt and spawns the agent for the selected role
 5. Extract pipeline parses agent output → new step node → append to CAS chain
 `;
 }
@@ -11,29 +11,31 @@ description: "..."         # human-readable description

 roles:                     # named actors in the workflow
  planner:
-    system: |              # system prompt for the agent
-      You are a planner...
-    outputSchema:          # JSON Schema for structured output
-      type: object
-      required: [plan, $status]
-      properties:
-        plan:
-          type: string
-        $status:
-          type: string
-          enum: [ready, failed]
+    description: "Analyzes issue and outputs a plan"
+    goal: "You are a planning agent."
+    capabilities:
+      - issue-analysis
+      - planning
+    procedure: |
+      1. Read the issue
+      2. Produce a test spec
+    output: "Output the plan summary. Set $status to ready or insufficient_info."
+    frontmatter:           # JSON Schema for structured output (drives routing)
+      oneOf:
+        - properties:
+            $status: { const: ready }
+            plan: { type: string }
+          required: [$status, plan]
+        - properties:
+            $status: { const: insufficient_info }
+          required: [$status]

-graph:                     # status-based routing edges
-  - from: $START
-    to: planner
-  - from: planner
-    to: developer
-    when:
-      $status: ready
-  - from: planner
-    to: $END
-    when:
-      $status: failed
+graph:                     # status-based routing (nested map)
+  $START:
+    _: { role: planner, prompt: "Analyze the issue." }
+  planner:
+    ready: { role: developer, prompt: "Implement plan {{{plan}}}." }
+    insufficient_info: { role: $END, prompt: "Not enough info." }
 \`\`\`

 ## roles
@@ -42,32 +44,39 @@ Each role defines an actor in the workflow:

 | Field | Type | Description |
 |-------|------|-------------|
-| \`system\` | string | System prompt — instructions for the agent |
-| \`outputSchema\` | JSON Schema | Defines the structured output the agent must produce |
-| \`agent\` | string (optional) | Override the default agent command for this role |
+| \`description\` | string | Short description of the role's purpose |
+| \`goal\` | string | System-level goal statement for the agent |
+| \`capabilities\` | string[] | Tags describing what the role can do |
+| \`procedure\` | string | Step-by-step instructions for the agent |
+| \`output\` | string | Description of expected output format |
+| \`frontmatter\` | JSON Schema | Defines the structured output the agent must produce |

-### outputSchema
+### frontmatter

-The \`outputSchema\` is a standard JSON Schema object. The extract pipeline validates agent output against it. Key conventions:
+The \`frontmatter\` field is a standard JSON Schema object. The extract pipeline validates agent output against it. Key conventions:
 - \`$status\` field drives routing decisions in the graph
- Use \`enum\` to constrain status values
- All required fields must appear in the agent's frontmatter output
+- Use \`const\` or \`enum\` to constrain status values
+- Use \`oneOf\` to define multiple valid output shapes (one per status)
+- All \`required\` fields must appear in the agent's frontmatter output

 ## graph

-The graph is an array of directed edges defining status-based routing:
+The graph is a nested map defining status-based routing:

-| Field | Type | Description |
-|-------|------|-------------|
-| \`from\` | string | Source role name, or \`$START\` |
-| \`to\` | string | Target role name, or \`$END\` |
-| \`when\` | object | Condition map — field/value pairs to match against previous output |
+\`\`\`
+Record<Role | "$START", Record<Status, { role: string, prompt: string }>>
+\`\`\`
+
+| Level | Key | Value |
+|-------|-----|-------|
+| Outer | Role name or \`$START\` | Status map for that role |
+| Inner | \`$status\` value (or \`_\` for unconditional) | Target: \`{ role, prompt }\` |

 ### Special Nodes
- \`$START\` — entry point, must have exactly one outgoing edge
- \`$END\` — terminal node, thread completes when reached
+- \`$START\` — entry point; uses status key \`_\` (unconditional, no previous output)
+- \`$END\` — terminal node; thread completes when reached

-### Edge Evaluation
-Edges are evaluated in order. The first edge whose \`when\` condition matches the current step output is selected. If no \`when\` is specified, the edge is unconditional (always matches).
+### Edge Prompts
+Prompts use triple-brace Mustache templates (\`{{{field}}}\`) to interpolate values from the previous step's output. Example: \`"Implement plan {{{plan}}} in repo {{{repoPath}}}."\`
 `;
 }