Merge pull request 'feat(skill): expand uwf skill with architecture, yaml, moderator, list subcommands' (#521) from fix/517-expand-skill into main

2026-05-25 15:00:34 +00:00
parent 8123399189 4de13cea44
commit 4a39d3fdef
7 changed files with 327 additions and 2 deletions
@@ -0,0 +1,78 @@
 import { execFileSync } from "node:child_process";
 import { dirname, join } from "node:path";
 import { fileURLToPath } from "node:url";
 import { describe, expect, test } from "vitest";
 const __dirname = dirname(fileURLToPath(import.meta.url));
 import {
  cmdSkillArchitecture,
  cmdSkillCli,
  cmdSkillList,
  cmdSkillModerator,
  cmdSkillYaml,
 } from "../commands/skill.js";
 // Exercises the skill reference generators directly, then checks the `skill --help` output
 // by running the CLI entry point in a child process.
 describe("skill commands", () => {
   test("skill list returns all skill names", () => {
     const names = cmdSkillList();
     expect(names).toBeInstanceOf(Array);
     for (const expected of ["cli", "architecture", "yaml", "moderator"]) {
       expect(names).toContain(expected);
     }
     // Every entry has to be a non-empty string without any whitespace.
     for (const entry of names) {
       expect(typeof entry).toBe("string");
       expect(entry).toMatch(/^\S+$/);
     }
   });

   test("skill architecture returns non-empty markdown string", () => {
     const markdown = cmdSkillArchitecture();
     expect(typeof markdown).toBe("string");
     for (const concept of ["CAS", "Thread", "Workflow", "Step"]) {
       expect(markdown).toContain(concept);
     }
     expect(markdown.length).toBeGreaterThan(200);
   });

   test("skill yaml returns non-empty markdown string", () => {
     const markdown = cmdSkillYaml();
     expect(typeof markdown).toBe("string");
     for (const section of ["roles", "graph", "frontmatter"]) {
       expect(markdown).toContain(section);
     }
     expect(markdown.length).toBeGreaterThan(200);
   });

   test("skill moderator returns non-empty markdown string", () => {
     const markdown = cmdSkillModerator();
     expect(typeof markdown).toBe("string");
     for (const topic of ["routing", "status"]) {
       expect(markdown).toContain(topic);
     }
     expect(markdown.length).toBeGreaterThan(200);
     // Either word is acceptable, in any letter case.
     expect(markdown).toMatch(/edge|graph/i);
   });

   test("skill cli returns CLI reference markdown", () => {
     const markdown = cmdSkillCli();
     expect(typeof markdown).toBe("string");
     expect(markdown).toContain("uwf");
   });

   test("skill help subcommand is suppressed", () => {
     // The CLI is launched from two directories above this test file.
     const packageRoot = join(__dirname, "..", "..");
     const helpText = execFileSync("bun", ["src/cli.ts", "skill", "--help"], {
       cwd: packageRoot,
       encoding: "utf-8",
       // NOTE(review): /opt/homebrew/bin is prepended presumably so a Homebrew-installed
       // bun is found — confirm this is still needed.
       env: { ...process.env, PATH: `/opt/homebrew/bin:${process.env.PATH}` },
     });
     // The help text must not advertise a `help [command]` entry...
     expect(helpText).not.toMatch(/help\s+\[command\]/i);
     // ...but must still mention each real subcommand.
     for (const subcommand of ["cli", "architecture", "yaml", "moderator", "list"]) {
       expect(helpText).toContain(subcommand);
     }
   });
 });
@@ -15,7 +15,13 @@ import {
 } from "./commands/cas.js";
 import { cmdLogClean, cmdLogList, cmdLogShow } from "./commands/log.js";
 import { cmdSetup, cmdSetupInteractive } from "./commands/setup.js";
-import { cmdSkillCli } from "./commands/skill.js";
+import {
  cmdSkillArchitecture,
  cmdSkillCli,
  cmdSkillList,
  cmdSkillModerator,
  cmdSkillYaml,
 } from "./commands/skill.js";
 import { cmdStepFork, cmdStepList, cmdStepRead, cmdStepShow } from "./commands/step.js";
 import {
  cmdThreadCancel,
@@ -473,6 +479,7 @@ For more information, see: uwf help thread list
  });
 // Parent `skill` command; the subcommands registered on it print built-in reference text.
 const skill = program.command("skill").description("Built-in skill references for agents");
 // Disable the implicit `help [command]` subcommand for this group.
 skill.addHelpCommand(false);
 skill
  .command("cli")
@@ -481,6 +488,34 @@ skill
    console.log(cmdSkillCli());
  });
 // Table of the remaining `skill` subcommands.
 // Each row is [subcommand name, help description, function returning the text to print].
 // Rows are registered top to bottom: architecture, yaml, moderator, list.
 const skillReferenceCommands: ReadonlyArray<readonly [string, string, () => string]> = [
   ["architecture", "Print the architecture reference", () => cmdSkillArchitecture()],
   ["yaml", "Print the workflow YAML schema reference", () => cmdSkillYaml()],
   ["moderator", "Print the moderator reference", () => cmdSkillModerator()],
   // `list` prints one skill name per line.
   ["list", "List all available skill names", () => cmdSkillList().join("\n")],
 ];
 for (const [subcommand, summary, render] of skillReferenceCommands) {
   skill
     .command(subcommand)
     .description(summary)
     .action(() => {
       // The text is produced only when the subcommand actually runs.
       console.log(render());
     });
 }
 program
  .command("setup")
  .description("Configure provider, model, and agent")
@@ -1 +1,12 @@
-export { generateCliReference as cmdSkillCli } from "@uncaged/workflow-util";
+export {
  generateArchitectureReference as cmdSkillArchitecture,
  generateCliReference as cmdSkillCli,
  generateModeratorReference as cmdSkillModerator,
  generateYamlReference as cmdSkillYaml,
 } from "@uncaged/workflow-util";
 /** Names of the built-in skill references, in the order they are returned. */
 const SKILL_NAMES = ["cli", "architecture", "yaml", "moderator"] as const;

 /**
  * Lists the names of the available skill references.
  *
  * @returns A newly allocated array on every call, so callers never receive the
  *   module-level tuple itself.
  */
 export function cmdSkillList(): ReadonlyArray<string> {
   return Array.from(SKILL_NAMES);
 }
@@ -0,0 +1,60 @@
 /**
  * Returns the static Markdown text of the workflow engine architecture reference.
  *
  * The text has three parts: key concepts (CAS, Thread, Workflow, Step, Turn), the data
  * flow of `uwf thread exec`, and the storage layout. The function takes no input and the
  * template literal contains no interpolation, so every call returns the same string.
  * Backticks inside the template are backslash-escaped so they appear literally in the
  * output as Markdown code spans and fences.
  *
  * @returns The architecture reference as a Markdown string.
  */
 export function generateArchitectureReference(): string {
  return `# Workflow Engine — Architecture Reference
 ## Key Concepts
 ### CAS (Content-Addressed Storage)
 Every artifact in the workflow engine is stored as a CAS node — an immutable, content-addressed record identified by its XXH64 hash (13-char Crockford Base32). CAS provides deduplication, integrity verification, and an append-only audit trail.
 Stored artifacts include:
 - **Workflow definitions** — the YAML-parsed payload
 - **Step nodes** — each moderator→agent→extract cycle
 - **Detail nodes** — per-step metadata and turn history
 - **Turn records** — individual agent interactions within a step
 ### Thread
 A Thread is a single execution of a Workflow, identified by a ULID (26-char Crockford Base32: 10 timestamp + 16 random). Thread state is an immutable CAS chain — each step points to its predecessor via a \`prev\` hash, forming a linked list.
 Active threads are indexed in \`threads.yaml\`; completed threads move to \`history.jsonl\`.
 A thread progresses by running \`uwf thread exec\`, which performs one moderator→agent→extract cycle per step.
 ### Workflow
 A Workflow is a YAML definition (\`WorkflowPayload\`) stored as a CAS node. It defines:
 - **Roles** — named actors with system prompts and output schemas
 - **Graph** — status-based routing edges between roles
 - **Conditions** — edge predicates evaluated by the moderator
 Workflow names follow verb-first kebab-case: \`solve-issue\`, \`review-code\`.
 ### Step
 A Step is one moderator→agent→extract cycle, stored as a CAS node (\`StepNodePayload\`). Each step contains:
 - **output** — the agent's extracted frontmatter output
 - **detail** — a CAS reference to turn-level records
 - **prev** — CAS hash of the previous step (forming the chain)
 - **role** — which role produced this step
 ### Turn
 A Turn is an agent-internal interaction within a single Step. Turns are stored per-turn in the detail node, capturing the raw agent I/O before extraction.
 ## Data Flow
 \`\`\`
 uwf thread exec <thread-id>
  → Moderator evaluates graph edges based on current status
  → Selects next role (or $END)
  → Agent CLI is spawned with context
  → Agent produces frontmatter markdown
  → Extract pipeline parses output into structured data
  → New CAS step node is appended to the thread chain
 \`\`\`
 ## Storage Layout
 All data lives under \`~/.uncaged/workflow/\`:
 - \`cas/\` — content-addressed store (XXH64-keyed)
 - \`threads.yaml\` — active thread index
 - \`history.jsonl\` — completed thread archive
 - \`registry.yaml\` — workflow name → CAS hash mapping
 `;
 }
@@ -1,3 +1,4 @@
 export { generateArchitectureReference } from "./architecture-reference.js";
 export { encodeUint64AsCrockford } from "./base32.js";
 export { generateCliReference } from "./cli-reference.js";
 export { env } from "./env.js";
@@ -13,6 +14,7 @@ export {
  validateFrontmatter,
 } from "./frontmatter-markdown/index.js";
 export { createLogger } from "./logger.js";
 export { generateModeratorReference } from "./moderator-reference.js";
 export type {
  CreateProcessLoggerOptions,
  ProcessLogFn,
@@ -25,3 +27,4 @@ export { err, ok } from "./result.js";
 export { getDefaultWorkflowStorageRoot, getGlobalCasDir } from "./storage-root.js";
 export type { LogFn, Result } from "./types.js";
 export { extractUlidTimestamp, generateUlid } from "./ulid.js";
 export { generateYamlReference } from "./yaml-reference.js";
@@ -0,0 +1,56 @@
 /**
  * Returns the static Markdown text of the moderator reference.
  *
  * The text describes status-based routing: the graph structure with a YAML example, the
  * routing algorithm, edge prompts, the special `$START` / `$END` nodes, and how routing
  * fits into a `uwf thread exec` cycle. The function takes no input and the template
  * literal contains no interpolation: the `$`-prefixed names and the triple-brace
  * Mustache placeholders are emitted as literal text, so every call returns the same
  * string. Backticks inside the template are backslash-escaped so they appear literally
  * in the output.
  *
  * @returns The moderator reference as a Markdown string.
  */
 export function generateModeratorReference(): string {
  return `# Moderator Reference
 ## Overview
 The moderator is the workflow engine's routing component. It evaluates the directed graph defined in the workflow YAML to determine the next role (or \`$END\`) after each step — with zero LLM cost.
 ## Status-Based Routing
 The moderator uses **status-based routing**: it inspects the previous step's extracted output (specifically the \`$status\` field) and looks up the corresponding edge in the graph.
 ### Graph Structure
 The graph is a nested map: \`Record<Role | "$START", Record<Status, Target>>\`. Each role maps its possible \`$status\` values to a target with a \`role\` and \`prompt\`:
 \`\`\`yaml
 graph:
  $START:
    _: { role: planner, prompt: "Analyze the issue." }
  planner:
    ready: { role: developer, prompt: "Implement the plan (CAS hash: {{{plan}}})." }
    insufficient_info: { role: $END, prompt: "Not enough info." }
  developer:
    done: { role: reviewer, prompt: "Review branch {{{branch}}} at {{{worktree}}}." }
    failed: { role: $END, prompt: "Developer failed: {{{reason}}}." }
  reviewer:
    approved: { role: tester, prompt: "Run tests on {{{branch}}} at {{{worktree}}}." }
    rejected: { role: developer, prompt: "Fix issues: {{{comments}}}." }
 \`\`\`
 ### Routing Algorithm
 1. Look up \`graph[lastRole]\` to get the status map for the current role
 2. Look up \`statusMap[lastOutput.$status]\` to get the target
 3. If target role is \`$END\`, mark thread as completed
 4. Otherwise, render the edge prompt (Mustache templates with \`{{{field}}}\` from output) and spawn the next agent
 ### Edge Prompts and Mustache Templates
 Edge prompts use triple-brace Mustache syntax (\`{{{field}}}\`) to interpolate values from the previous step's output into the next agent's task prompt. This passes structured data (branch names, file paths, CAS hashes) between roles without manual wiring.
 ## Special Nodes
 - \`$START\` — entry point; uses status key \`_\` (unconditional) since there is no previous output
 - \`$END\` — terminal node; thread completes when reached and is moved to history
 ## Integration with Steps
 Each \`uwf thread exec\` cycle:
 1. Moderator reads the thread's head step output
 2. Looks up \`graph[lastRole][output.$status]\` to pick the next role
 3. If next is \`$END\`, marks thread as completed
 4. Otherwise, renders the edge prompt and spawns the agent for the selected role
 5. Extract pipeline parses agent output → new step node → append to CAS chain
 `;
 }
@@ -0,0 +1,82 @@
 /**
  * Returns the static Markdown text of the workflow YAML schema reference.
  *
  * The text shows an example workflow file, then documents the `roles` fields (including
  * `frontmatter`) and the `graph` routing map with its special nodes and edge prompts.
  * The function takes no input and the template literal contains no interpolation: the
  * `$`-prefixed names and the triple-brace Mustache placeholders are emitted as literal
  * text, so every call returns the same string. Backticks inside the template are
  * backslash-escaped so they appear literally in the output.
  *
  * @returns The YAML schema reference as a Markdown string.
  */
 export function generateYamlReference(): string {
  return `# Workflow YAML Schema Reference
 ## Top-Level Structure
 A workflow YAML file defines the complete workflow specification:
 \`\`\`yaml
 name: solve-issue          # verb-first kebab-case identifier
 description: "..."         # human-readable description
 roles:                     # named actors in the workflow
  planner:
    description: "Analyzes issue and outputs a plan"
    goal: "You are a planning agent."
    capabilities:
      - issue-analysis
      - planning
    procedure: |
      1. Read the issue
      2. Produce a test spec
    output: "Output the plan summary. Set $status to ready or insufficient_info."
    frontmatter:           # JSON Schema for structured output (drives routing)
      oneOf:
        - properties:
            $status: { const: ready }
            plan: { type: string }
          required: [$status, plan]
        - properties:
            $status: { const: insufficient_info }
          required: [$status]
 graph:                     # status-based routing (nested map)
  $START:
    _: { role: planner, prompt: "Analyze the issue." }
  planner:
    ready: { role: developer, prompt: "Implement plan {{{plan}}}." }
    insufficient_info: { role: $END, prompt: "Not enough info." }
 \`\`\`
 ## roles
 Each role defines an actor in the workflow:
 | Field | Type | Description |
 |-------|------|-------------|
 | \`description\` | string | Short description of the role's purpose |
 | \`goal\` | string | System-level goal statement for the agent |
 | \`capabilities\` | string[] | Tags describing what the role can do |
 | \`procedure\` | string | Step-by-step instructions for the agent |
 | \`output\` | string | Description of expected output format |
 | \`frontmatter\` | JSON Schema | Defines the structured output the agent must produce |
 ### frontmatter
 The \`frontmatter\` field is a standard JSON Schema object. The extract pipeline validates agent output against it. Key conventions:
 - \`$status\` field drives routing decisions in the graph
 - Use \`const\` or \`enum\` to constrain status values
 - Use \`oneOf\` to define multiple valid output shapes (one per status)
 - All \`required\` fields must appear in the agent's frontmatter output
 ## graph
 The graph is a nested map defining status-based routing:
 \`\`\`
 Record<Role | "$START", Record<Status, { role: string, prompt: string }>>
 \`\`\`
 | Level | Key | Value |
 |-------|-----|-------|
 | Outer | Role name or \`$START\` | Status map for that role |
 | Inner | \`$status\` value (or \`_\` for unconditional) | Target: \`{ role, prompt }\` |
 ### Special Nodes
 - \`$START\` — entry point; uses status key \`_\` (unconditional, no previous output)
 - \`$END\` — terminal node; thread completes when reached
 ### Edge Prompts
 Prompts use triple-brace Mustache templates (\`{{{field}}}\`) to interpolate values from the previous step's output. Example: \`"Implement plan {{{plan}}} in repo {{{repoPath}}}."\`
 `;
 }