diff --git a/packages/cli-workflow/src/__tests__/skill.test.ts b/packages/cli-workflow/src/__tests__/skill.test.ts
new file mode 100644
index 0000000..e739b3c
--- /dev/null
+++ b/packages/cli-workflow/src/__tests__/skill.test.ts
@@ -0,0 +1,68 @@
+import { describe, expect, test } from "vitest";
+import { cmdSkillCli, cmdSkillArchitecture, cmdSkillYaml, cmdSkillModerator, cmdSkillList } from "../commands/skill.js";
+import { execFileSync } from "node:child_process";
+
+describe("skill commands", () => {
+  test("skill list returns all skill names", () => {
+    const result = cmdSkillList();
+    expect(result).toBeInstanceOf(Array);
+    expect(result).toContain("cli");
+    expect(result).toContain("architecture");
+    expect(result).toContain("yaml");
+    expect(result).toContain("moderator");
+    for (const name of result) {
+      expect(typeof name).toBe("string");
+      expect(name).toMatch(/^\S+$/);
+    }
+  });
+
+  test("skill architecture returns non-empty markdown string", () => {
+    const result = cmdSkillArchitecture();
+    expect(typeof result).toBe("string");
+    expect(result).toContain("CAS");
+    expect(result).toContain("Thread");
+    expect(result).toContain("Workflow");
+    expect(result).toContain("Step");
+    expect(result.length).toBeGreaterThan(200);
+  });
+
+  test("skill yaml returns non-empty markdown string", () => {
+    const result = cmdSkillYaml();
+    expect(typeof result).toBe("string");
+    expect(result).toContain("roles");
+    expect(result).toContain("graph");
+    expect(result).toContain("outputSchema");
+    expect(result.length).toBeGreaterThan(200);
+  });
+
+  test("skill moderator returns non-empty markdown string", () => {
+    const result = cmdSkillModerator();
+    expect(typeof result).toBe("string");
+    expect(result).toContain("routing");
+    expect(result).toContain("status");
+    expect(result.length).toBeGreaterThan(200);
+    // Check for edge or graph
+    expect(result).toMatch(/edge|graph/i);
+  });
+
+  test("skill cli returns CLI reference markdown", () => {
+    const result = cmdSkillCli();
+    expect(typeof result).toBe("string");
+    expect(result).toContain("uwf");
+  });
+
+  test("skill help subcommand is suppressed", () => {
+    const output = execFileSync("bun", ["src/cli.ts", "skill", "--help"], {
+      // Resolve the package root relative to this test file so the test runs on any checkout.
+      cwd: new URL("../../", import.meta.url),
+      encoding: "utf-8",
+      env: { ...process.env, PATH: `/opt/homebrew/bin:${process.env.PATH ?? ""}` },
+    });
+    expect(output).not.toMatch(/help\s+\[command\]/i);
+    expect(output).toContain("cli");
+    expect(output).toContain("architecture");
+    expect(output).toContain("yaml");
+    expect(output).toContain("moderator");
+    expect(output).toContain("list");
+  });
+});
diff --git a/packages/cli-workflow/src/cli.ts b/packages/cli-workflow/src/cli.ts
index 6e84c91..cfe115d 100755
--- a/packages/cli-workflow/src/cli.ts
+++ b/packages/cli-workflow/src/cli.ts
@@ -15,7 +15,7 @@ import {
 } from "./commands/cas.js";
 import { cmdLogClean, cmdLogList, cmdLogShow } from "./commands/log.js";
 import { cmdSetup, cmdSetupInteractive } from "./commands/setup.js";
-import { cmdSkillCli } from "./commands/skill.js";
+import { cmdSkillArchitecture, cmdSkillCli, cmdSkillList, cmdSkillModerator, cmdSkillYaml } from "./commands/skill.js";
 import { cmdStepFork, cmdStepList, cmdStepRead, cmdStepShow } from "./commands/step.js";
 import {
   cmdThreadCancel,
@@ -473,14 +473,48 @@ For more information, see: uwf help thread list
 });
 
 const skill = program.command("skill").description("Built-in skill references for agents");
+skill.addHelpCommand(false);
 
 skill
   .command("cli")
   .description("Print a markdown reference of all uwf commands")
   .action(() => {
+    // biome-ignore lint/nursery/noConsole: CLI user-facing output
     console.log(cmdSkillCli());
   });
 
+skill
+  .command("architecture")
+  .description("Print the architecture reference")
+  .action(() => {
+    // biome-ignore lint/nursery/noConsole: CLI user-facing output
+    console.log(cmdSkillArchitecture());
+  });
+
+skill
+  .command("yaml")
+  .description("Print the workflow YAML schema reference")
+  .action(() => {
+    // biome-ignore lint/nursery/noConsole: CLI user-facing output
+    console.log(cmdSkillYaml());
+  });
+
+skill
+  .command("moderator")
+  .description("Print the moderator reference")
+  .action(() => {
+    // biome-ignore lint/nursery/noConsole: CLI user-facing output
+    console.log(cmdSkillModerator());
+  });
+
+skill
+  .command("list")
+  .description("List all available skill names")
+  .action(() => {
+    // biome-ignore lint/nursery/noConsole: CLI user-facing output
+    console.log(cmdSkillList().join("\n"));
+  });
+
 program
   .command("setup")
   .description("Configure provider, model, and agent")
diff --git a/packages/cli-workflow/src/commands/skill.ts b/packages/cli-workflow/src/commands/skill.ts
index 59307ca..0543959 100644
--- a/packages/cli-workflow/src/commands/skill.ts
+++ b/packages/cli-workflow/src/commands/skill.ts
@@ -1 +1,11 @@
 export { generateCliReference as cmdSkillCli } from "@uncaged/workflow-util";
+export { generateArchitectureReference as cmdSkillArchitecture } from "@uncaged/workflow-util";
+export { generateYamlReference as cmdSkillYaml } from "@uncaged/workflow-util";
+export { generateModeratorReference as cmdSkillModerator } from "@uncaged/workflow-util";
+
+const SKILL_NAMES = ["cli", "architecture", "yaml", "moderator"] as const;
+
+/** Returns a fresh copy of the built-in skill reference names. */
+export function cmdSkillList(): ReadonlyArray<string> {
+  return [...SKILL_NAMES];
+}
diff --git a/packages/workflow-util/src/architecture-reference.ts b/packages/workflow-util/src/architecture-reference.ts
new file mode 100644
index 0000000..59a80e8
--- /dev/null
+++ b/packages/workflow-util/src/architecture-reference.ts
@@ -0,0 +1,61 @@
+/** Returns the built-in architecture reference as a static markdown string. */
+export function generateArchitectureReference(): string {
+  return `# Workflow Engine — Architecture Reference
+
+## Key Concepts
+
+### CAS (Content-Addressed Storage)
+Every artifact in the workflow engine is stored as a CAS node — an immutable, content-addressed record identified by its XXH64 hash (13-char Crockford Base32). CAS provides deduplication, integrity verification, and an append-only audit trail.
+
+Stored artifacts include:
+- **Workflow definitions** — the YAML-parsed payload
+- **Step nodes** — each moderator→agent→extract cycle
+- **Detail nodes** — per-step metadata and turn history
+- **Turn records** — individual agent interactions within a step
+
+### Thread
+A Thread is a single execution of a Workflow, identified by a ULID (26-char Crockford Base32: 10 timestamp + 16 random). Thread state is an immutable CAS chain — each step points to its predecessor via a \`prev\` hash, forming a linked list.
+
+Active threads are indexed in \`threads.yaml\`; completed threads move to \`history.jsonl\`.
+
+A thread progresses by running \`uwf thread exec\`, which performs one moderator→agent→extract cycle per step.
+
+### Workflow
+A Workflow is a YAML definition (\`WorkflowPayload\`) stored as a CAS node. It defines:
+- **Roles** — named actors with system prompts and output schemas
+- **Graph** — status-based routing edges between roles
+- **Conditions** — edge predicates evaluated by the moderator
+
+Workflow names follow verb-first kebab-case: \`solve-issue\`, \`review-code\`.
+
+### Step
+A Step is one moderator→agent→extract cycle, stored as a CAS node (\`StepNodePayload\`). Each step contains:
+- **output** — the agent's extracted frontmatter output
+- **detail** — a CAS reference to turn-level records
+- **prev** — CAS hash of the previous step (forming the chain)
+- **role** — which role produced this step
+
+### Turn
+A Turn is an agent-internal interaction within a single Step. Turns are stored per-turn in the detail node, capturing the raw agent I/O before extraction.
+
+## Data Flow
+
+\`\`\`
+uwf thread exec
+  → Moderator evaluates graph edges based on current status
+  → Selects next role (or $END)
+  → Agent CLI is spawned with context
+  → Agent produces frontmatter markdown
+  → Extract pipeline parses output into structured data
+  → New CAS step node is appended to the thread chain
+\`\`\`
+
+## Storage Layout
+
+All data lives under \`~/.uncaged/workflow/\`:
+- \`cas/\` — content-addressed store (XXH64-keyed)
+- \`threads.yaml\` — active thread index
+- \`history.jsonl\` — completed thread archive
+- \`registry.yaml\` — workflow name → CAS hash mapping
+`;
+}
diff --git a/packages/workflow-util/src/index.ts b/packages/workflow-util/src/index.ts
index a08f3ad..152c408 100644
--- a/packages/workflow-util/src/index.ts
+++ b/packages/workflow-util/src/index.ts
@@ -1,5 +1,8 @@
 export { encodeUint64AsCrockford } from "./base32.js";
+export { generateArchitectureReference } from "./architecture-reference.js";
 export { generateCliReference } from "./cli-reference.js";
+export { generateModeratorReference } from "./moderator-reference.js";
+export { generateYamlReference } from "./yaml-reference.js";
 export { env } from "./env.js";
 export type {
   AgentFrontmatter,
diff --git a/packages/workflow-util/src/moderator-reference.ts b/packages/workflow-util/src/moderator-reference.ts
new file mode 100644
index 0000000..17082a8
--- /dev/null
+++ b/packages/workflow-util/src/moderator-reference.ts
@@ -0,0 +1,65 @@
+/** Returns the built-in moderator (graph routing) reference as a static markdown string. */
+export function generateModeratorReference(): string {
+  return `# Moderator Reference
+
+## Overview
+
+The moderator is the workflow engine's routing component. It evaluates the directed graph defined in the workflow YAML to determine the next role (or \`$END\`) after each step — with zero LLM cost.
+
+## Status-Based Routing
+
+The moderator uses **status-based routing**: it inspects the previous step's extracted output (specifically the \`$status\` field and other output fields) and matches them against edge conditions in the graph.
+
+### Routing Algorithm
+
+1. Find all edges where \`from\` matches the current role
+2. For each edge (in order), evaluate the \`when\` condition:
+   - If \`when\` is absent → unconditional match (always taken)
+   - If \`when\` is present → every key/value pair must match the step output
+3. The first matching edge determines the next role
+4. If no edge matches → thread stalls (error condition)
+
+### Example
+
+\`\`\`yaml
+graph:
+  - from: developer
+    to: reviewer
+    when:
+      $status: done
+  - from: developer
+    to: $END
+    when:
+      $status: failed
+  - from: reviewer
+    to: developer
+    when:
+      $status: needs-changes
+  - from: reviewer
+    to: $END
+    when:
+      $status: approved
+\`\`\`
+
+In this graph:
+- After \`developer\` produces \`$status: done\`, the moderator routes to \`reviewer\`
+- After \`reviewer\` produces \`$status: needs-changes\`, it routes back to \`developer\`
+- \`$status: failed\` or \`$status: approved\` terminates the thread
+
+## Edge Evaluation Details
+
+- Edges are evaluated **in declaration order** — put specific conditions before general ones
+- \`when\` values are compared as **exact string matches**
+- Multiple \`when\` fields are ANDed — all must match
+- An edge without \`when\` acts as a **fallback** — place it last
+
+## Integration with Steps
+
+Each \`uwf thread exec\` cycle:
+1. Moderator reads the thread's head step output
+2. Evaluates graph edges to pick the next role
+3. If next is \`$END\`, marks thread as completed
+4. Otherwise, spawns the agent for the selected role
+5. Extract pipeline parses agent output → new step node → append to CAS chain
+`;
+}
diff --git a/packages/workflow-util/src/yaml-reference.ts b/packages/workflow-util/src/yaml-reference.ts
new file mode 100644
index 0000000..2b548fc
--- /dev/null
+++ b/packages/workflow-util/src/yaml-reference.ts
@@ -0,0 +1,74 @@
+/** Returns the built-in workflow YAML schema reference as a static markdown string. */
+export function generateYamlReference(): string {
+  return `# Workflow YAML Schema Reference
+
+## Top-Level Structure
+
+A workflow YAML file defines the complete workflow specification:
+
+\`\`\`yaml
+name: solve-issue          # verb-first kebab-case identifier
+description: "..."         # human-readable description
+
+roles:                     # named actors in the workflow
+  planner:
+    system: |              # system prompt for the agent
+      You are a planner...
+    outputSchema:          # JSON Schema for structured output
+      type: object
+      required: [plan, $status]
+      properties:
+        plan:
+          type: string
+        $status:
+          type: string
+          enum: [ready, failed]
+
+graph:                     # status-based routing edges
+  - from: $START
+    to: planner
+  - from: planner
+    to: developer
+    when:
+      $status: ready
+  - from: planner
+    to: $END
+    when:
+      $status: failed
+\`\`\`
+
+## roles
+
+Each role defines an actor in the workflow:
+
+| Field | Type | Description |
+|-------|------|-------------|
+| \`system\` | string | System prompt — instructions for the agent |
+| \`outputSchema\` | JSON Schema | Defines the structured output the agent must produce |
+| \`agent\` | string (optional) | Override the default agent command for this role |
+
+### outputSchema
+
+The \`outputSchema\` is a standard JSON Schema object. The extract pipeline validates agent output against it. Key conventions:
+- \`$status\` field drives routing decisions in the graph
+- Use \`enum\` to constrain status values
+- All required fields must appear in the agent's frontmatter output
+
+## graph
+
+The graph is an array of directed edges defining status-based routing:
+
+| Field | Type | Description |
+|-------|------|-------------|
+| \`from\` | string | Source role name, or \`$START\` |
+| \`to\` | string | Target role name, or \`$END\` |
+| \`when\` | object (optional) | Condition map — field/value pairs to match against previous output |
+
+### Special Nodes
+- \`$START\` — entry point, must have exactly one outgoing edge
+- \`$END\` — terminal node, thread completes when reached
+
+### Edge Evaluation
+Edges are evaluated in order. The first edge whose \`when\` condition matches the current step output is selected. If no \`when\` is specified, the edge is unconditional (always matches).
+`;
+}