diff --git a/packages/cli-workflow/src/__tests__/skill.test.ts b/packages/cli-workflow/src/__tests__/skill.test.ts new file mode 100644 index 0000000..34aa2f5 --- /dev/null +++ b/packages/cli-workflow/src/__tests__/skill.test.ts @@ -0,0 +1,78 @@ +import { execFileSync } from "node:child_process"; +import { dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; +import { describe, expect, test } from "vitest"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); + +import { + cmdSkillArchitecture, + cmdSkillCli, + cmdSkillList, + cmdSkillModerator, + cmdSkillYaml, +} from "../commands/skill.js"; + +describe("skill commands", () => { + test("skill list returns all skill names", () => { + const result = cmdSkillList(); + expect(result).toBeInstanceOf(Array); + expect(result).toContain("cli"); + expect(result).toContain("architecture"); + expect(result).toContain("yaml"); + expect(result).toContain("moderator"); + for (const name of result) { + expect(typeof name).toBe("string"); + expect(name).toMatch(/^\S+$/); + } + }); + + test("skill architecture returns non-empty markdown string", () => { + const result = cmdSkillArchitecture(); + expect(typeof result).toBe("string"); + expect(result).toContain("CAS"); + expect(result).toContain("Thread"); + expect(result).toContain("Workflow"); + expect(result).toContain("Step"); + expect(result.length).toBeGreaterThan(200); + }); + + test("skill yaml returns non-empty markdown string", () => { + const result = cmdSkillYaml(); + expect(typeof result).toBe("string"); + expect(result).toContain("roles"); + expect(result).toContain("graph"); + expect(result).toContain("frontmatter"); + expect(result.length).toBeGreaterThan(200); + }); + + test("skill moderator returns non-empty markdown string", () => { + const result = cmdSkillModerator(); + expect(typeof result).toBe("string"); + expect(result).toContain("routing"); + expect(result).toContain("status"); + expect(result.length).toBeGreaterThan(200); + 
// Check for edge or graph + expect(result).toMatch(/edge|graph/i); + }); + + test("skill cli returns CLI reference markdown", () => { + const result = cmdSkillCli(); + expect(typeof result).toBe("string"); + expect(result).toContain("uwf"); + }); + + test("skill help subcommand is suppressed", () => { + const output = execFileSync("bun", ["src/cli.ts", "skill", "--help"], { + cwd: join(__dirname, "..", ".."), + encoding: "utf-8", + env: { ...process.env, PATH: `/opt/homebrew/bin:${process.env.PATH}` }, + }); + expect(output).not.toMatch(/help\s+\[command\]/i); + expect(output).toContain("cli"); + expect(output).toContain("architecture"); + expect(output).toContain("yaml"); + expect(output).toContain("moderator"); + expect(output).toContain("list"); + }); +}); diff --git a/packages/cli-workflow/src/cli.ts b/packages/cli-workflow/src/cli.ts index 6e84c91..9a76671 100755 --- a/packages/cli-workflow/src/cli.ts +++ b/packages/cli-workflow/src/cli.ts @@ -15,7 +15,13 @@ import { } from "./commands/cas.js"; import { cmdLogClean, cmdLogList, cmdLogShow } from "./commands/log.js"; import { cmdSetup, cmdSetupInteractive } from "./commands/setup.js"; -import { cmdSkillCli } from "./commands/skill.js"; +import { + cmdSkillArchitecture, + cmdSkillCli, + cmdSkillList, + cmdSkillModerator, + cmdSkillYaml, +} from "./commands/skill.js"; import { cmdStepFork, cmdStepList, cmdStepRead, cmdStepShow } from "./commands/step.js"; import { cmdThreadCancel, @@ -473,6 +479,7 @@ For more information, see: uwf help thread list }); const skill = program.command("skill").description("Built-in skill references for agents"); +skill.addHelpCommand(false); skill .command("cli") @@ -481,6 +488,34 @@ skill console.log(cmdSkillCli()); }); +skill + .command("architecture") + .description("Print the architecture reference") + .action(() => { + console.log(cmdSkillArchitecture()); + }); + +skill + .command("yaml") + .description("Print the workflow YAML schema reference") + .action(() => { + 
console.log(cmdSkillYaml()); + }); + +skill + .command("moderator") + .description("Print the moderator reference") + .action(() => { + console.log(cmdSkillModerator()); + }); + +skill + .command("list") + .description("List all available skill names") + .action(() => { + console.log(cmdSkillList().join("\n")); + }); + program .command("setup") .description("Configure provider, model, and agent") diff --git a/packages/cli-workflow/src/commands/skill.ts b/packages/cli-workflow/src/commands/skill.ts index 59307ca..8a5ddeb 100644 --- a/packages/cli-workflow/src/commands/skill.ts +++ b/packages/cli-workflow/src/commands/skill.ts @@ -1 +1,12 @@ -export { generateCliReference as cmdSkillCli } from "@uncaged/workflow-util"; +export { + generateArchitectureReference as cmdSkillArchitecture, + generateCliReference as cmdSkillCli, + generateModeratorReference as cmdSkillModerator, + generateYamlReference as cmdSkillYaml, +} from "@uncaged/workflow-util"; + +const SKILL_NAMES = ["cli", "architecture", "yaml", "moderator"] as const; + +export function cmdSkillList(): ReadonlyArray<string> { + return [...SKILL_NAMES]; +} diff --git a/packages/workflow-util/src/architecture-reference.ts b/packages/workflow-util/src/architecture-reference.ts new file mode 100644 index 0000000..59a80e8 --- /dev/null +++ b/packages/workflow-util/src/architecture-reference.ts @@ -0,0 +1,60 @@ +export function generateArchitectureReference(): string { + return `# Workflow Engine — Architecture Reference + +## Key Concepts + +### CAS (Content-Addressed Storage) +Every artifact in the workflow engine is stored as a CAS node — an immutable, content-addressed record identified by its XXH64 hash (13-char Crockford Base32). CAS provides deduplication, integrity verification, and an append-only audit trail. 
+ +Stored artifacts include: +- **Workflow definitions** — the YAML-parsed payload +- **Step nodes** — each moderator→agent→extract cycle +- **Detail nodes** — per-step metadata and turn history +- **Turn records** — individual agent interactions within a step + +### Thread +A Thread is a single execution of a Workflow, identified by a ULID (26-char Crockford Base32: 10 timestamp + 16 random). Thread state is an immutable CAS chain — each step points to its predecessor via a \`prev\` hash, forming a linked list. + +Active threads are indexed in \`threads.yaml\`; completed threads move to \`history.jsonl\`. + +A thread progresses by running \`uwf thread exec\`, which performs one moderator→agent→extract cycle per step. + +### Workflow +A Workflow is a YAML definition (\`WorkflowPayload\`) stored as a CAS node. It defines: +- **Roles** — named actors with system prompts and output schemas +- **Graph** — status-based routing edges between roles +- **Conditions** — edge predicates evaluated by the moderator + +Workflow names follow verb-first kebab-case: \`solve-issue\`, \`review-code\`. + +### Step +A Step is one moderator→agent→extract cycle, stored as a CAS node (\`StepNodePayload\`). Each step contains: +- **output** — the agent's extracted frontmatter output +- **detail** — a CAS reference to turn-level records +- **prev** — CAS hash of the previous step (forming the chain) +- **role** — which role produced this step + +### Turn +A Turn is an agent-internal interaction within a single Step. Turns are stored per-turn in the detail node, capturing the raw agent I/O before extraction. 
+ +## Data Flow + +\`\`\` +uwf thread exec + → Moderator evaluates graph edges based on current status + → Selects next role (or $END) + → Agent CLI is spawned with context + → Agent produces frontmatter markdown + → Extract pipeline parses output into structured data + → New CAS step node is appended to the thread chain +\`\`\` + +## Storage Layout + +All data lives under \`~/.uncaged/workflow/\`: +- \`cas/\` — content-addressed store (XXH64-keyed) +- \`threads.yaml\` — active thread index +- \`history.jsonl\` — completed thread archive +- \`registry.yaml\` — workflow name → CAS hash mapping +`; +} diff --git a/packages/workflow-util/src/index.ts b/packages/workflow-util/src/index.ts index a08f3ad..380c195 100644 --- a/packages/workflow-util/src/index.ts +++ b/packages/workflow-util/src/index.ts @@ -1,3 +1,4 @@ +export { generateArchitectureReference } from "./architecture-reference.js"; export { encodeUint64AsCrockford } from "./base32.js"; export { generateCliReference } from "./cli-reference.js"; export { env } from "./env.js"; @@ -13,6 +14,7 @@ export { validateFrontmatter, } from "./frontmatter-markdown/index.js"; export { createLogger } from "./logger.js"; +export { generateModeratorReference } from "./moderator-reference.js"; export type { CreateProcessLoggerOptions, ProcessLogFn, @@ -25,3 +27,4 @@ export { err, ok } from "./result.js"; export { getDefaultWorkflowStorageRoot, getGlobalCasDir } from "./storage-root.js"; export type { LogFn, Result } from "./types.js"; export { extractUlidTimestamp, generateUlid } from "./ulid.js"; +export { generateYamlReference } from "./yaml-reference.js"; diff --git a/packages/workflow-util/src/moderator-reference.ts b/packages/workflow-util/src/moderator-reference.ts new file mode 100644 index 0000000..c4303eb --- /dev/null +++ b/packages/workflow-util/src/moderator-reference.ts @@ -0,0 +1,56 @@ +export function generateModeratorReference(): string { + return `# Moderator Reference + +## Overview + +The moderator is the 
workflow engine's routing component. It evaluates the directed graph defined in the workflow YAML to determine the next role (or \`$END\`) after each step — with zero LLM cost. + +## Status-Based Routing + +The moderator uses **status-based routing**: it inspects the previous step's extracted output (specifically the \`$status\` field) and looks up the corresponding edge in the graph. + +### Graph Structure + +The graph is a nested map: \`Record<string, Record<string, Target>>\`. Each role maps its possible \`$status\` values to a target with a \`role\` and \`prompt\`: + +\`\`\`yaml +graph: + $START: + _: { role: planner, prompt: "Analyze the issue." } + planner: + ready: { role: developer, prompt: "Implement the plan (CAS hash: {{{plan}}})." } + insufficient_info: { role: $END, prompt: "Not enough info." } + developer: + done: { role: reviewer, prompt: "Review branch {{{branch}}} at {{{worktree}}}." } + failed: { role: $END, prompt: "Developer failed: {{{reason}}}." } + reviewer: + approved: { role: tester, prompt: "Run tests on {{{branch}}} at {{{worktree}}}." } + rejected: { role: developer, prompt: "Fix issues: {{{comments}}}." } +\`\`\` + +### Routing Algorithm + +1. Look up \`graph[lastRole]\` to get the status map for the current role +2. Look up \`statusMap[lastOutput.$status]\` to get the target +3. If target role is \`$END\`, mark thread as completed +4. Otherwise, render the edge prompt (Mustache templates with \`{{{field}}}\` from output) and spawn the next agent + +### Edge Prompts and Mustache Templates + +Edge prompts use triple-brace Mustache syntax (\`{{{field}}}\`) to interpolate values from the previous step's output into the next agent's task prompt. This passes structured data (branch names, file paths, CAS hashes) between roles without manual wiring. 
+ +## Special Nodes + +- \`$START\` — entry point; uses status key \`_\` (unconditional) since there is no previous output +- \`$END\` — terminal node; thread completes when reached and is moved to history + +## Integration with Steps + +Each \`uwf thread exec\` cycle: +1. Moderator reads the thread's head step output +2. Looks up \`graph[lastRole][output.$status]\` to pick the next role +3. If next is \`$END\`, marks thread as completed +4. Otherwise, renders the edge prompt and spawns the agent for the selected role +5. Extract pipeline parses agent output → new step node → append to CAS chain +`; +} diff --git a/packages/workflow-util/src/yaml-reference.ts b/packages/workflow-util/src/yaml-reference.ts new file mode 100644 index 0000000..fb47621 --- /dev/null +++ b/packages/workflow-util/src/yaml-reference.ts @@ -0,0 +1,82 @@ +export function generateYamlReference(): string { + return `# Workflow YAML Schema Reference + +## Top-Level Structure + +A workflow YAML file defines the complete workflow specification: + +\`\`\`yaml +name: solve-issue # verb-first kebab-case identifier +description: "..." # human-readable description + +roles: # named actors in the workflow + planner: + description: "Analyzes issue and outputs a plan" + goal: "You are a planning agent." + capabilities: + - issue-analysis + - planning + procedure: | + 1. Read the issue + 2. Produce a test spec + output: "Output the plan summary. Set $status to ready or insufficient_info." + frontmatter: # JSON Schema for structured output (drives routing) + oneOf: + - properties: + $status: { const: ready } + plan: { type: string } + required: [$status, plan] + - properties: + $status: { const: insufficient_info } + required: [$status] + +graph: # status-based routing (nested map) + $START: + _: { role: planner, prompt: "Analyze the issue." } + planner: + ready: { role: developer, prompt: "Implement plan {{{plan}}}." } + insufficient_info: { role: $END, prompt: "Not enough info." 
} +\`\`\` + +## roles + +Each role defines an actor in the workflow: + +| Field | Type | Description | +|-------|------|-------------| +| \`description\` | string | Short description of the role's purpose | +| \`goal\` | string | System-level goal statement for the agent | +| \`capabilities\` | string[] | Tags describing what the role can do | +| \`procedure\` | string | Step-by-step instructions for the agent | +| \`output\` | string | Description of expected output format | +| \`frontmatter\` | JSON Schema | Defines the structured output the agent must produce | + +### frontmatter + +The \`frontmatter\` field is a standard JSON Schema object. The extract pipeline validates agent output against it. Key conventions: +- \`$status\` field drives routing decisions in the graph +- Use \`const\` or \`enum\` to constrain status values +- Use \`oneOf\` to define multiple valid output shapes (one per status) +- All \`required\` fields must appear in the agent's frontmatter output + +## graph + +The graph is a nested map defining status-based routing: + +\`\`\` +Record<string, Record<string, Target>> +\`\`\` + +| Level | Key | Value | +|-------|-----|-------| +| Outer | Role name or \`$START\` | Status map for that role | +| Inner | \`$status\` value (or \`_\` for unconditional) | Target: \`{ role, prompt }\` | + +### Special Nodes +- \`$START\` — entry point; uses status key \`_\` (unconditional, no previous output) +- \`$END\` — terminal node; thread completes when reached + +### Edge Prompts +Prompts use triple-brace Mustache templates (\`{{{field}}}\`) to interpolate values from the previous step's output. Example: \`"Implement plan {{{plan}}} in repo {{{repoPath}}}."\` +`; +}