Merge pull request 'feat(skill): expand uwf skill with architecture, yaml, moderator, list subcommands' (#521) from fix/517-expand-skill into main
This commit is contained in:
@@ -0,0 +1,78 @@
|
|||||||
|
import { execFileSync } from "node:child_process";
|
||||||
|
import { dirname, join } from "node:path";
|
||||||
|
import { fileURLToPath } from "node:url";
|
||||||
|
import { describe, expect, test } from "vitest";
|
||||||
|
|
||||||
|
// ES modules do not define `__dirname`; derive this file's directory from `import.meta.url`.
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||||
|
|
||||||
|
import {
|
||||||
|
cmdSkillArchitecture,
|
||||||
|
cmdSkillCli,
|
||||||
|
cmdSkillList,
|
||||||
|
cmdSkillModerator,
|
||||||
|
cmdSkillYaml,
|
||||||
|
} from "../commands/skill.js";
|
||||||
|
|
||||||
|
/**
 * Tests for the `skill` command family.
 *
 * The first five tests call the command functions directly and check that each
 * reference mentions the keywords it is expected to cover. The last test spawns
 * the CLI to inspect the rendered `skill --help` output.
 */
describe("skill commands", () => {
  test("skill list returns all skill names", () => {
    const result = cmdSkillList();
    expect(result).toBeInstanceOf(Array);
    expect(result).toContain("cli");
    expect(result).toContain("architecture");
    expect(result).toContain("yaml");
    expect(result).toContain("moderator");
    // Every entry must be a single whitespace-free token.
    for (const name of result) {
      expect(typeof name).toBe("string");
      expect(name).toMatch(/^\S+$/);
    }
  });

  test("skill architecture returns non-empty markdown string", () => {
    const result = cmdSkillArchitecture();
    expect(typeof result).toBe("string");
    expect(result).toContain("CAS");
    expect(result).toContain("Thread");
    expect(result).toContain("Workflow");
    expect(result).toContain("Step");
    expect(result.length).toBeGreaterThan(200);
  });

  test("skill yaml returns non-empty markdown string", () => {
    const result = cmdSkillYaml();
    expect(typeof result).toBe("string");
    expect(result).toContain("roles");
    expect(result).toContain("graph");
    expect(result).toContain("frontmatter");
    expect(result.length).toBeGreaterThan(200);
  });

  test("skill moderator returns non-empty markdown string", () => {
    const result = cmdSkillModerator();
    expect(typeof result).toBe("string");
    expect(result).toContain("routing");
    expect(result).toContain("status");
    expect(result.length).toBeGreaterThan(200);
    // Check for edge or graph
    expect(result).toMatch(/edge|graph/i);
  });

  test("skill cli returns CLI reference markdown", () => {
    const result = cmdSkillCli();
    expect(typeof result).toBe("string");
    expect(result).toContain("uwf");
  });

  test("skill help subcommand is suppressed", () => {
    // Prepend the Homebrew bin directory (presumably where `bun` lives on the
    // author's machine — confirm) to the inherited PATH. Unset segments are
    // dropped so a missing PATH never becomes the literal string "undefined".
    const searchPath = ["/opt/homebrew/bin", process.env.PATH].filter(Boolean).join(":");

    const output = execFileSync("bun", ["src/cli.ts", "skill", "--help"], {
      // Two levels above this test file, so "src/cli.ts" resolves.
      cwd: join(__dirname, "..", ".."),
      encoding: "utf-8",
      env: { ...process.env, PATH: searchPath },
    });

    // The implicit `help [command]` entry must not be listed...
    expect(output).not.toMatch(/help\s+\[command\]/i);
    // ...while every real subcommand is.
    expect(output).toContain("cli");
    expect(output).toContain("architecture");
    expect(output).toContain("yaml");
    expect(output).toContain("moderator");
    expect(output).toContain("list");
  });
});
|
||||||
@@ -15,7 +15,13 @@ import {
|
|||||||
} from "./commands/cas.js";
|
} from "./commands/cas.js";
|
||||||
import { cmdLogClean, cmdLogList, cmdLogShow } from "./commands/log.js";
|
import { cmdLogClean, cmdLogList, cmdLogShow } from "./commands/log.js";
|
||||||
import { cmdSetup, cmdSetupInteractive } from "./commands/setup.js";
|
import { cmdSetup, cmdSetupInteractive } from "./commands/setup.js";
|
||||||
import { cmdSkillCli } from "./commands/skill.js";
|
import {
|
||||||
|
cmdSkillArchitecture,
|
||||||
|
cmdSkillCli,
|
||||||
|
cmdSkillList,
|
||||||
|
cmdSkillModerator,
|
||||||
|
cmdSkillYaml,
|
||||||
|
} from "./commands/skill.js";
|
||||||
import { cmdStepFork, cmdStepList, cmdStepRead, cmdStepShow } from "./commands/step.js";
|
import { cmdStepFork, cmdStepList, cmdStepRead, cmdStepShow } from "./commands/step.js";
|
||||||
import {
|
import {
|
||||||
cmdThreadCancel,
|
cmdThreadCancel,
|
||||||
@@ -473,6 +479,7 @@ For more information, see: uwf help thread list
|
|||||||
});
|
});
|
||||||
|
|
||||||
const skill = program.command("skill").description("Built-in skill references for agents");
|
const skill = program.command("skill").description("Built-in skill references for agents");
|
||||||
|
// Turn off the implicit `help [command]` subcommand so that `skill --help`
// lists only the reference subcommands registered on this group.
skill.addHelpCommand(false);
|
||||||
|
|
||||||
skill
|
skill
|
||||||
.command("cli")
|
.command("cli")
|
||||||
@@ -481,6 +488,34 @@ skill
|
|||||||
console.log(cmdSkillCli());
|
console.log(cmdSkillCli());
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Remaining `skill` subcommands as [name, description, producer] rows, in
// registration order. Each producer returns the text printed to stdout.
const skillReferenceCommands: ReadonlyArray<readonly [string, string, () => string]> = [
  ["architecture", "Print the architecture reference", cmdSkillArchitecture],
  ["yaml", "Print the workflow YAML schema reference", cmdSkillYaml],
  ["moderator", "Print the moderator reference", cmdSkillModerator],
  // `list` prints one skill name per line.
  ["list", "List all available skill names", () => cmdSkillList().join("\n")],
];

for (const [commandName, summary, render] of skillReferenceCommands) {
  skill
    .command(commandName)
    .description(summary)
    .action(() => {
      console.log(render());
    });
}
|
||||||
|
|
||||||
program
|
program
|
||||||
.command("setup")
|
.command("setup")
|
||||||
.description("Configure provider, model, and agent")
|
.description("Configure provider, model, and agent")
|
||||||
|
|||||||
@@ -1 +1,12 @@
|
|||||||
export { generateCliReference as cmdSkillCli } from "@uncaged/workflow-util";
|
export {
|
||||||
|
generateArchitectureReference as cmdSkillArchitecture,
|
||||||
|
generateCliReference as cmdSkillCli,
|
||||||
|
generateModeratorReference as cmdSkillModerator,
|
||||||
|
generateYamlReference as cmdSkillYaml,
|
||||||
|
} from "@uncaged/workflow-util";
|
||||||
|
|
||||||
|
/** Names of the built-in skill references, in the order they are listed. */
const SKILL_NAMES = ["cli", "architecture", "yaml", "moderator"] as const;

/**
 * Lists the names of the available skill references.
 *
 * @returns A new array on every call, so callers never receive the
 *   module-level list itself.
 */
export function cmdSkillList(): ReadonlyArray<string> {
  return Array.from(SKILL_NAMES);
}
|
||||||
|
|||||||
@@ -0,0 +1,60 @@
|
|||||||
|
/**
 * Static Markdown text of the architecture reference.
 *
 * Sections: Key Concepts (CAS, Thread, Workflow, Step, Turn), Data Flow and
 * Storage Layout.
 *
 * NOTE(review): the Workflow section lists a "Conditions" bullet (edge
 * predicates), while the moderator and YAML references describe routing as a
 * plain `$status` lookup with no predicate field — confirm the bullet is
 * still accurate.
 */
const ARCHITECTURE_REFERENCE_MARKDOWN = `# Workflow Engine — Architecture Reference

## Key Concepts

### CAS (Content-Addressed Storage)
Every artifact in the workflow engine is stored as a CAS node — an immutable, content-addressed record identified by its XXH64 hash (13-char Crockford Base32). CAS provides deduplication, integrity verification, and an append-only audit trail.

Stored artifacts include:
- **Workflow definitions** — the YAML-parsed payload
- **Step nodes** — each moderator→agent→extract cycle
- **Detail nodes** — per-step metadata and turn history
- **Turn records** — individual agent interactions within a step

### Thread
A Thread is a single execution of a Workflow, identified by a ULID (26-char Crockford Base32: 10 timestamp + 16 random). Thread state is an immutable CAS chain — each step points to its predecessor via a \`prev\` hash, forming a linked list.

Active threads are indexed in \`threads.yaml\`; completed threads move to \`history.jsonl\`.

A thread progresses by running \`uwf thread exec\`, which performs one moderator→agent→extract cycle per step.

### Workflow
A Workflow is a YAML definition (\`WorkflowPayload\`) stored as a CAS node. It defines:
- **Roles** — named actors with system prompts and output schemas
- **Graph** — status-based routing edges between roles
- **Conditions** — edge predicates evaluated by the moderator

Workflow names follow verb-first kebab-case: \`solve-issue\`, \`review-code\`.

### Step
A Step is one moderator→agent→extract cycle, stored as a CAS node (\`StepNodePayload\`). Each step contains:
- **output** — the agent's extracted frontmatter output
- **detail** — a CAS reference to turn-level records
- **prev** — CAS hash of the previous step (forming the chain)
- **role** — which role produced this step

### Turn
A Turn is an agent-internal interaction within a single Step. Turns are stored per-turn in the detail node, capturing the raw agent I/O before extraction.

## Data Flow

\`\`\`
uwf thread exec <thread-id>
→ Moderator evaluates graph edges based on current status
→ Selects next role (or $END)
→ Agent CLI is spawned with context
→ Agent produces frontmatter markdown
→ Extract pipeline parses output into structured data
→ New CAS step node is appended to the thread chain
\`\`\`

## Storage Layout

All data lives under \`~/.uncaged/workflow/\`:
- \`cas/\` — content-addressed store (XXH64-keyed)
- \`threads.yaml\` — active thread index
- \`history.jsonl\` — completed thread archive
- \`registry.yaml\` — workflow name → CAS hash mapping
`;

/**
 * Returns the architecture reference as a Markdown string.
 *
 * The text is a fixed constant: the function takes no input and performs no
 * I/O.
 *
 * @returns The Markdown document, ending with a trailing newline.
 */
export function generateArchitectureReference(): string {
  return ARCHITECTURE_REFERENCE_MARKDOWN;
}
|
||||||
@@ -1,3 +1,4 @@
|
|||||||
|
export { generateArchitectureReference } from "./architecture-reference.js";
|
||||||
export { encodeUint64AsCrockford } from "./base32.js";
|
export { encodeUint64AsCrockford } from "./base32.js";
|
||||||
export { generateCliReference } from "./cli-reference.js";
|
export { generateCliReference } from "./cli-reference.js";
|
||||||
export { env } from "./env.js";
|
export { env } from "./env.js";
|
||||||
@@ -13,6 +14,7 @@ export {
|
|||||||
validateFrontmatter,
|
validateFrontmatter,
|
||||||
} from "./frontmatter-markdown/index.js";
|
} from "./frontmatter-markdown/index.js";
|
||||||
export { createLogger } from "./logger.js";
|
export { createLogger } from "./logger.js";
|
||||||
|
export { generateModeratorReference } from "./moderator-reference.js";
|
||||||
export type {
|
export type {
|
||||||
CreateProcessLoggerOptions,
|
CreateProcessLoggerOptions,
|
||||||
ProcessLogFn,
|
ProcessLogFn,
|
||||||
@@ -25,3 +27,4 @@ export { err, ok } from "./result.js";
|
|||||||
export { getDefaultWorkflowStorageRoot, getGlobalCasDir } from "./storage-root.js";
|
export { getDefaultWorkflowStorageRoot, getGlobalCasDir } from "./storage-root.js";
|
||||||
export type { LogFn, Result } from "./types.js";
|
export type { LogFn, Result } from "./types.js";
|
||||||
export { extractUlidTimestamp, generateUlid } from "./ulid.js";
|
export { extractUlidTimestamp, generateUlid } from "./ulid.js";
|
||||||
|
export { generateYamlReference } from "./yaml-reference.js";
|
||||||
|
|||||||
@@ -0,0 +1,56 @@
|
|||||||
|
/**
 * Returns the moderator reference as a Markdown string.
 *
 * The document covers status-based routing, the graph structure, the routing
 * algorithm, Mustache edge prompts, the special `$START` / `$END` nodes and
 * how routing fits into a `uwf thread exec` cycle. The text is a fixed
 * constant: the function takes no input and performs no I/O.
 *
 * The fenced YAML example is indented so that it reads as the nested map the
 * surrounding text describes (role → status → target).
 *
 * @returns The Markdown document, ending with a trailing newline.
 */
export function generateModeratorReference(): string {
  return `# Moderator Reference

## Overview

The moderator is the workflow engine's routing component. It evaluates the directed graph defined in the workflow YAML to determine the next role (or \`$END\`) after each step — with zero LLM cost.

## Status-Based Routing

The moderator uses **status-based routing**: it inspects the previous step's extracted output (specifically the \`$status\` field) and looks up the corresponding edge in the graph.

### Graph Structure

The graph is a nested map: \`Record<Role | "$START", Record<Status, Target>>\`. Each role maps its possible \`$status\` values to a target with a \`role\` and \`prompt\`:

\`\`\`yaml
graph:
  $START:
    _: { role: planner, prompt: "Analyze the issue." }
  planner:
    ready: { role: developer, prompt: "Implement the plan (CAS hash: {{{plan}}})." }
    insufficient_info: { role: $END, prompt: "Not enough info." }
  developer:
    done: { role: reviewer, prompt: "Review branch {{{branch}}} at {{{worktree}}}." }
    failed: { role: $END, prompt: "Developer failed: {{{reason}}}." }
  reviewer:
    approved: { role: tester, prompt: "Run tests on {{{branch}}} at {{{worktree}}}." }
    rejected: { role: developer, prompt: "Fix issues: {{{comments}}}." }
\`\`\`

### Routing Algorithm

1. Look up \`graph[lastRole]\` to get the status map for the current role
2. Look up \`statusMap[lastOutput.$status]\` to get the target
3. If target role is \`$END\`, mark thread as completed
4. Otherwise, render the edge prompt (Mustache templates with \`{{{field}}}\` from output) and spawn the next agent

### Edge Prompts and Mustache Templates

Edge prompts use triple-brace Mustache syntax (\`{{{field}}}\`) to interpolate values from the previous step's output into the next agent's task prompt. This passes structured data (branch names, file paths, CAS hashes) between roles without manual wiring.

## Special Nodes

- \`$START\` — entry point; uses status key \`_\` (unconditional) since there is no previous output
- \`$END\` — terminal node; thread completes when reached and is moved to history

## Integration with Steps

Each \`uwf thread exec\` cycle:
1. Moderator reads the thread's head step output
2. Looks up \`graph[lastRole][output.$status]\` to pick the next role
3. If next is \`$END\`, marks thread as completed
4. Otherwise, renders the edge prompt and spawns the agent for the selected role
5. Extract pipeline parses agent output → new step node → append to CAS chain
`;
}
|
||||||
@@ -0,0 +1,82 @@
|
|||||||
|
/**
 * Returns the workflow YAML schema reference as a Markdown string.
 *
 * The document shows a sample workflow file, then describes the `roles`
 * fields, the `frontmatter` JSON Schema conventions, the `graph` routing map,
 * the special `$START` / `$END` nodes and Mustache edge prompts. The text is a
 * fixed constant: the function takes no input and performs no I/O.
 *
 * The sample YAML is indented so that nesting (`roles` → role → fields, the
 * `procedure` block scalar, the `frontmatter` schema, and `graph` → role →
 * status) matches the structure described in the prose and tables below it.
 *
 * @returns The Markdown document, ending with a trailing newline.
 */
export function generateYamlReference(): string {
  return `# Workflow YAML Schema Reference

## Top-Level Structure

A workflow YAML file defines the complete workflow specification:

\`\`\`yaml
name: solve-issue # verb-first kebab-case identifier
description: "..." # human-readable description

roles: # named actors in the workflow
  planner:
    description: "Analyzes issue and outputs a plan"
    goal: "You are a planning agent."
    capabilities:
      - issue-analysis
      - planning
    procedure: |
      1. Read the issue
      2. Produce a test spec
    output: "Output the plan summary. Set $status to ready or insufficient_info."
    frontmatter: # JSON Schema for structured output (drives routing)
      oneOf:
        - properties:
            $status: { const: ready }
            plan: { type: string }
          required: [$status, plan]
        - properties:
            $status: { const: insufficient_info }
          required: [$status]

graph: # status-based routing (nested map)
  $START:
    _: { role: planner, prompt: "Analyze the issue." }
  planner:
    ready: { role: developer, prompt: "Implement plan {{{plan}}}." }
    insufficient_info: { role: $END, prompt: "Not enough info." }
\`\`\`

## roles

Each role defines an actor in the workflow:

| Field | Type | Description |
|-------|------|-------------|
| \`description\` | string | Short description of the role's purpose |
| \`goal\` | string | System-level goal statement for the agent |
| \`capabilities\` | string[] | Tags describing what the role can do |
| \`procedure\` | string | Step-by-step instructions for the agent |
| \`output\` | string | Description of expected output format |
| \`frontmatter\` | JSON Schema | Defines the structured output the agent must produce |

### frontmatter

The \`frontmatter\` field is a standard JSON Schema object. The extract pipeline validates agent output against it. Key conventions:
- \`$status\` field drives routing decisions in the graph
- Use \`const\` or \`enum\` to constrain status values
- Use \`oneOf\` to define multiple valid output shapes (one per status)
- All \`required\` fields must appear in the agent's frontmatter output

## graph

The graph is a nested map defining status-based routing:

\`\`\`
Record<Role | "$START", Record<Status, { role: string, prompt: string }>>
\`\`\`

| Level | Key | Value |
|-------|-----|-------|
| Outer | Role name or \`$START\` | Status map for that role |
| Inner | \`$status\` value (or \`_\` for unconditional) | Target: \`{ role, prompt }\` |

### Special Nodes
- \`$START\` — entry point; uses status key \`_\` (unconditional, no previous output)
- \`$END\` — terminal node; thread completes when reached

### Edge Prompts
Prompts use triple-brace Mustache templates (\`{{{field}}}\`) to interpolate values from the previous step's output. Example: \`"Implement plan {{{plan}}} in repo {{{repoPath}}}."\`
`;
}
|
||||||
Reference in New Issue
Block a user