Compare commits
22 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 487c48effa | |||
| 4eca2d533c | |||
| f0f840e6e0 | |||
| 7ff90cef4f | |||
| e62d51d845 | |||
| a803fcb4fc | |||
| d00c93fc19 | |||
| 99a2890be2 | |||
| 3b7d0564bb | |||
| 2eb5ee0666 | |||
| e67932c83c | |||
| 04a12231c3 | |||
| e5ae9a134c | |||
| bdafaf3aa1 | |||
| 02f7f0b708 | |||
| 8ea554bb5e | |||
| 8a425521da | |||
| f174f2fd0a | |||
| 355594d074 | |||
| fd7609fe90 | |||
| dacecfbbb7 | |||
| 3238eaeddf |
@@ -0,0 +1,167 @@
|
||||
name: "solve-issue"
|
||||
description: "TDD-driven issue resolution for small, focused changes. Loop protection relies on engine maxRounds."
|
||||
roles:
|
||||
planner:
|
||||
description: "Analyzes issue and outputs a TDD test spec"
|
||||
goal: "You are a planning agent. You analyze Gitea issues and produce a TDD test specification that downstream roles will implement and verify."
|
||||
capabilities:
|
||||
- issue-analysis
|
||||
- planning
|
||||
procedure: |
|
||||
On first run (no previous steps):
|
||||
1. Read the issue and all comments from Gitea using `tea issues <number> -r <owner/repo>`
|
||||
2. Read CLAUDE.md (or equivalent project conventions file) to understand coding standards
|
||||
3. Assess whether the issue has enough information to produce a test spec
|
||||
4. If insufficient info: comment on the issue via `echo "..." | tea comment <number> -r <owner/repo>` (skip if you already commented), then output status=insufficient_info and terminate
|
||||
5. If sufficient: produce a detailed TDD test spec in markdown covering all scenarios
|
||||
|
||||
On subsequent runs (bounced back by tester with fix_spec):
|
||||
1. Read the tester's output from the previous step to understand what's wrong with the spec
|
||||
2. Revise the test spec accordingly
|
||||
|
||||
After producing the test spec:
|
||||
1. Store it via `uwf cas put-text "<markdown content>"` and capture the returned hash
|
||||
2. Put the hash in frontmatter.plan (required when status=ready)
|
||||
output: "Output a brief summary of the test spec. Frontmatter must include: status (ready or insufficient_info) and plan (CAS hash of the test spec, required when status=ready)."
|
||||
frontmatter:
|
||||
type: object
|
||||
properties:
|
||||
status:
|
||||
type: string
|
||||
enum: [ready, insufficient_info]
|
||||
plan:
|
||||
type: string
|
||||
required: [status]
|
||||
developer:
|
||||
description: "TDD implementation per test spec"
|
||||
goal: "You are a developer agent. You implement code changes following TDD — write tests first, then implementation."
|
||||
capabilities:
|
||||
- coding
|
||||
procedure: |
|
||||
1. Read the test spec from CAS: `uwf cas get <plan hash>` (find the hash from the latest planner step's meta.plan)
|
||||
2. If bounced back from reviewer or tester: read the previous role's output to understand what needs fixing
|
||||
3. Write tests first based on the spec
|
||||
4. Implement the code to make tests pass
|
||||
5. Ensure `bun run build` passes with no errors
|
||||
6. Run `bun test` to verify all tests pass
|
||||
output: "List all files changed and provide a summary. Frontmatter must include: status (done or failed)."
|
||||
frontmatter:
|
||||
type: object
|
||||
properties:
|
||||
status:
|
||||
type: string
|
||||
enum: [done, failed]
|
||||
required: [status]
|
||||
reviewer:
|
||||
description: "Code standards compliance check"
|
||||
goal: "You are a code reviewer. You verify code standards compliance — NOT functionality (that's the tester's job)."
|
||||
capabilities:
|
||||
- code-review
|
||||
- static-analysis
|
||||
procedure: |
|
||||
Hard checks (must all pass):
|
||||
1. `bun run build` — no build errors
|
||||
2. `bunx biome check` — no lint violations
|
||||
3. TypeScript strict mode — no type errors
|
||||
|
||||
Soft checks (review against CLAUDE.md conventions):
|
||||
- Functional-first: `function` + `type`, not `class` + `interface`
|
||||
- No optional properties (`?:`) — use `T | null`
|
||||
- Naming conventions (kebab-case files, PascalCase types, camelCase functions)
|
||||
- Module boundary discipline (folder exports via index.ts)
|
||||
- No `console.log` (use structured logger)
|
||||
- No dynamic imports in production code
|
||||
|
||||
Only review standards compliance. Do NOT test functionality.
|
||||
If rejecting, you MUST explain the specific reason in your output.
|
||||
output: "Explain your decision with specific file/line references. Frontmatter must include: approved (true or false)."
|
||||
frontmatter:
|
||||
type: object
|
||||
properties:
|
||||
approved:
|
||||
type: boolean
|
||||
required: [approved]
|
||||
tester:
|
||||
description: "Functional correctness verification"
|
||||
goal: "You are a tester agent. You verify that the implementation correctly satisfies every scenario in the test spec."
|
||||
capabilities:
|
||||
- testing
|
||||
procedure: |
|
||||
1. Run `bun test` for automated test verification
|
||||
2. Read the test spec from CAS: `uwf cas get <plan hash>` (find the hash from the latest planner step's meta.plan)
|
||||
3. Verify each scenario in the spec is covered and passing
|
||||
4. Determine outcome:
|
||||
- passed: all scenarios verified, tests pass
|
||||
- fix_code: tests fail or implementation doesn't match spec → send back to developer
|
||||
- fix_spec: the spec itself is wrong or incomplete → send back to planner
|
||||
output: "Report test results per scenario. Frontmatter must include: status (passed, fix_code, or fix_spec)."
|
||||
frontmatter:
|
||||
type: object
|
||||
properties:
|
||||
status:
|
||||
type: string
|
||||
enum: [passed, fix_code, fix_spec]
|
||||
required: [status]
|
||||
committer:
|
||||
description: "Commits and creates PR"
|
||||
goal: "You are a committer agent. You create a clean commit and push a PR linking the original issue."
|
||||
capabilities: []
|
||||
procedure: |
|
||||
Note: You inherit the developer's worktree and branch. Do NOT create a new branch.
|
||||
1. Stage all changes: `git add -A`
|
||||
2. Commit with a descriptive message referencing the issue: `git commit -m "type: description\n\nFixes #N"`
|
||||
3. Push the branch: `git push -u origin <branch-name>`
|
||||
- If push hook fails: capture the error log in your output, mark hook_failed
|
||||
4. On push success: create a PR via `tea pr create --title "..." --description "..."`
|
||||
- PR description must follow the project template: What / Why / Changes / Ref sections, with `Fixes #N` in Ref
|
||||
output: "Include PR URL on success or error log on failure. Frontmatter must include: success (true or false)."
|
||||
frontmatter:
|
||||
type: object
|
||||
properties:
|
||||
success:
|
||||
type: boolean
|
||||
required: [success]
|
||||
conditions:
|
||||
insufficientInfo:
|
||||
description: "Planner determined there's not enough info to proceed"
|
||||
expression: "$last('planner').status = 'insufficient_info'"
|
||||
devFailed:
|
||||
description: "Developer failed to implement"
|
||||
expression: "$last('developer').status = 'failed'"
|
||||
rejected:
|
||||
description: "Reviewer rejected the implementation"
|
||||
expression: "$last('reviewer').approved = false"
|
||||
fixCode:
|
||||
description: "Tester found code issues"
|
||||
expression: "$last('tester').status = 'fix_code'"
|
||||
fixSpec:
|
||||
description: "Tester found spec issues"
|
||||
expression: "$last('tester').status = 'fix_spec'"
|
||||
hookFailed:
|
||||
description: "Push hook failed"
|
||||
expression: "$last('committer').success = false"
|
||||
graph:
|
||||
$START:
|
||||
- role: "planner"
|
||||
planner:
|
||||
- role: "$END"
|
||||
condition: "insufficientInfo"
|
||||
- role: "developer"
|
||||
developer:
|
||||
- role: "$END"
|
||||
condition: "devFailed"
|
||||
- role: "reviewer"
|
||||
reviewer:
|
||||
- role: "developer"
|
||||
condition: "rejected"
|
||||
- role: "tester"
|
||||
tester:
|
||||
- role: "developer"
|
||||
condition: "fixCode"
|
||||
- role: "planner"
|
||||
condition: "fixSpec"
|
||||
- role: "committer"
|
||||
committer:
|
||||
- role: "developer"
|
||||
condition: "hookFailed"
|
||||
- role: "$END"
|
||||
@@ -19,7 +19,7 @@ roles:
|
||||
output: |
|
||||
Provide your analysis as markdown under the frontmatter.
|
||||
The frontmatter must include your structured findings.
|
||||
meta:
|
||||
frontmatter:
|
||||
type: object
|
||||
properties:
|
||||
thesis:
|
||||
|
||||
@@ -9,7 +9,7 @@ roles:
|
||||
- planning
|
||||
procedure: "Analyze the issue and create a detailed, actionable implementation plan."
|
||||
output: "Output the plan summary and list of concrete steps."
|
||||
meta:
|
||||
frontmatter:
|
||||
type: object
|
||||
properties:
|
||||
plan:
|
||||
@@ -28,7 +28,7 @@ roles:
|
||||
- testing
|
||||
procedure: "Implement the plan. Write code, tests, and ensure existing tests pass."
|
||||
output: "List all files changed and provide a summary of the implementation."
|
||||
meta:
|
||||
frontmatter:
|
||||
type: object
|
||||
properties:
|
||||
filesChanged:
|
||||
@@ -46,7 +46,7 @@ roles:
|
||||
- static-analysis
|
||||
procedure: "Review the implementation against the plan. Check for bugs, edge cases, and style."
|
||||
output: "Approve or reject with detailed comments explaining your decision."
|
||||
meta:
|
||||
frontmatter:
|
||||
type: object
|
||||
properties:
|
||||
approved:
|
||||
@@ -57,7 +57,7 @@ roles:
|
||||
conditions:
|
||||
notApproved:
|
||||
description: "Reviewer rejected the implementation"
|
||||
expression: "steps[-1].output.approved = false"
|
||||
expression: "$last('reviewer').approved = false"
|
||||
graph:
|
||||
$START:
|
||||
- role: "planner"
|
||||
|
||||
@@ -7,6 +7,7 @@ import {
|
||||
cmdCasGet,
|
||||
cmdCasHas,
|
||||
cmdCasPut,
|
||||
cmdCasPutText,
|
||||
cmdCasRefs,
|
||||
cmdCasReindex,
|
||||
cmdCasSchemaGet,
|
||||
@@ -295,6 +296,17 @@ cas
|
||||
});
|
||||
});
|
||||
|
||||
cas
|
||||
.command("put-text")
|
||||
.description("Store a plain text string, print its hash")
|
||||
.argument("<text>", "Text content to store")
|
||||
.action((text: string) => {
|
||||
const storageRoot = resolveStorageRoot();
|
||||
runAction(async () => {
|
||||
writeOutput(await cmdCasPutText(storageRoot, text));
|
||||
});
|
||||
});
|
||||
|
||||
cas
|
||||
.command("has")
|
||||
.description("Check if a hash exists")
|
||||
|
||||
@@ -2,9 +2,11 @@ import { readFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
|
||||
import type { JSONSchema, Store } from "@uncaged/json-cas";
|
||||
import { bootstrap, getSchema, refs, walk } from "@uncaged/json-cas";
|
||||
import { bootstrap, getSchema, putSchema, refs, walk } from "@uncaged/json-cas";
|
||||
import { createFsStore } from "@uncaged/json-cas-fs";
|
||||
|
||||
import { TEXT_SCHEMA } from "../schemas.js";
|
||||
|
||||
// ---- Helpers ----
|
||||
|
||||
function openStore(storageRoot: string): Store {
|
||||
@@ -121,3 +123,10 @@ export async function cmdCasSchemaGet(storageRoot: string, hash: string): Promis
|
||||
}
|
||||
return schema;
|
||||
}
|
||||
|
||||
export async function cmdCasPutText(storageRoot: string, text: string): Promise<{ hash: string }> {
|
||||
const store = openStore(storageRoot);
|
||||
const typeHash = await putSchema(store, TEXT_SCHEMA);
|
||||
const hash = await store.put(typeHash, text);
|
||||
return { hash };
|
||||
}
|
||||
|
||||
@@ -2,7 +2,12 @@ import { readFile } from "node:fs/promises";
|
||||
|
||||
import type { JSONSchema } from "@uncaged/json-cas";
|
||||
import { putSchema, validate } from "@uncaged/json-cas";
|
||||
import type { CasRef, RoleDefinition, WorkflowPayload } from "@uncaged/workflow-protocol";
|
||||
import type {
|
||||
CasRef,
|
||||
RoleDefinition,
|
||||
Transition,
|
||||
WorkflowPayload,
|
||||
} from "@uncaged/workflow-protocol";
|
||||
import { parse } from "yaml";
|
||||
|
||||
import {
|
||||
@@ -46,11 +51,28 @@ function isJsonSchema(value: unknown): value is JSONSchema {
|
||||
return typeof value === "object" && value !== null && !Array.isArray(value);
|
||||
}
|
||||
|
||||
async function resolveMetaRef(uwf: UwfStore, roleName: string, meta: unknown): Promise<CasRef> {
|
||||
if (!isJsonSchema(meta)) {
|
||||
fail(`role "${roleName}": meta must be a JSON Schema object`);
|
||||
/** Normalize graph transitions: ensure condition is null (not undefined) for fallback entries. */
|
||||
function normalizeGraph(graph: Record<string, Transition[]>): Record<string, Transition[]> {
|
||||
const result: Record<string, Transition[]> = {};
|
||||
for (const [node, transitions] of Object.entries(graph)) {
|
||||
result[node] = transitions.map((t) => ({
|
||||
role: t.role,
|
||||
condition: t.condition ?? null,
|
||||
}));
|
||||
}
|
||||
const schema: JSONSchema = meta.title === undefined ? { ...meta, title: roleName } : meta;
|
||||
return result;
|
||||
}
|
||||
|
||||
async function resolveFrontmatterRef(
|
||||
uwf: UwfStore,
|
||||
roleName: string,
|
||||
frontmatter: unknown,
|
||||
): Promise<CasRef> {
|
||||
if (!isJsonSchema(frontmatter)) {
|
||||
fail(`role "${roleName}": frontmatter must be a JSON Schema object`);
|
||||
}
|
||||
const schema: JSONSchema =
|
||||
frontmatter.title === undefined ? { ...frontmatter, title: roleName } : frontmatter;
|
||||
return putSchema(uwf.store, schema);
|
||||
}
|
||||
|
||||
@@ -60,14 +82,18 @@ export async function materializeWorkflowPayload(
|
||||
): Promise<WorkflowPayload> {
|
||||
const roles: Record<string, RoleDefinition> = {};
|
||||
for (const [roleName, role] of Object.entries(raw.roles)) {
|
||||
const meta = await resolveMetaRef(uwf, `${raw.name}.${roleName}`, role.meta);
|
||||
const frontmatter = await resolveFrontmatterRef(
|
||||
uwf,
|
||||
`${raw.name}.${roleName}`,
|
||||
role.frontmatter,
|
||||
);
|
||||
roles[roleName] = {
|
||||
description: role.description,
|
||||
goal: role.goal,
|
||||
capabilities: role.capabilities,
|
||||
procedure: role.procedure,
|
||||
output: role.output,
|
||||
meta,
|
||||
frontmatter,
|
||||
};
|
||||
}
|
||||
return {
|
||||
@@ -75,7 +101,7 @@ export async function materializeWorkflowPayload(
|
||||
description: raw.description,
|
||||
roles,
|
||||
conditions: raw.conditions,
|
||||
graph: raw.graph,
|
||||
graph: normalizeGraph(raw.graph),
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -2,10 +2,13 @@ import type { Hash, Store } from "@uncaged/json-cas";
|
||||
import { putSchema } from "@uncaged/json-cas";
|
||||
import { START_NODE_SCHEMA, STEP_NODE_SCHEMA, WORKFLOW_SCHEMA } from "@uncaged/workflow-protocol";
|
||||
|
||||
export const TEXT_SCHEMA = { type: "string" as const };
|
||||
|
||||
export type UwfSchemaHashes = {
|
||||
workflow: Hash;
|
||||
startNode: Hash;
|
||||
stepNode: Hash;
|
||||
text: Hash;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -13,10 +16,11 @@ export type UwfSchemaHashes = {
|
||||
* Idempotent: safe to call on every CLI invocation.
|
||||
*/
|
||||
export async function registerUwfSchemas(store: Store): Promise<UwfSchemaHashes> {
|
||||
const [workflow, startNode, stepNode] = await Promise.all([
|
||||
const [workflow, startNode, stepNode, text] = await Promise.all([
|
||||
putSchema(store, WORKFLOW_SCHEMA),
|
||||
putSchema(store, START_NODE_SCHEMA),
|
||||
putSchema(store, STEP_NODE_SCHEMA),
|
||||
putSchema(store, TEXT_SCHEMA),
|
||||
]);
|
||||
return { workflow, startNode, stepNode };
|
||||
return { workflow, startNode, stepNode, text };
|
||||
}
|
||||
|
||||
@@ -15,8 +15,8 @@ function isRoleDefinition(value: unknown): boolean {
|
||||
if (!isRecord(value)) {
|
||||
return false;
|
||||
}
|
||||
const meta = value.meta;
|
||||
const metaOk = isRecord(meta) && typeof meta.type === "string";
|
||||
const frontmatter = value.frontmatter;
|
||||
const frontmatterOk = isRecord(frontmatter) && typeof frontmatter.type === "string";
|
||||
const capabilities = value.capabilities;
|
||||
const capabilitiesOk =
|
||||
Array.isArray(capabilities) && capabilities.every((c) => typeof c === "string");
|
||||
@@ -26,7 +26,7 @@ function isRoleDefinition(value: unknown): boolean {
|
||||
capabilitiesOk &&
|
||||
typeof value.procedure === "string" &&
|
||||
typeof value.output === "string" &&
|
||||
metaOk
|
||||
frontmatterOk
|
||||
);
|
||||
}
|
||||
|
||||
@@ -42,7 +42,10 @@ function isTransition(value: unknown): boolean {
|
||||
return false;
|
||||
}
|
||||
const condition = value.condition;
|
||||
return typeof value.role === "string" && (condition === null || typeof condition === "string");
|
||||
return (
|
||||
typeof value.role === "string" &&
|
||||
(condition === null || condition === undefined || typeof condition === "string")
|
||||
);
|
||||
}
|
||||
|
||||
function isStringRecord(value: unknown, itemCheck: (item: unknown) => boolean): boolean {
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { spawn } from "node:child_process";
|
||||
import type { Store } from "@uncaged/json-cas";
|
||||
|
||||
import {
|
||||
type AgentContext,
|
||||
@@ -10,7 +11,6 @@ import {
|
||||
import {
|
||||
loadHermesSession,
|
||||
parseSessionIdFromStdout,
|
||||
storeHermesRawOutput,
|
||||
storeHermesSessionDetail,
|
||||
} from "./session-detail.js";
|
||||
|
||||
@@ -52,17 +52,8 @@ export function buildHermesPrompt(ctx: AgentContext): string {
|
||||
return parts.join("\n");
|
||||
}
|
||||
|
||||
function spawnHermesChat(prompt: string): Promise<{ stdout: string; stderr: string }> {
|
||||
function spawnHermes(args: string[]): Promise<{ stdout: string; stderr: string }> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const args = [
|
||||
"chat",
|
||||
"-q",
|
||||
prompt,
|
||||
"--yolo",
|
||||
"--max-turns",
|
||||
String(HERMES_MAX_TURNS),
|
||||
"--quiet",
|
||||
];
|
||||
const child = spawn(HERMES_COMMAND, args, {
|
||||
env: process.env,
|
||||
shell: false,
|
||||
@@ -94,23 +85,73 @@ function spawnHermesChat(prompt: string): Promise<{ stdout: string; stderr: stri
|
||||
});
|
||||
}
|
||||
|
||||
function spawnHermesChat(prompt: string): Promise<{ stdout: string; stderr: string }> {
|
||||
return spawnHermes([
|
||||
"chat",
|
||||
"-q",
|
||||
prompt,
|
||||
"--yolo",
|
||||
"--max-turns",
|
||||
String(HERMES_MAX_TURNS),
|
||||
"--quiet",
|
||||
]);
|
||||
}
|
||||
|
||||
function spawnHermesResume(
|
||||
sessionId: string,
|
||||
message: string,
|
||||
): Promise<{ stdout: string; stderr: string }> {
|
||||
return spawnHermes([
|
||||
"chat",
|
||||
"--resume",
|
||||
sessionId,
|
||||
"-q",
|
||||
message,
|
||||
"--yolo",
|
||||
"--max-turns",
|
||||
String(HERMES_MAX_TURNS),
|
||||
"--quiet",
|
||||
]);
|
||||
}
|
||||
|
||||
function parseSessionId(stdout: string, stderr: string): string {
|
||||
const sessionId = parseSessionIdFromStdout(stderr) ?? parseSessionIdFromStdout(stdout);
|
||||
if (sessionId === null) {
|
||||
throw new Error(
|
||||
"Failed to parse session_id from hermes output.\n" +
|
||||
`stderr (first 200 chars): ${stderr.slice(0, 200)}\n` +
|
||||
`stdout (first 200 chars): ${stdout.slice(0, 200)}`,
|
||||
);
|
||||
}
|
||||
return sessionId;
|
||||
}
|
||||
|
||||
async function buildResultFromSession(sessionId: string, store: Store): Promise<AgentRunResult> {
|
||||
const session = await loadHermesSession(sessionId);
|
||||
if (session === null) {
|
||||
throw new Error(`Failed to load hermes session file for session_id: ${sessionId}`);
|
||||
}
|
||||
const { detailHash, output } = await storeHermesSessionDetail(store, session);
|
||||
return { output, detailHash, sessionId };
|
||||
}
|
||||
|
||||
async function runHermes(ctx: AgentContext): Promise<AgentRunResult> {
|
||||
const fullPrompt = buildHermesPrompt(ctx);
|
||||
const { stdout, stderr } = await spawnHermesChat(fullPrompt);
|
||||
const { store } = ctx;
|
||||
const sessionId = parseSessionId(stdout, stderr);
|
||||
return buildResultFromSession(sessionId, ctx.store);
|
||||
}
|
||||
|
||||
// --quiet mode: session_id may be on stdout or stderr
|
||||
const sessionId = parseSessionIdFromStdout(stderr) ?? parseSessionIdFromStdout(stdout);
|
||||
if (sessionId !== null) {
|
||||
const session = await loadHermesSession(sessionId);
|
||||
if (session !== null) {
|
||||
const { detailHash, output } = await storeHermesSessionDetail(store, session);
|
||||
return { output, detailHash };
|
||||
}
|
||||
}
|
||||
|
||||
const detailHash = await storeHermesRawOutput(store, stdout);
|
||||
return { output: stdout, detailHash };
|
||||
async function continueHermes(
|
||||
sessionId: string,
|
||||
message: string,
|
||||
store: Store,
|
||||
): Promise<AgentRunResult> {
|
||||
const { stdout, stderr } = await spawnHermesResume(sessionId, message);
|
||||
// Resume may return a new session_id
|
||||
const newSessionId = parseSessionIdFromStdout(stderr) ?? parseSessionIdFromStdout(stdout);
|
||||
const resolvedId = newSessionId ?? sessionId;
|
||||
return buildResultFromSession(resolvedId, store);
|
||||
}
|
||||
|
||||
/** Agent CLI factory: parses argv, runs Hermes, extracts output, writes StepNode. */
|
||||
@@ -118,5 +159,6 @@ export function createHermesAgent(): () => Promise<void> {
|
||||
return createAgent({
|
||||
name: "hermes",
|
||||
run: runHermes,
|
||||
continue: continueHermes,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -12,4 +12,10 @@ export type { FrontmatterFastPathResult } from "./frontmatter.js";
|
||||
export { tryFrontmatterFastPath } from "./frontmatter.js";
|
||||
export { createAgent } from "./run.js";
|
||||
export { getConfigPath, getEnvPath, loadWorkflowConfig } from "./storage.js";
|
||||
export type { AgentContext, AgentOptions, AgentRunFn, AgentRunResult } from "./types.js";
|
||||
export type {
|
||||
AgentContext,
|
||||
AgentContinueFn,
|
||||
AgentOptions,
|
||||
AgentRunFn,
|
||||
AgentRunResult,
|
||||
} from "./types.js";
|
||||
|
||||
@@ -3,11 +3,12 @@ import type { CasRef, StepNodePayload, ThreadId } from "@uncaged/workflow-protoc
|
||||
import { config as loadDotenv } from "dotenv";
|
||||
import { buildOutputFormatInstruction } from "./build-output-format-instruction.js";
|
||||
import { buildContextWithMeta } from "./context.js";
|
||||
import { extract } from "./extract.js";
|
||||
import { tryFrontmatterFastPath } from "./frontmatter.js";
|
||||
import type { AgentStore } from "./storage.js";
|
||||
import { getEnvPath, loadWorkflowConfig, resolveStorageRoot } from "./storage.js";
|
||||
import type { AgentContext, AgentOptions, AgentRunResult } from "./types.js";
|
||||
import { getEnvPath, resolveStorageRoot } from "./storage.js";
|
||||
import type { AgentOptions } from "./types.js";
|
||||
|
||||
const MAX_FRONTMATTER_RETRIES = 2;
|
||||
|
||||
function fail(message: string): never {
|
||||
process.stderr.write(`${message}\n`);
|
||||
@@ -66,31 +67,16 @@ async function writeStepNode(options: {
|
||||
return hash;
|
||||
}
|
||||
|
||||
async function runAgent(options: AgentOptions, ctx: AgentContext): Promise<AgentRunResult> {
|
||||
return runWithMessage("agent run failed", () => options.run(ctx));
|
||||
}
|
||||
|
||||
async function extractOutput(
|
||||
async function tryExtractOutput(
|
||||
rawOutput: string,
|
||||
outputSchema: CasRef,
|
||||
storageRoot: string,
|
||||
ctx: Awaited<ReturnType<typeof buildContextWithMeta>>,
|
||||
): Promise<CasRef> {
|
||||
const fastPath = await runWithMessage("frontmatter fast path", () =>
|
||||
tryFrontmatterFastPath(rawOutput, outputSchema, ctx.meta.store),
|
||||
).catch(() => null);
|
||||
|
||||
): Promise<CasRef | null> {
|
||||
const fastPath = await tryFrontmatterFastPath(rawOutput, outputSchema, ctx.meta.store);
|
||||
if (fastPath !== null) {
|
||||
return fastPath.outputHash;
|
||||
}
|
||||
|
||||
const config = await runWithMessage("failed to load config", () =>
|
||||
loadWorkflowConfig(storageRoot),
|
||||
);
|
||||
const extracted = await runWithMessage("extract failed", () =>
|
||||
extract(rawOutput, outputSchema, config),
|
||||
);
|
||||
return extracted.hash;
|
||||
return null;
|
||||
}
|
||||
|
||||
async function persistStep(options: {
|
||||
@@ -112,11 +98,6 @@ async function persistStep(options: {
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an agent CLI entrypoint.
|
||||
* Parses argv (`<thread-id> <role>`), runs the agent, extracts structured output,
|
||||
* writes StepNode to CAS, and prints the new node hash to stdout.
|
||||
*/
|
||||
export function createAgent(options: AgentOptions): () => Promise<void> {
|
||||
return async function main(): Promise<void> {
|
||||
const { threadId, role } = parseArgv(process.argv);
|
||||
@@ -130,13 +111,36 @@ export function createAgent(options: AgentOptions): () => Promise<void> {
|
||||
fail(`unknown role: ${role}`);
|
||||
}
|
||||
|
||||
const metaSchema = getSchema(ctx.meta.store, roleDef.meta);
|
||||
if (metaSchema !== null) {
|
||||
ctx.outputFormatInstruction = buildOutputFormatInstruction(metaSchema);
|
||||
const frontmatterSchema = getSchema(ctx.meta.store, roleDef.frontmatter);
|
||||
if (frontmatterSchema !== null) {
|
||||
ctx.outputFormatInstruction = buildOutputFormatInstruction(frontmatterSchema);
|
||||
}
|
||||
|
||||
let agentResult = await runWithMessage("agent run failed", () => options.run(ctx));
|
||||
|
||||
// Try to extract frontmatter; retry via continue if it fails
|
||||
let outputHash = await tryExtractOutput(agentResult.output, roleDef.frontmatter, ctx);
|
||||
|
||||
for (let retry = 0; retry < MAX_FRONTMATTER_RETRIES && outputHash === null; retry++) {
|
||||
const correctionMessage =
|
||||
"Your previous response did not contain valid YAML frontmatter matching the role schema.\n" +
|
||||
"You MUST begin your response with a YAML frontmatter block (--- delimited).\n" +
|
||||
"Please output ONLY the corrected frontmatter block followed by your work.";
|
||||
|
||||
agentResult = await runWithMessage("agent continue failed", () =>
|
||||
options.continue(agentResult.sessionId, correctionMessage, ctx.meta.store),
|
||||
);
|
||||
outputHash = await tryExtractOutput(agentResult.output, roleDef.frontmatter, ctx);
|
||||
}
|
||||
|
||||
if (outputHash === null) {
|
||||
fail(
|
||||
"Agent output does not contain valid YAML frontmatter matching the role schema " +
|
||||
`after ${MAX_FRONTMATTER_RETRIES} retries.\n` +
|
||||
`Raw output (first 500 chars): ${agentResult.output.slice(0, 500)}`,
|
||||
);
|
||||
}
|
||||
|
||||
const agentResult = await runAgent(options, ctx);
|
||||
const outputHash = await extractOutput(agentResult.output, roleDef.meta, storageRoot, ctx);
|
||||
const stepHash = await persistStep({
|
||||
ctx,
|
||||
outputHash,
|
||||
|
||||
@@ -17,11 +17,19 @@ export type AgentContext = ModeratorContext & {
|
||||
export type AgentRunResult = {
|
||||
output: string;
|
||||
detailHash: string;
|
||||
sessionId: string;
|
||||
};
|
||||
|
||||
export type AgentContinueFn = (
|
||||
sessionId: string,
|
||||
message: string,
|
||||
store: AgentContext["store"],
|
||||
) => Promise<AgentRunResult>;
|
||||
|
||||
export type AgentRunFn = (ctx: AgentContext) => Promise<AgentRunResult>;
|
||||
|
||||
export type AgentOptions = {
|
||||
name: string;
|
||||
run: AgentRunFn;
|
||||
continue: AgentContinueFn;
|
||||
};
|
||||
|
||||
@@ -35,11 +35,11 @@ const solveIssueWorkflow: WorkflowPayload = {
|
||||
conditions: {
|
||||
needsClarification: {
|
||||
description: "Planner requests clarification from user",
|
||||
expression: "$exists(steps[-1].output.needsClarification)",
|
||||
expression: "$exists($last('planner').needsClarification)",
|
||||
},
|
||||
notApproved: {
|
||||
rejected: {
|
||||
description: "Reviewer rejected the implementation",
|
||||
expression: "steps[-1].output.approved = false",
|
||||
expression: "$last('reviewer').approved = false",
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
@@ -50,7 +50,7 @@ const solveIssueWorkflow: WorkflowPayload = {
|
||||
],
|
||||
developer: [{ role: "reviewer", condition: null }],
|
||||
reviewer: [
|
||||
{ role: "developer", condition: "notApproved" },
|
||||
{ role: "developer", condition: "rejected" },
|
||||
{ role: "$END", condition: null },
|
||||
],
|
||||
},
|
||||
@@ -72,7 +72,7 @@ describe("evaluate", () => {
|
||||
expect(result).toEqual({ ok: true, value: "planner" });
|
||||
});
|
||||
|
||||
test("condition match (notApproved → developer)", async () => {
|
||||
test("condition match (rejected → developer)", async () => {
|
||||
const context = makeContext([
|
||||
{
|
||||
role: "reviewer",
|
||||
@@ -126,4 +126,116 @@ describe("evaluate", () => {
|
||||
const result = await evaluate(solveIssueWorkflow, context);
|
||||
expect(result).toEqual({ ok: true, value: "developer" });
|
||||
});
|
||||
|
||||
test("$last returns most recent matching role's frontmatter", async () => {
|
||||
const workflow: WorkflowPayload = {
|
||||
...solveIssueWorkflow,
|
||||
conditions: {
|
||||
devFailed: {
|
||||
description: "Developer failed",
|
||||
expression: "$last('developer').status = 'failed'",
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: [{ role: "developer", condition: null }],
|
||||
developer: [
|
||||
{ role: "$END", condition: "devFailed" },
|
||||
{ role: "reviewer", condition: null },
|
||||
],
|
||||
},
|
||||
};
|
||||
const context = makeContext([
|
||||
{
|
||||
role: "developer",
|
||||
output: { status: "done" },
|
||||
detail: "1VPBG9SM5E7WK",
|
||||
agent: "uwf-hermes",
|
||||
},
|
||||
{
|
||||
role: "reviewer",
|
||||
output: { approved: false },
|
||||
detail: "2MXBG6PN4A8JR",
|
||||
agent: "uwf-hermes",
|
||||
},
|
||||
{
|
||||
role: "developer",
|
||||
output: { status: "failed" },
|
||||
detail: "3QNTH7WK8D2PA",
|
||||
agent: "uwf-hermes",
|
||||
},
|
||||
]);
|
||||
const result = await evaluate(workflow, context);
|
||||
expect(result).toEqual({ ok: true, value: "$END" });
|
||||
});
|
||||
|
||||
test("$first returns earliest matching role's frontmatter", async () => {
|
||||
const workflow: WorkflowPayload = {
|
||||
...solveIssueWorkflow,
|
||||
conditions: {
|
||||
firstPlanReady: {
|
||||
description: "First planner run was ready",
|
||||
expression: "$first('planner').status = 'ready'",
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: [{ role: "planner", condition: null }],
|
||||
planner: [
|
||||
{ role: "$END", condition: "firstPlanReady" },
|
||||
{ role: "developer", condition: null },
|
||||
],
|
||||
},
|
||||
};
|
||||
const context = makeContext([
|
||||
{
|
||||
role: "planner",
|
||||
output: { status: "ready", plan: "ABC123" },
|
||||
detail: "7BQST3VW9F2MA",
|
||||
agent: "uwf-hermes",
|
||||
},
|
||||
{
|
||||
role: "developer",
|
||||
output: { status: "done" },
|
||||
detail: "1VPBG9SM5E7WK",
|
||||
agent: "uwf-hermes",
|
||||
},
|
||||
{
|
||||
role: "planner",
|
||||
output: { status: "revised", plan: "DEF456" },
|
||||
detail: "4RNMK6PX8B3WQ",
|
||||
agent: "uwf-hermes",
|
||||
},
|
||||
]);
|
||||
const result = await evaluate(workflow, context);
|
||||
expect(result).toEqual({ ok: true, value: "$END" });
|
||||
});
|
||||
|
||||
test("$last returns undefined for unmatched role", async () => {
|
||||
const workflow: WorkflowPayload = {
|
||||
...solveIssueWorkflow,
|
||||
conditions: {
|
||||
hasReviewer: {
|
||||
description: "Reviewer has run",
|
||||
expression: "$exists($last('reviewer'))",
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: [{ role: "planner", condition: null }],
|
||||
planner: [
|
||||
{ role: "$END", condition: "hasReviewer" },
|
||||
{ role: "developer", condition: null },
|
||||
],
|
||||
},
|
||||
};
|
||||
const context = makeContext([
|
||||
{
|
||||
role: "planner",
|
||||
output: { status: "ready" },
|
||||
detail: "7BQST3VW9F2MA",
|
||||
agent: "uwf-hermes",
|
||||
},
|
||||
]);
|
||||
const result = await evaluate(workflow, context);
|
||||
// no reviewer step → $exists returns false → fallback to developer
|
||||
expect(result).toEqual({ ok: true, value: "developer" });
|
||||
});
|
||||
});
|
||||
|
||||
@@ -21,12 +21,44 @@ function isTruthy(value: unknown): boolean {
|
||||
return true;
|
||||
}
|
||||
|
||||
function findByRole(
|
||||
steps: ModeratorContext["steps"],
|
||||
role: string,
|
||||
direction: "first" | "last",
|
||||
): unknown {
|
||||
if (direction === "last") {
|
||||
for (let i = steps.length - 1; i >= 0; i--) {
|
||||
if (steps[i].role === role) {
|
||||
return steps[i].output;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (const step of steps) {
|
||||
if (step.role === role) {
|
||||
return step.output;
|
||||
}
|
||||
}
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
async function evaluateJsonata(
|
||||
expression: string,
|
||||
context: ModeratorContext,
|
||||
): Promise<Result<unknown, Error>> {
|
||||
try {
|
||||
const result = await jsonata(expression).evaluate(context);
|
||||
const expr = jsonata(expression);
|
||||
expr.registerFunction(
|
||||
"first",
|
||||
(role: string) => findByRole(context.steps, role, "first"),
|
||||
"<s:x>",
|
||||
);
|
||||
expr.registerFunction(
|
||||
"last",
|
||||
(role: string) => findByRole(context.steps, role, "last"),
|
||||
"<s:x>",
|
||||
);
|
||||
const result = await expr.evaluate(context);
|
||||
return { ok: true, value: result };
|
||||
} catch (error) {
|
||||
return {
|
||||
|
||||
@@ -2,14 +2,14 @@ import type { JSONSchema } from "@uncaged/json-cas";
|
||||
|
||||
const ROLE_DEFINITION: JSONSchema = {
|
||||
type: "object",
|
||||
required: ["description", "goal", "capabilities", "procedure", "output", "meta"],
|
||||
required: ["description", "goal", "capabilities", "procedure", "output", "frontmatter"],
|
||||
properties: {
|
||||
description: { type: "string" },
|
||||
goal: { type: "string" },
|
||||
capabilities: { type: "array", items: { type: "string" } },
|
||||
procedure: { type: "string" },
|
||||
output: { type: "string" },
|
||||
meta: { type: "string", format: "cas_ref" },
|
||||
frontmatter: { type: "string", format: "cas_ref" },
|
||||
},
|
||||
additionalProperties: false,
|
||||
};
|
||||
|
||||
@@ -22,7 +22,7 @@ export type RoleDefinition = {
|
||||
capabilities: string[];
|
||||
procedure: string;
|
||||
output: string;
|
||||
meta: CasRef;
|
||||
frontmatter: CasRef;
|
||||
};
|
||||
|
||||
export type Transition = {
|
||||
|
||||
@@ -46,6 +46,8 @@ uwf cas get <hash> # read a CAS node (type + payload)
|
||||
[--timestamp] # include timestamp in output
|
||||
uwf cas put <type-hash> <data> # store a node, print its hash
|
||||
# <data>: JSON file path or inline JSON string
|
||||
uwf cas put-text <text> # store a plain text string, print its hash
|
||||
# shortcut for put with the built-in text schema
|
||||
uwf cas has <hash> # check if a hash exists
|
||||
uwf cas refs <hash> # list direct CAS references from a node
|
||||
uwf cas walk <hash> # recursive traversal from a node
|
||||
|
||||
Reference in New Issue
Block a user