Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| e5ae9a134c | |||
| bdafaf3aa1 | |||
| 02f7f0b708 | |||
| 8ea554bb5e | |||
| 8a425521da | |||
| f174f2fd0a | |||
| 355594d074 | |||
| fd7609fe90 | |||
| dacecfbbb7 | |||
| 3238eaeddf |
@@ -0,0 +1,167 @@
|
||||
name: "solve-issue"
|
||||
description: "TDD-driven issue resolution for small, focused changes. Loop protection relies on engine maxRounds."
|
||||
roles:
|
||||
planner:
|
||||
description: "Analyzes issue and outputs a TDD test spec"
|
||||
goal: "You are a planning agent. You analyze Gitea issues and produce a TDD test specification that downstream roles will implement and verify."
|
||||
capabilities:
|
||||
- issue-analysis
|
||||
- planning
|
||||
procedure: |
|
||||
On first run (no previous steps):
|
||||
1. Read the issue and all comments from Gitea using `tea issues <number> -r <owner/repo>`
|
||||
2. Read CLAUDE.md (or equivalent project conventions file) to understand coding standards
|
||||
3. Assess whether the issue has enough information to produce a test spec
|
||||
4. If insufficient info: comment on the issue via `echo "..." | tea comment <number> -r <owner/repo>` (skip if you already commented), then output status=insufficient_info and terminate
|
||||
5. If sufficient: produce a detailed TDD test spec in markdown covering all scenarios
|
||||
|
||||
On subsequent runs (bounced back by tester with fix_spec):
|
||||
1. Read the tester's output from the previous step to understand what's wrong with the spec
|
||||
2. Revise the test spec accordingly
|
||||
|
||||
After producing the test spec:
|
||||
1. Store it via `uwf cas put "<markdown content>"` and capture the returned hash
|
||||
2. Put the hash in meta.plan (required when status=ready)
|
||||
output: "Output a brief summary of the test spec. Frontmatter must include: status (ready or insufficient_info) and plan (CAS hash of the test spec, required when status=ready)."
|
||||
frontmatter:
|
||||
type: object
|
||||
properties:
|
||||
status:
|
||||
type: string
|
||||
enum: [ready, insufficient_info]
|
||||
plan:
|
||||
type: string
|
||||
required: [status]
|
||||
developer:
|
||||
description: "TDD implementation per test spec"
|
||||
goal: "You are a developer agent. You implement code changes following TDD — write tests first, then implementation."
|
||||
capabilities:
|
||||
- coding
|
||||
procedure: |
|
||||
1. Read the test spec from CAS: `uwf cas get <plan hash>` (find the hash from the latest planner step's meta.plan)
|
||||
2. If bounced back from reviewer or tester: read the previous role's output to understand what needs fixing
|
||||
3. Write tests first based on the spec
|
||||
4. Implement the code to make tests pass
|
||||
5. Ensure `bun run build` passes with no errors
|
||||
6. Run `bun test` to verify all tests pass
|
||||
output: "List all files changed and provide a summary. Frontmatter must include: status (done or failed)."
|
||||
frontmatter:
|
||||
type: object
|
||||
properties:
|
||||
status:
|
||||
type: string
|
||||
enum: [done, failed]
|
||||
required: [status]
|
||||
reviewer:
|
||||
description: "Code standards compliance check"
|
||||
goal: "You are a code reviewer. You verify code standards compliance — NOT functionality (that's the tester's job)."
|
||||
capabilities:
|
||||
- code-review
|
||||
- static-analysis
|
||||
procedure: |
|
||||
Hard checks (must all pass):
|
||||
1. `bun run build` — no build errors
|
||||
2. `bunx biome check` — no lint violations
|
||||
3. TypeScript strict mode — no type errors
|
||||
|
||||
Soft checks (review against CLAUDE.md conventions):
|
||||
- Functional-first: `function` + `type`, not `class` + `interface`
|
||||
- No optional properties (`?:`) — use `T | null`
|
||||
- Naming conventions (kebab-case files, PascalCase types, camelCase functions)
|
||||
- Module boundary discipline (folder exports via index.ts)
|
||||
- No `console.log` (use structured logger)
|
||||
- No dynamic imports in production code
|
||||
|
||||
Only review standards compliance. Do NOT test functionality.
|
||||
If rejecting, you MUST explain the specific reason in your output.
|
||||
output: "Explain your decision with specific file/line references. Frontmatter must include: approved (true or false)."
|
||||
frontmatter:
|
||||
type: object
|
||||
properties:
|
||||
approved:
|
||||
type: boolean
|
||||
required: [approved]
|
||||
tester:
|
||||
description: "Functional correctness verification"
|
||||
goal: "You are a tester agent. You verify that the implementation correctly satisfies every scenario in the test spec."
|
||||
capabilities:
|
||||
- testing
|
||||
procedure: |
|
||||
1. Run `bun test` for automated test verification
|
||||
2. Read the test spec from CAS: `uwf cas get <plan hash>` (find the hash from the latest planner step's meta.plan)
|
||||
3. Verify each scenario in the spec is covered and passing
|
||||
4. Determine outcome:
|
||||
- passed: all scenarios verified, tests pass
|
||||
- fix_code: tests fail or implementation doesn't match spec → send back to developer
|
||||
- fix_spec: the spec itself is wrong or incomplete → send back to planner
|
||||
output: "Report test results per scenario. Frontmatter must include: status (passed, fix_code, or fix_spec)."
|
||||
frontmatter:
|
||||
type: object
|
||||
properties:
|
||||
status:
|
||||
type: string
|
||||
enum: [passed, fix_code, fix_spec]
|
||||
required: [status]
|
||||
committer:
|
||||
description: "Commits and creates PR"
|
||||
goal: "You are a committer agent. You create a clean commit and push a PR linking the original issue."
|
||||
capabilities: []
|
||||
procedure: |
|
||||
Note: You inherit the developer's worktree and branch. Do NOT create a new branch.
|
||||
1. Stage all changes: `git add -A`
|
||||
2. Commit with a descriptive message referencing the issue: `git commit -m "type: description\n\nFixes #N"`
|
||||
3. Push the branch: `git push -u origin <branch-name>`
|
||||
- If push hook fails: capture the error log in your output, mark hook_failed
|
||||
4. On push success: create a PR via `tea pr create --title "..." --description "..."`
|
||||
- PR description must follow the project template: What / Why / Changes / Ref sections, with `Fixes #N` in Ref
|
||||
output: "Include PR URL on success or error log on failure. Frontmatter must include: success (true or false)."
|
||||
frontmatter:
|
||||
type: object
|
||||
properties:
|
||||
success:
|
||||
type: boolean
|
||||
required: [success]
|
||||
conditions:
|
||||
insufficientInfo:
|
||||
description: "Planner determined there's not enough info to proceed"
|
||||
expression: "$last('planner').status = 'insufficient_info'"
|
||||
devFailed:
|
||||
description: "Developer failed to implement"
|
||||
expression: "$last('developer').status = 'failed'"
|
||||
rejected:
|
||||
description: "Reviewer rejected the implementation"
|
||||
expression: "$last('reviewer').approved = false"
|
||||
fixCode:
|
||||
description: "Tester found code issues"
|
||||
expression: "$last('tester').status = 'fix_code'"
|
||||
fixSpec:
|
||||
description: "Tester found spec issues"
|
||||
expression: "$last('tester').status = 'fix_spec'"
|
||||
hookFailed:
|
||||
description: "Push hook failed"
|
||||
expression: "$last('committer').success = false"
|
||||
graph:
|
||||
$START:
|
||||
- role: "planner"
|
||||
planner:
|
||||
- role: "$END"
|
||||
condition: "insufficientInfo"
|
||||
- role: "developer"
|
||||
developer:
|
||||
- role: "$END"
|
||||
condition: "devFailed"
|
||||
- role: "reviewer"
|
||||
reviewer:
|
||||
- role: "developer"
|
||||
condition: "rejected"
|
||||
- role: "tester"
|
||||
tester:
|
||||
- role: "developer"
|
||||
condition: "fixCode"
|
||||
- role: "planner"
|
||||
condition: "fixSpec"
|
||||
- role: "committer"
|
||||
committer:
|
||||
- role: "developer"
|
||||
condition: "hookFailed"
|
||||
- role: "$END"
|
||||
@@ -19,7 +19,7 @@ roles:
|
||||
output: |
|
||||
Provide your analysis as markdown under the frontmatter.
|
||||
The frontmatter must include your structured findings.
|
||||
meta:
|
||||
frontmatter:
|
||||
type: object
|
||||
properties:
|
||||
thesis:
|
||||
|
||||
@@ -9,7 +9,7 @@ roles:
|
||||
- planning
|
||||
procedure: "Analyze the issue and create a detailed, actionable implementation plan."
|
||||
output: "Output the plan summary and list of concrete steps."
|
||||
meta:
|
||||
frontmatter:
|
||||
type: object
|
||||
properties:
|
||||
plan:
|
||||
@@ -28,7 +28,7 @@ roles:
|
||||
- testing
|
||||
procedure: "Implement the plan. Write code, tests, and ensure existing tests pass."
|
||||
output: "List all files changed and provide a summary of the implementation."
|
||||
meta:
|
||||
frontmatter:
|
||||
type: object
|
||||
properties:
|
||||
filesChanged:
|
||||
@@ -46,7 +46,7 @@ roles:
|
||||
- static-analysis
|
||||
procedure: "Review the implementation against the plan. Check for bugs, edge cases, and style."
|
||||
output: "Approve or reject with detailed comments explaining your decision."
|
||||
meta:
|
||||
frontmatter:
|
||||
type: object
|
||||
properties:
|
||||
approved:
|
||||
@@ -57,7 +57,7 @@ roles:
|
||||
conditions:
|
||||
notApproved:
|
||||
description: "Reviewer rejected the implementation"
|
||||
expression: "steps[-1].output.approved = false"
|
||||
expression: "$last('reviewer').approved = false"
|
||||
graph:
|
||||
$START:
|
||||
- role: "planner"
|
||||
|
||||
@@ -46,11 +46,16 @@ function isJsonSchema(value: unknown): value is JSONSchema {
|
||||
return typeof value === "object" && value !== null && !Array.isArray(value);
|
||||
}
|
||||
|
||||
async function resolveMetaRef(uwf: UwfStore, roleName: string, meta: unknown): Promise<CasRef> {
|
||||
if (!isJsonSchema(meta)) {
|
||||
fail(`role "${roleName}": meta must be a JSON Schema object`);
|
||||
async function resolveFrontmatterRef(
|
||||
uwf: UwfStore,
|
||||
roleName: string,
|
||||
frontmatter: unknown,
|
||||
): Promise<CasRef> {
|
||||
if (!isJsonSchema(frontmatter)) {
|
||||
fail(`role "${roleName}": frontmatter must be a JSON Schema object`);
|
||||
}
|
||||
const schema: JSONSchema = meta.title === undefined ? { ...meta, title: roleName } : meta;
|
||||
const schema: JSONSchema =
|
||||
frontmatter.title === undefined ? { ...frontmatter, title: roleName } : frontmatter;
|
||||
return putSchema(uwf.store, schema);
|
||||
}
|
||||
|
||||
@@ -60,14 +65,18 @@ export async function materializeWorkflowPayload(
|
||||
): Promise<WorkflowPayload> {
|
||||
const roles: Record<string, RoleDefinition> = {};
|
||||
for (const [roleName, role] of Object.entries(raw.roles)) {
|
||||
const meta = await resolveMetaRef(uwf, `${raw.name}.${roleName}`, role.meta);
|
||||
const frontmatter = await resolveFrontmatterRef(
|
||||
uwf,
|
||||
`${raw.name}.${roleName}`,
|
||||
role.frontmatter,
|
||||
);
|
||||
roles[roleName] = {
|
||||
description: role.description,
|
||||
goal: role.goal,
|
||||
capabilities: role.capabilities,
|
||||
procedure: role.procedure,
|
||||
output: role.output,
|
||||
meta,
|
||||
frontmatter,
|
||||
};
|
||||
}
|
||||
return {
|
||||
|
||||
@@ -15,8 +15,8 @@ function isRoleDefinition(value: unknown): boolean {
|
||||
if (!isRecord(value)) {
|
||||
return false;
|
||||
}
|
||||
const meta = value.meta;
|
||||
const metaOk = isRecord(meta) && typeof meta.type === "string";
|
||||
const frontmatter = value.frontmatter;
|
||||
const frontmatterOk = isRecord(frontmatter) && typeof frontmatter.type === "string";
|
||||
const capabilities = value.capabilities;
|
||||
const capabilitiesOk =
|
||||
Array.isArray(capabilities) && capabilities.every((c) => typeof c === "string");
|
||||
@@ -26,7 +26,7 @@ function isRoleDefinition(value: unknown): boolean {
|
||||
capabilitiesOk &&
|
||||
typeof value.procedure === "string" &&
|
||||
typeof value.output === "string" &&
|
||||
metaOk
|
||||
frontmatterOk
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -130,13 +130,18 @@ export function createAgent(options: AgentOptions): () => Promise<void> {
|
||||
fail(`unknown role: ${role}`);
|
||||
}
|
||||
|
||||
const metaSchema = getSchema(ctx.meta.store, roleDef.meta);
|
||||
if (metaSchema !== null) {
|
||||
ctx.outputFormatInstruction = buildOutputFormatInstruction(metaSchema);
|
||||
const frontmatterSchema = getSchema(ctx.meta.store, roleDef.frontmatter);
|
||||
if (frontmatterSchema !== null) {
|
||||
ctx.outputFormatInstruction = buildOutputFormatInstruction(frontmatterSchema);
|
||||
}
|
||||
|
||||
const agentResult = await runAgent(options, ctx);
|
||||
const outputHash = await extractOutput(agentResult.output, roleDef.meta, storageRoot, ctx);
|
||||
const outputHash = await extractOutput(
|
||||
agentResult.output,
|
||||
roleDef.frontmatter,
|
||||
storageRoot,
|
||||
ctx,
|
||||
);
|
||||
const stepHash = await persistStep({
|
||||
ctx,
|
||||
outputHash,
|
||||
|
||||
@@ -35,11 +35,11 @@ const solveIssueWorkflow: WorkflowPayload = {
|
||||
conditions: {
|
||||
needsClarification: {
|
||||
description: "Planner requests clarification from user",
|
||||
expression: "$exists(steps[-1].output.needsClarification)",
|
||||
expression: "$exists($last('planner').needsClarification)",
|
||||
},
|
||||
notApproved: {
|
||||
rejected: {
|
||||
description: "Reviewer rejected the implementation",
|
||||
expression: "steps[-1].output.approved = false",
|
||||
expression: "$last('reviewer').approved = false",
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
@@ -50,7 +50,7 @@ const solveIssueWorkflow: WorkflowPayload = {
|
||||
],
|
||||
developer: [{ role: "reviewer", condition: null }],
|
||||
reviewer: [
|
||||
{ role: "developer", condition: "notApproved" },
|
||||
{ role: "developer", condition: "rejected" },
|
||||
{ role: "$END", condition: null },
|
||||
],
|
||||
},
|
||||
@@ -72,7 +72,7 @@ describe("evaluate", () => {
|
||||
expect(result).toEqual({ ok: true, value: "planner" });
|
||||
});
|
||||
|
||||
test("condition match (notApproved → developer)", async () => {
|
||||
test("condition match (rejected → developer)", async () => {
|
||||
const context = makeContext([
|
||||
{
|
||||
role: "reviewer",
|
||||
@@ -126,4 +126,116 @@ describe("evaluate", () => {
|
||||
const result = await evaluate(solveIssueWorkflow, context);
|
||||
expect(result).toEqual({ ok: true, value: "developer" });
|
||||
});
|
||||
|
||||
test("$last returns most recent matching role's frontmatter", async () => {
|
||||
const workflow: WorkflowPayload = {
|
||||
...solveIssueWorkflow,
|
||||
conditions: {
|
||||
devFailed: {
|
||||
description: "Developer failed",
|
||||
expression: "$last('developer').status = 'failed'",
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: [{ role: "developer", condition: null }],
|
||||
developer: [
|
||||
{ role: "$END", condition: "devFailed" },
|
||||
{ role: "reviewer", condition: null },
|
||||
],
|
||||
},
|
||||
};
|
||||
const context = makeContext([
|
||||
{
|
||||
role: "developer",
|
||||
output: { status: "done" },
|
||||
detail: "1VPBG9SM5E7WK",
|
||||
agent: "uwf-hermes",
|
||||
},
|
||||
{
|
||||
role: "reviewer",
|
||||
output: { approved: false },
|
||||
detail: "2MXBG6PN4A8JR",
|
||||
agent: "uwf-hermes",
|
||||
},
|
||||
{
|
||||
role: "developer",
|
||||
output: { status: "failed" },
|
||||
detail: "3QNTH7WK8D2PA",
|
||||
agent: "uwf-hermes",
|
||||
},
|
||||
]);
|
||||
const result = await evaluate(workflow, context);
|
||||
expect(result).toEqual({ ok: true, value: "$END" });
|
||||
});
|
||||
|
||||
test("$first returns earliest matching role's frontmatter", async () => {
|
||||
const workflow: WorkflowPayload = {
|
||||
...solveIssueWorkflow,
|
||||
conditions: {
|
||||
firstPlanReady: {
|
||||
description: "First planner run was ready",
|
||||
expression: "$first('planner').status = 'ready'",
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: [{ role: "planner", condition: null }],
|
||||
planner: [
|
||||
{ role: "$END", condition: "firstPlanReady" },
|
||||
{ role: "developer", condition: null },
|
||||
],
|
||||
},
|
||||
};
|
||||
const context = makeContext([
|
||||
{
|
||||
role: "planner",
|
||||
output: { status: "ready", plan: "ABC123" },
|
||||
detail: "7BQST3VW9F2MA",
|
||||
agent: "uwf-hermes",
|
||||
},
|
||||
{
|
||||
role: "developer",
|
||||
output: { status: "done" },
|
||||
detail: "1VPBG9SM5E7WK",
|
||||
agent: "uwf-hermes",
|
||||
},
|
||||
{
|
||||
role: "planner",
|
||||
output: { status: "revised", plan: "DEF456" },
|
||||
detail: "4RNMK6PX8B3WQ",
|
||||
agent: "uwf-hermes",
|
||||
},
|
||||
]);
|
||||
const result = await evaluate(workflow, context);
|
||||
expect(result).toEqual({ ok: true, value: "$END" });
|
||||
});
|
||||
|
||||
test("$last returns undefined for unmatched role", async () => {
|
||||
const workflow: WorkflowPayload = {
|
||||
...solveIssueWorkflow,
|
||||
conditions: {
|
||||
hasReviewer: {
|
||||
description: "Reviewer has run",
|
||||
expression: "$exists($last('reviewer'))",
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: [{ role: "planner", condition: null }],
|
||||
planner: [
|
||||
{ role: "$END", condition: "hasReviewer" },
|
||||
{ role: "developer", condition: null },
|
||||
],
|
||||
},
|
||||
};
|
||||
const context = makeContext([
|
||||
{
|
||||
role: "planner",
|
||||
output: { status: "ready" },
|
||||
detail: "7BQST3VW9F2MA",
|
||||
agent: "uwf-hermes",
|
||||
},
|
||||
]);
|
||||
const result = await evaluate(workflow, context);
|
||||
// no reviewer step → $exists returns false → fallback to developer
|
||||
expect(result).toEqual({ ok: true, value: "developer" });
|
||||
});
|
||||
});
|
||||
|
||||
@@ -21,12 +21,44 @@ function isTruthy(value: unknown): boolean {
|
||||
return true;
|
||||
}
|
||||
|
||||
function findByRole(
|
||||
steps: ModeratorContext["steps"],
|
||||
role: string,
|
||||
direction: "first" | "last",
|
||||
): unknown {
|
||||
if (direction === "last") {
|
||||
for (let i = steps.length - 1; i >= 0; i--) {
|
||||
if (steps[i].role === role) {
|
||||
return steps[i].output;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (const step of steps) {
|
||||
if (step.role === role) {
|
||||
return step.output;
|
||||
}
|
||||
}
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
async function evaluateJsonata(
|
||||
expression: string,
|
||||
context: ModeratorContext,
|
||||
): Promise<Result<unknown, Error>> {
|
||||
try {
|
||||
const result = await jsonata(expression).evaluate(context);
|
||||
const expr = jsonata(expression);
|
||||
expr.registerFunction(
|
||||
"first",
|
||||
(role: string) => findByRole(context.steps, role, "first"),
|
||||
"<s:x>",
|
||||
);
|
||||
expr.registerFunction(
|
||||
"last",
|
||||
(role: string) => findByRole(context.steps, role, "last"),
|
||||
"<s:x>",
|
||||
);
|
||||
const result = await expr.evaluate(context);
|
||||
return { ok: true, value: result };
|
||||
} catch (error) {
|
||||
return {
|
||||
|
||||
@@ -2,14 +2,14 @@ import type { JSONSchema } from "@uncaged/json-cas";
|
||||
|
||||
const ROLE_DEFINITION: JSONSchema = {
|
||||
type: "object",
|
||||
required: ["description", "goal", "capabilities", "procedure", "output", "meta"],
|
||||
required: ["description", "goal", "capabilities", "procedure", "output", "frontmatter"],
|
||||
properties: {
|
||||
description: { type: "string" },
|
||||
goal: { type: "string" },
|
||||
capabilities: { type: "array", items: { type: "string" } },
|
||||
procedure: { type: "string" },
|
||||
output: { type: "string" },
|
||||
meta: { type: "string", format: "cas_ref" },
|
||||
frontmatter: { type: "string", format: "cas_ref" },
|
||||
},
|
||||
additionalProperties: false,
|
||||
};
|
||||
|
||||
@@ -22,7 +22,7 @@ export type RoleDefinition = {
|
||||
capabilities: string[];
|
||||
procedure: string;
|
||||
output: string;
|
||||
meta: CasRef;
|
||||
frontmatter: CasRef;
|
||||
};
|
||||
|
||||
export type Transition = {
|
||||
|
||||
Reference in New Issue
Block a user