Merge pull request 'fix: remove _ single-exit for user roles' (#88) from fix/86-remove-single-exit-underscore into main
CI / check (push) Successful in 2m16s

This commit was merged in pull request #88.
This commit is contained in:
2026-06-05 02:09:50 +00:00
14 changed files with 158 additions and 138 deletions
+2 -2
View File
@@ -23,7 +23,7 @@ roles:
type: object
properties:
$status:
enum: ["_"]
enum: ["done"]
thesis:
type: string
keyPoints:
@@ -37,4 +37,4 @@ graph:
$START:
_: { role: "analyst", prompt: "Analyze the topic in the task and produce a structured summary with key points." }
analyst:
_: { role: "$END", prompt: "Analysis complete. Finish the workflow." }
done: { role: "$END", prompt: "Analysis complete. Finish the workflow." }
+30
View File
@@ -0,0 +1,30 @@
name: eval-simple
description: "Single-role eval workflow: fixer takes prompt, fixes code, done."
roles:
fixer:
description: "Fixes the code based on the prompt"
goal: |
You are a code fixer. Read the prompt, understand the bug, fix it, and verify by running the tests.
capabilities:
- code-editing
- test-running
procedure: |
1. Read the prompt to understand what needs to be fixed
2. Fix the bug in the source code
3. Run the tests mentioned in the prompt to verify
4. Output $status=done when tests pass
output: "Describe what you fixed and confirm tests pass. Set $status to done."
frontmatter:
type: object
properties:
$status:
type: string
enum: [done]
summary:
type: string
required: [$status, summary]
graph:
$START:
_: { role: "fixer", prompt: "Fix the code issue described in the task prompt." }
fixer:
done: { role: "$END", prompt: "Fix complete." }
+10 -10
View File
@@ -42,7 +42,7 @@ roles:
type: object
required: ["$status"]
properties:
$status: { type: string }
$status: { type: string, enum: ["done"] }
graph:
$START:
_:
@@ -59,7 +59,7 @@ graph:
prompt: "Try again"
location: null
roleB:
_:
done:
role: $END
prompt: "Done"
location: null
@@ -92,7 +92,7 @@ roles:
type: object
required: ["$status"]
properties:
$status: { type: string }
$status: { type: string, enum: ["done"] }
roleC:
description: Fail role
goal: Do C
@@ -104,7 +104,7 @@ roles:
type: object
required: ["$status"]
properties:
$status: { type: string }
$status: { type: string, enum: ["done"] }
graph:
$START:
_:
@@ -121,12 +121,12 @@ graph:
prompt: "Do C (fail)"
location: null
roleB:
_:
done:
role: $END
prompt: "Done"
location: null
roleC:
_:
done:
role: $END
prompt: "Done"
location: null
@@ -147,7 +147,7 @@ roles:
type: object
required: ["$status"]
properties:
$status: { type: string }
$status: { type: string, enum: ["done"] }
graph:
$START:
_:
@@ -155,7 +155,7 @@ graph:
prompt: "Work"
location: null
worker:
_:
done:
role: $END
prompt: "Done"
location: null
@@ -426,8 +426,8 @@ describe("currentRole field", () => {
await writeFile(wf, SINGLE_ROLE_WORKFLOW_YAML, "utf8");
const { thread } = await cmdThreadStart(storageRoot, wf, "test", tmpDir);
// worker → _ maps to $END
await insertStepNode(storageRoot, thread as ThreadId, "worker", {});
// worker → done maps to $END
await insertStepNode(storageRoot, thread as ThreadId, "worker", { $status: "done" });
const result = await cmdThreadShow(storageRoot, thread as ThreadId);
expect(result.currentRole).toBe(null);
@@ -8,10 +8,10 @@ const solveIssueGraph: WorkflowPayload["graph"] = {
_: { role: "planner", prompt: "Start planning from the issue in the task.", location: null },
},
planner: {
_: { role: "developer", prompt: "Implement the plan: {{plan}}", location: null },
planned: { role: "developer", prompt: "Implement the plan: {{plan}}", location: null },
},
developer: {
_: { role: "reviewer", prompt: "Review the changes: {{summary}}", location: null },
implemented: { role: "reviewer", prompt: "Review the changes: {{summary}}", location: null },
},
reviewer: {
approved: { role: "$END", prompt: "Done.", location: null },
@@ -112,7 +112,7 @@ describe("evaluate", () => {
test("mustache template rendering with simple fields", () => {
const result = evaluate(solveIssueGraph, "planner", {
$status: "_",
$status: "planned",
plan: "Add auth middleware",
});
expect(result).toEqual({
@@ -139,11 +139,11 @@ describe("evaluate", () => {
test("triple mustache also works for unescaped output", () => {
const graph: Record<string, Record<string, Target>> = {
reviewer: {
_: { role: "developer", prompt: "Fix: {{{comments}}}", location: null },
rejected: { role: "developer", prompt: "Fix: {{{comments}}}", location: null },
},
};
const result = evaluate(graph, "reviewer", {
$status: "_",
$status: "rejected",
comments: "<script>alert(1)</script>",
});
expect(result).toEqual({
@@ -152,24 +152,22 @@ describe("evaluate", () => {
});
});
test("missing $status defaults to _ (unit routing)", () => {
test("missing $status → error (no unit fallback)", () => {
const result = evaluate(solveIssueGraph, "planner", {
plan: "Add auth middleware",
});
expect(result).toEqual({
ok: true,
value: {
role: "developer",
prompt: "Implement the plan: Add auth middleware",
location: null,
},
});
expect(result.ok).toBe(false);
if (!result.ok) {
expect(result.error.message).toBe(
'agent output for role "planner" is missing required "$status" string',
);
}
});
test("mustache template with nested object paths", () => {
const graph: Record<string, Record<string, Target>> = {
reviewer: {
_: {
rejected: {
role: "developer",
prompt: "Address: {{review.comments}}",
location: null,
@@ -177,7 +175,7 @@ describe("evaluate", () => {
},
};
const result = evaluate(graph, "reviewer", {
$status: "_",
$status: "rejected",
review: { comments: "refactor the handler" },
});
expect(result).toEqual({
@@ -254,7 +254,7 @@ describe("thread read timing", () => {
},
graph: {
$START: { _: { role: "worker", prompt: "go", location: null } },
worker: { _: { role: "$END", prompt: "", location: null } },
worker: { done: { role: "$END", prompt: "", location: null } },
},
});
@@ -320,7 +320,7 @@ describe("thread read timing", () => {
},
graph: {
$START: { _: { role: "worker", prompt: "go", location: null } },
worker: { _: { role: "$END", prompt: "", location: null } },
worker: { done: { role: "$END", prompt: "", location: null } },
},
});
@@ -54,7 +54,7 @@ roles:
type: object
required: ["$status"]
properties:
$status: { type: string }
$status: { type: string, enum: ["ready"] }
graph:
$START:
_:
@@ -62,7 +62,7 @@ graph:
prompt: "Plan the work"
location: null
planner:
_:
ready:
role: $END
prompt: "Done"
location: null
@@ -110,7 +110,7 @@ roles:
type: object
required: ["$status"]
properties:
$status: { type: string }
$status: { type: string, enum: ["ready"] }
graph:
$START:
_:
@@ -118,7 +118,7 @@ graph:
prompt: "Plan"
location: null
planner:
_:
ready:
role: $END
prompt: "Done"
location: null
@@ -153,7 +153,7 @@ roles:
type: object
required: ["$status"]
properties:
$status: { type: string }
$status: { type: string, enum: ["ready"] }
graph:
$START:
_:
@@ -161,7 +161,7 @@ graph:
prompt: "Plan"
location: null
planner:
_:
ready:
role: $END
prompt: "Done"
location: null
@@ -79,7 +79,7 @@ async function setupSuspendedThread(mode: MockAgentMode): Promise<{
},
ok: { role: "reviewer", prompt: "Review the work", location: null },
},
reviewer: { _: { role: "$END", prompt: "Done", location: null } },
reviewer: { done: { role: "$END", prompt: "Done", location: null } },
},
});
@@ -234,7 +234,7 @@ describe("uwf thread resume", () => {
},
graph: {
$START: { _: { role: "worker", prompt: "Start", location: null } },
worker: { _: { role: "$END", prompt: "Done", location: null } },
worker: { done: { role: "$END", prompt: "Done", location: null } },
},
});
@@ -480,8 +480,8 @@ describe("uwf thread resume - completed threads", () => {
},
graph: {
$START: { _: { role: "worker", prompt: "Start work", location: null } },
worker: { _: { role: "reviewer", prompt: "Review the work", location: null } },
reviewer: { _: { role: "$END", prompt: "Done", location: null } },
worker: { done: { role: "reviewer", prompt: "Review the work", location: null } },
reviewer: { done: { role: "$END", prompt: "Done", location: null } },
},
});
@@ -493,8 +493,8 @@ describe("uwf thread resume - completed threads", () => {
process.env.OCAS_HOME = casDir;
const workerOutputHash = await store.cas.put(outputSchemaHash, { $status: "_" });
const reviewerOutputHash = await store.cas.put(outputSchemaHash, { $status: "_" });
const workerOutputHash = await store.cas.put(outputSchemaHash, { $status: "done" });
const reviewerOutputHash = await store.cas.put(outputSchemaHash, { $status: "done" });
const detailHash = await store.cas.put(schemas.text, "mock detail");
const workerStepHash = await store.cas.put(schemas.stepNode, {
@@ -563,7 +563,7 @@ describe("uwf thread resume - completed threads", () => {
stepHash: newWorkerStepHash,
detailHash,
role: "worker",
frontmatter: { $status: "_" },
frontmatter: { $status: "done" },
body: "",
startedAtMs: 1716600003000,
completedAtMs: 1716600004000,
@@ -641,7 +641,7 @@ echo '${adapterJson}'
},
graph: {
$START: { _: { role: "worker", prompt: "Start", location: null } },
worker: { _: { role: "$END", prompt: "Done", location: null } },
worker: { done: { role: "$END", prompt: "Done", location: null } },
},
});
@@ -689,7 +689,7 @@ echo '${adapterJson}'
},
graph: {
$START: { _: { role: "worker", prompt: "Start", location: null } },
worker: { _: { role: "$END", prompt: "Done", location: null } },
worker: { done: { role: "$END", prompt: "Done", location: null } },
},
});
@@ -31,7 +31,7 @@ roles:
type: object
required: ["$status"]
properties:
$status: { type: string }
$status: { type: string, enum: ["ready"] }
graph:
$START:
_:
@@ -39,7 +39,7 @@ graph:
prompt: "Plan the work"
location: null
planner:
_:
ready:
role: $END
prompt: "Done"
location: null
@@ -54,7 +54,7 @@ roles:
type: object
required: ["$status"]
properties:
$status: { type: string }
$status: { type: string, enum: ["ready"] }
graph:
$START:
_:
@@ -62,7 +62,7 @@ graph:
prompt: "Plan the work"
location: null
planner:
_:
ready:
role: $END
prompt: "Done"
location: null
@@ -17,7 +17,7 @@ function makeWorkflow(overrides?: Partial<WorkflowPayload>): WorkflowPayload {
frontmatter: {
type: "object",
properties: {
$status: { enum: ["_"] },
$status: { enum: ["done"] },
plan: { type: "string" },
},
required: ["$status", "plan"],
@@ -52,7 +52,7 @@ function makeWorkflow(overrides?: Partial<WorkflowPayload>): WorkflowPayload {
},
graph: {
$START: { _: { role: "writer", prompt: "Begin writing", location: null } },
writer: { _: { role: "reviewer", prompt: "Review this: {{{plan}}}", location: null } },
writer: { done: { role: "reviewer", prompt: "Review this: {{{plan}}}", location: null } },
reviewer: {
approved: { role: "$END", prompt: "Done: {{{summary}}}", location: null },
rejected: { role: "writer", prompt: "Fix: {{{reason}}}", location: null },
@@ -82,7 +82,7 @@ describe("Suite 1: Role Reference Integrity", () => {
output: "None",
frontmatter: {
type: "object",
properties: { $status: { enum: ["_"] } },
properties: { $status: { enum: ["done"] } },
required: ["$status"],
} as unknown as string,
};
@@ -173,11 +173,11 @@ describe("Suite 2: Graph Structure", () => {
output: "Isolated",
frontmatter: {
type: "object",
properties: { $status: { enum: ["_"] } },
properties: { $status: { enum: ["done"] } },
required: ["$status"],
} as unknown as string,
};
wf.graph.isolated = { _: { role: "$END", prompt: "done", location: null } };
wf.graph.isolated = { done: { role: "$END", prompt: "done", location: null } };
const errors = validateWorkflow(wf);
expect(errors.some((e) => e.includes('role "isolated" is not reachable from $START'))).toBe(
true,
@@ -186,34 +186,34 @@ describe("Suite 2: Graph Structure", () => {
test("2.6 edge target references invalid role", () => {
const wf = makeWorkflow();
wf.graph.writer = { _: { role: "ghost", prompt: "Go to ghost", location: null } };
wf.graph.writer = { done: { role: "ghost", prompt: "Go to ghost", location: null } };
const errors = validateWorkflow(wf);
expect(errors.some((e) => e.includes('unknown target role "ghost"'))).toBe(true);
});
});
describe("Suite 3: Status-Edge Consistency", () => {
test("3.1 single-exit role with multiple graph keys", () => {
test("3.1 user role using _ graph key is rejected", () => {
const wf = makeWorkflow();
wf.graph.writer = {
_: { role: "reviewer", prompt: "Review", location: null },
extra: { role: "$END", prompt: "Done", location: null },
};
wf.graph.writer = { _: { role: "reviewer", prompt: "Review", location: null } };
const errors = validateWorkflow(wf);
expect(
errors.some((e) =>
e.includes('role "writer" is single-exit but has status keys other than "_"'),
e.includes('role "writer" must use explicit $status keys in graph, not "_"'),
),
).toBe(true);
});
test("3.2 single-exit role missing _ key", () => {
test("3.2 user role graph key not matching $status enum", () => {
const wf = makeWorkflow();
wf.graph.writer = { done: { role: "reviewer", prompt: "Review", location: null } };
wf.graph.writer = { wrong: { role: "reviewer", prompt: "Review", location: null } };
const errors = validateWorkflow(wf);
expect(
errors.some((e) => e.includes('role "writer" is single-exit but graph has no "_" key')),
).toBe(true);
expect(errors.some((e) => e.includes('role "writer" graph has extra status keys: wrong'))).toBe(
true,
);
expect(errors.some((e) => e.includes('role "writer" graph is missing status keys: done'))).toBe(
true,
);
});
test("3.3 multi-exit role with extra statuses", () => {
@@ -244,9 +244,11 @@ describe("Suite 3: Status-Edge Consistency", () => {
const wf = makeWorkflow();
wf.graph.reviewer = { _: { role: "$END", prompt: "Done", location: null } };
const errors = validateWorkflow(wf);
expect(errors.some((e) => e.includes('role "reviewer" is multi-exit but graph uses "_"'))).toBe(
true,
);
expect(
errors.some((e) =>
e.includes('role "reviewer" must use explicit $status keys in graph, not "_"'),
),
).toBe(true);
});
});
@@ -314,20 +316,20 @@ describe("Suite 3b: Enum-Based Multi-Exit", () => {
expect(errors.some((e) => e.includes("missing status keys: rejected"))).toBe(true);
});
test("3b.4 enum with single value (not multi-exit) treated as single-exit", () => {
test("3b.4 enum with single explicit value passes", () => {
const wf = makeWorkflow();
wf.roles.writer = {
...wf.roles.writer,
frontmatter: {
type: "object",
properties: {
$status: { enum: ["_"] },
$status: { enum: ["ready"] },
plan: { type: "string" },
},
required: ["$status", "plan"],
} as unknown as string,
};
wf.graph.writer = { _: { role: "reviewer", prompt: "Review: {{{plan}}}", location: null } };
wf.graph.writer = { ready: { role: "reviewer", prompt: "Review: {{{plan}}}", location: null } };
const errors = validateWorkflow(wf);
expect(errors).toEqual([]);
});
@@ -355,13 +357,15 @@ describe("Suite 3b: Enum-Based Multi-Exit", () => {
});
describe("Suite 4: Mustache Template Variable Existence", () => {
test("4.1 prompt references nonexistent variable (single-exit)", () => {
test("4.1 prompt references nonexistent variable (enum status)", () => {
const wf = makeWorkflow();
wf.graph.writer = { _: { role: "reviewer", prompt: "Review: {{{branch}}}", location: null } };
wf.graph.writer = {
done: { role: "reviewer", prompt: "Review: {{{branch}}}", location: null },
};
const errors = validateWorkflow(wf);
expect(
errors.some((e) =>
e.includes('prompt variable "branch" not found in role "writer" frontmatter'),
errors.some(
(e) => e.includes('prompt variable "branch"') && e.includes('role "writer" frontmatter'),
),
).toBe(true);
});
@@ -388,7 +392,7 @@ describe("Suite 4: Mustache Template Variable Existence", () => {
test("4.4 $status variable is always valid", () => {
const wf = makeWorkflow();
wf.graph.writer = { _: { role: "reviewer", prompt: "Status: {{$status}}", location: null } };
wf.graph.writer = { done: { role: "reviewer", prompt: "Status: {{$status}}", location: null } };
const errors = validateWorkflow(wf);
expect(errors).toEqual([]);
});
@@ -456,14 +460,14 @@ describe("Suite 6: Multiple Errors Collection", () => {
output: "None",
frontmatter: {
type: "object",
properties: { $status: { enum: ["_"] } },
properties: { $status: { enum: ["done"] } },
required: ["$status"],
} as unknown as string,
};
// unknown graph reference
wf.graph.nonexistent = { _: { role: "$END", prompt: "done", location: null } };
wf.graph.nonexistent = { done: { role: "$END", prompt: "done", location: null } };
// bad mustache var
wf.graph.writer = { _: { role: "reviewer", prompt: "{{{badvar}}}", location: null } };
wf.graph.writer = { done: { role: "reviewer", prompt: "{{{badvar}}}", location: null } };
const errors = validateWorkflow(wf);
expect(errors.length).toBeGreaterThanOrEqual(3);
});
@@ -31,7 +31,7 @@ function makeMinimalPayload(name: string, description: string): WorkflowPayload
frontmatter: {
type: "object",
properties: {
$status: { type: "string" },
$status: { type: "string", enum: ["done"] },
},
required: ["$status"],
} as unknown as CasRef,
@@ -39,7 +39,7 @@ function makeMinimalPayload(name: string, description: string): WorkflowPayload
},
graph: {
$START: { _: { role: "worker", prompt: "start working", location: null } },
worker: { _: { role: "$END", prompt: "done", location: null } },
worker: { done: { role: "$END", prompt: "done", location: null } },
},
};
}
+13 -7
View File
@@ -8,7 +8,8 @@ mustache.escape = (text: string) => text;
const START_ROLE = "$START";
const SUSPEND_ROLE = "$SUSPEND";
const UNIT_STATUS = "_";
// $START is a special entry node with no agent output — it always uses this key.
const START_STATUS = "_";
type LastOutput = Record<string, unknown>;
@@ -19,12 +20,17 @@ export function evaluate(
lastRole: string,
lastOutput: LastOutput,
): Result<EvaluateResult, Error> {
const status =
lastRole === START_ROLE
? UNIT_STATUS
: typeof lastOutput[STATUS_KEY] === "string"
? (lastOutput[STATUS_KEY] as string)
: UNIT_STATUS;
let status: string;
if (lastRole === START_ROLE) {
status = START_STATUS;
} else if (typeof lastOutput[STATUS_KEY] === "string") {
status = lastOutput[STATUS_KEY] as string;
} else {
return {
ok: false,
error: new Error(`agent output for role "${lastRole}" is missing required "$status" string`),
};
}
const roleTargets = graph[lastRole];
if (roleTargets === undefined) {
+19 -46
View File
@@ -24,17 +24,13 @@ function isOneOfSchema(fm: unknown): fm is SchemaObj & { oneOf: SchemaObj[] } {
return Array.isArray(obj.oneOf);
}
/** Check if a frontmatter schema uses enum-based multi-exit ($status with multiple enum values). */
function isEnumMultiExit(fm: unknown): boolean {
/**
 * Check if a frontmatter schema declares "$status" as an enum. This is one of the two accepted
 * forms for user roles; the other (oneOf variants) is detected separately before this check.
 */
function hasStatusEnum(fm: unknown): boolean {
if (typeof fm !== "object" || fm === null) return false;
const obj = fm as SchemaObj;
const props = obj.properties as Record<string, SchemaObj> | undefined;
if (!props?.$status) return false;
const statusDef = props.$status;
if (!Array.isArray(statusDef.enum)) return false;
// Filter out "_" (wildcard) — if remaining values > 1, it's multi-exit
const statuses = (statusDef.enum as string[]).filter((s) => s !== "_");
return statuses.length > 1;
return Array.isArray(props.$status.enum);
}
/** Extract status values from an enum-based $status field. */
@@ -43,7 +39,7 @@ function getEnumStatuses(fm: SchemaObj): string[] {
if (!props?.$status) return [];
const statusDef = props.$status;
if (!Array.isArray(statusDef.enum)) return [];
return (statusDef.enum as string[]).filter((s) => s !== "_");
return statusDef.enum as string[];
}
/** Get property names from a schema object. */
@@ -194,15 +190,19 @@ function checkOneOfDiscriminant(
}
}
/** Check status-edge consistency for a multi-exit role. */
function checkMultiExitEdges(
/** Check status-edge consistency for a user role. "_" is reserved for $START and rejected here. */
function checkStatusEdges(
roleName: string,
graphKeys: Set<string>,
statusSet: Set<string>,
errors: string[],
): void {
if (graphKeys.has("_")) {
errors.push(`role "${roleName}" is multi-exit but graph uses "_"`);
errors.push(`role "${roleName}" must use explicit $status keys in graph, not "_"`);
return;
}
if (statusSet.has("_")) {
errors.push(`role "${roleName}" $status enum must use explicit values, not "_"`);
return;
}
@@ -255,50 +255,23 @@ function checkRoleConsistency(payload: WorkflowPayload, errors: string[]): void
const statuses = getOneOfStatuses(variants);
checkOneOfDiscriminant(roleName, variants, statuses, errors);
checkMultiExitEdges(roleName, graphKeys, new Set(statuses), errors);
checkStatusEdges(roleName, graphKeys, new Set(statuses), errors);
checkMultiExitMustache(roleName, graphEntry, variants, errors);
} else if (isEnumMultiExit(fm)) {
} else if (hasStatusEnum(fm)) {
const statuses = getEnumStatuses(fm as SchemaObj);
checkMultiExitEdges(roleName, graphKeys, new Set(statuses), errors);
checkStatusEdges(roleName, graphKeys, new Set(statuses), errors);
// For enum-based schemas, mustache vars come from the flat properties
checkSingleExitMustache(roleName, graphEntry, fm as SchemaObj, errors);
checkEnumMustache(roleName, graphEntry, fm as SchemaObj, errors);
} else {
checkSingleExitRole(roleName, graphKeys, graphEntry, fm as SchemaObj | null, errors);
}
}
}
/** Check single-exit role status and mustache. */
function checkSingleExitRole(
roleName: string,
graphKeys: Set<string>,
graphEntry: Record<string, { role: string; prompt: string }>,
fm: SchemaObj | null,
errors: string[],
): void {
if (graphKeys.size > 1 || (graphKeys.size === 1 && !graphKeys.has("_"))) {
if (!graphKeys.has("_")) {
errors.push(`role "${roleName}" is single-exit but graph has no "_" key`);
} else {
errors.push(`role "${roleName}" is single-exit but has status keys other than "_"`);
}
}
const singleTarget = graphEntry._;
if (!singleTarget) return;
const vars = extractMustacheVars(singleTarget.prompt);
const propNames = fm ? getPropertyNames(fm) : new Set<string>();
for (const v of vars) {
if (v === "$status") continue;
if (!propNames.has(v)) {
errors.push(`prompt variable "${v}" not found in role "${roleName}" frontmatter`);
errors.push(
`role "${roleName}" must define "$status" as an enum (or oneOf const) in frontmatter`,
);
}
}
}
/** Check mustache vars in all edge prompts against flat schema properties. */
function checkSingleExitMustache(
function checkEnumMustache(
roleName: string,
graphEntry: Record<string, { role: string; prompt: string }>,
fm: SchemaObj,
+12 -3
View File
@@ -57,9 +57,18 @@ function isGraph(value: unknown): boolean {
if (!isRecord(value)) {
return false;
}
return Object.values(value).every(
(statusMap) => isRecord(statusMap) && Object.values(statusMap).every((t) => isTarget(t)),
);
return Object.entries(value).every(([node, statusMap]) => {
if (!isRecord(statusMap)) {
return false;
}
return Object.entries(statusMap).every(([status, target]) => {
// "_" is only valid as a status key for the $START entry node.
if (status === "_" && node !== "$START") {
return false;
}
return isTarget(target);
});
});
}
/**