Compare commits

..

2 Commits

Author SHA1 Message Date
xiaoju 96ab097198 test(#566): add A4 retry loop and C1 integration round-trip tests
CI / test (pull_request) Successful in 1m42s
- A4: verify frontmatter retry loop produces correct AdapterOutput JSON
- C1: full round-trip test with mock agent → CLI JSON parsing → CAS verification

小橘 <xiaoju@shazhou.work>
2026-05-28 00:10:05 +00:00
xiaoju b95bbae5fc feat(agent): change adapter stdout from plain stepHash to JSON with full metadata (#566)
- Add AdapterOutput type (stepHash, detailHash, role, frontmatter, body, startedAtMs, completedAtMs)
- Update FrontmatterFastPathResult to include frontmatter record
- Change createAgent to output JSON line instead of plain hash
- Update spawnAgent in cli-workflow to parse JSON
- Add adapter-stdout tests (A-group) and spawn-agent-json tests (B-group)
2026-05-27 23:55:40 +00:00
11 changed files with 412 additions and 609 deletions
@@ -1,442 +0,0 @@
import { mkdir, rm, writeFile } from "node:fs/promises";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { putSchema } from "@uncaged/json-cas";
import type { CasRef, ThreadId } from "@uncaged/workflow-protocol";
import { describe, expect, test } from "vitest";
import { createMarker, deleteMarker } from "../background/index.js";
import { cmdThreadList, cmdThreadShow, cmdThreadStart } from "../commands/thread.js";
import {
appendThreadHistory,
createUwfStore,
loadThreadsIndex,
saveThreadsIndex,
} from "../store.js";
/**
 * JSON Schema under which the `insertStepNode` test helper stores step outputs.
 * It describes an object with one declared property, `$status` (a string);
 * no `required` list is given.
 */
const OUTPUT_SCHEMA = {
type: "object" as const,
properties: {
$status: { type: "string" as const },
},
};
/**
 * Workflow definition (YAML text) named "test-current-role" with two roles.
 *
 * Graph as written below:
 *   $START -> roleA
 *   roleA  -> roleB on "ready", roleA again on "not-ready"
 *   roleB  -> $END
 */
const SIMPLE_WORKFLOW_YAML = `
name: test-current-role
description: Test workflow for currentRole
roles:
roleA:
description: First role
goal: Do A
capabilities: ["coding"]
procedure: Do A
output: |
$status: "ready"
frontmatter:
type: object
required: ["$status"]
properties:
$status: { type: string, enum: ["ready", "not-ready"] }
roleB:
description: Second role
goal: Do B
capabilities: ["coding"]
procedure: Do B
output: |
$status: "done"
frontmatter:
type: object
required: ["$status"]
properties:
$status: { type: string }
graph:
$START:
_:
role: roleA
prompt: "Do A"
location: null
roleA:
ready:
role: roleB
prompt: "Do B"
location: null
not-ready:
role: roleA
prompt: "Try again"
location: null
roleB:
_:
role: $END
prompt: "Done"
location: null
`;
/**
 * Workflow definition (YAML text) named "test-conditional-role" with three roles.
 *
 * Graph as written below:
 *   $START -> roleA
 *   roleA  -> roleB on "pass", roleC on "fail"
 *   roleB  -> $END
 *   roleC  -> $END
 */
const CONDITIONAL_WORKFLOW_YAML = `
name: test-conditional-role
description: Conditional routing workflow
roles:
roleA:
description: First role
goal: Do A
capabilities: ["coding"]
procedure: Do A
output: |
$status: "pass"
frontmatter:
type: object
required: ["$status"]
properties:
$status: { type: string, enum: ["pass", "fail"] }
roleB:
description: Pass role
goal: Do B
capabilities: ["coding"]
procedure: Do B
output: |
$status: "done"
frontmatter:
type: object
required: ["$status"]
properties:
$status: { type: string }
roleC:
description: Fail role
goal: Do C
capabilities: ["coding"]
procedure: Do C
output: |
$status: "done"
frontmatter:
type: object
required: ["$status"]
properties:
$status: { type: string }
graph:
$START:
_:
role: roleA
prompt: "Do A"
location: null
roleA:
pass:
role: roleB
prompt: "Do B (pass)"
location: null
fail:
role: roleC
prompt: "Do C (fail)"
location: null
roleB:
_:
role: $END
prompt: "Done"
location: null
roleC:
_:
role: $END
prompt: "Done"
location: null
`;
/**
 * Workflow definition (YAML text) named "test-single-role" with one role.
 *
 * Graph as written below:
 *   $START -> worker
 *   worker -> $END
 */
const SINGLE_ROLE_WORKFLOW_YAML = `
name: test-single-role
description: Single role that goes to END
roles:
worker:
description: Worker
goal: Work
capabilities: ["coding"]
procedure: Work
output: |
$status: "done"
frontmatter:
type: object
required: ["$status"]
properties:
$status: { type: string }
graph:
$START:
_:
role: worker
prompt: "Work"
location: null
worker:
_:
role: $END
prompt: "Done"
location: null
`;
/**
 * Test helper: writes a step node for `role` on top of the thread's current
 * head and repoints the thread's entry in the threads index at that new node.
 *
 * The output payload is stored under OUTPUT_SCHEMA; a fixed text value stands
 * in for the step detail.
 *
 * @param storageRoot   Root directory of the store being manipulated.
 * @param threadId      Thread whose head is advanced.
 * @param role          Role name recorded on the new step.
 * @param outputPayload Payload stored as the step's output.
 * @throws Error when the thread has no index entry or its head node cannot be read.
 */
async function insertStepNode(
  storageRoot: string,
  threadId: ThreadId,
  role: string,
  outputPayload: Record<string, unknown>,
): Promise<void> {
  const uwf = await createUwfStore(storageRoot);
  const threads = await loadThreadsIndex(storageRoot);

  const currentHead = threads[threadId];
  if (currentHead === undefined) {
    throw new Error(`thread ${threadId} not in index`);
  }

  // Store the output payload and the stand-in detail before reading the head.
  const schemaRef = await putSchema(uwf.store, OUTPUT_SCHEMA);
  const output = await uwf.store.put(schemaRef, outputPayload);
  const detail = await uwf.store.put(uwf.schemas.text, "detail-placeholder");

  const headRecord = uwf.store.get(currentHead);
  if (headRecord === null) {
    throw new Error(`head ${currentHead} not found`);
  }

  // When the head is the start node itself, the new step has no predecessor
  // and points at the head as its start; otherwise it inherits the head's
  // start reference and links back to the head.
  const headIsStart = headRecord.type === uwf.schemas.startNode;
  const linkage = headIsStart
    ? { start: currentHead, prev: null }
    : { start: (headRecord.payload as { start: CasRef }).start, prev: currentHead };

  const newHead = (await uwf.store.put(uwf.schemas.stepNode, {
    ...linkage,
    role,
    prompt: `Do ${role}`,
    output,
    detail,
  })) as CasRef;

  threads[threadId] = newHead;
  await saveThreadsIndex(storageRoot, threads);
}
describe("currentRole field", () => {
  let tmpDir: string;
  let storageRoot: string;

  type StartedThread = Awaited<ReturnType<typeof cmdThreadStart>>;

  /**
   * Runs `body` against a fresh temp directory: creates the storage root,
   * writes `yaml` to `fileName` inside the temp directory, hands the file path
   * to `body`, and removes the temp directory afterwards (also on failure).
   */
  async function withWorkflow(
    fileName: string,
    yaml: string,
    body: (wf: string) => Promise<void>,
  ): Promise<void> {
    tmpDir = join(
      tmpdir(),
      `uwf-test-current-role-${Date.now()}-${Math.random().toString(36).slice(2)}`,
    );
    storageRoot = join(tmpDir, "storage");
    await mkdir(storageRoot, { recursive: true });
    try {
      const wf = join(tmpDir, fileName);
      await writeFile(wf, yaml, "utf8");
      await body(wf);
    } finally {
      if (tmpDir) {
        await rm(tmpDir, { recursive: true, force: true });
      }
    }
  }

  /**
   * Removes the thread from the threads index and appends a history entry
   * carrying its former head and the given reason.
   */
  async function archiveThread(
    tid: ThreadId,
    workflow: StartedThread["workflow"],
    reason: "completed" | "cancelled",
  ): Promise<void> {
    const index = await loadThreadsIndex(storageRoot);
    const head = index[tid]!;
    delete index[tid];
    await saveThreadsIndex(storageRoot, index);
    await appendThreadHistory(storageRoot, {
      thread: tid,
      workflow,
      head,
      completedAt: Date.now(),
      reason,
    });
  }

  // T1: freshly started thread reports the first role of the graph.
  test("thread show — idle at start returns first role as currentRole", () =>
    withWorkflow("test-current-role.yaml", SIMPLE_WORKFLOW_YAML, async (wf) => {
      const { thread } = await cmdThreadStart(storageRoot, wf, "test", tmpDir);
      const shown = await cmdThreadShow(storageRoot, thread as ThreadId);
      expect(shown.status).toBe("idle");
      expect(shown.currentRole).toBe("roleA");
    }));

  // T2: after one inserted step the next role is reported.
  test("thread show — idle after step returns next role as currentRole", () =>
    withWorkflow("test-current-role.yaml", SIMPLE_WORKFLOW_YAML, async (wf) => {
      const { thread } = await cmdThreadStart(storageRoot, wf, "test", tmpDir);
      await insertStepNode(storageRoot, thread as ThreadId, "roleA", { $status: "ready" });
      const shown = await cmdThreadShow(storageRoot, thread as ThreadId);
      expect(shown.status).toBe("idle");
      expect(shown.currentRole).toBe("roleB");
    }));

  // T3: a thread archived as completed has no current role.
  test("thread show — completed thread returns null currentRole", () =>
    withWorkflow("test-current-role.yaml", SIMPLE_WORKFLOW_YAML, async (wf) => {
      const { thread, workflow } = await cmdThreadStart(storageRoot, wf, "test", tmpDir);
      const tid = thread as ThreadId;
      await archiveThread(tid, workflow, "completed");
      const shown = await cmdThreadShow(storageRoot, tid);
      expect(shown.status).toBe("completed");
      expect(shown.currentRole).toBe(null);
    }));

  // T4: a thread archived as cancelled has no current role.
  test("thread show — cancelled thread returns null currentRole", () =>
    withWorkflow("test-current-role.yaml", SIMPLE_WORKFLOW_YAML, async (wf) => {
      const { thread, workflow } = await cmdThreadStart(storageRoot, wf, "test", tmpDir);
      const tid = thread as ThreadId;
      await archiveThread(tid, workflow, "cancelled");
      const shown = await cmdThreadShow(storageRoot, tid);
      expect(shown.status).toBe("cancelled");
      expect(shown.currentRole).toBe(null);
    }));

  // T5: with a running marker present, the role being executed is reported.
  test("thread show — running thread returns current role", () =>
    withWorkflow("test-current-role.yaml", SIMPLE_WORKFLOW_YAML, async (wf) => {
      const { thread, workflow } = await cmdThreadStart(storageRoot, wf, "test", tmpDir);
      const tid = thread as ThreadId;
      await createMarker(storageRoot, {
        thread: tid,
        workflow,
        pid: process.pid,
        startedAt: Date.now(),
      });
      try {
        const shown = await cmdThreadShow(storageRoot, tid);
        expect(shown.status).toBe("running");
        expect(shown.currentRole).toBe("roleA");
      } finally {
        // Always drop the marker so it cannot outlive the assertions.
        await deleteMarker(storageRoot, tid);
      }
    }));

  // T6: the list view carries the right currentRole for mixed statuses.
  test("thread list — returns correct currentRole for each status", () =>
    withWorkflow("test-current-role.yaml", SIMPLE_WORKFLOW_YAML, async (wf) => {
      // One thread stays idle ...
      const idle = await cmdThreadStart(storageRoot, wf, "idle", tmpDir);
      const idleId = idle.thread as ThreadId;

      // ... and a second one is archived as completed.
      const comp = await cmdThreadStart(storageRoot, wf, "completed", tmpDir);
      const compId = comp.thread as ThreadId;
      await archiveThread(compId, comp.workflow, "completed");

      const list = await cmdThreadList(storageRoot, null, null, null, 0, 100);

      const idleItem = list.find((i) => i.thread === idleId);
      expect(idleItem).toBeDefined();
      expect(idleItem!.currentRole).toBe("roleA");

      const compItem = list.find((i) => i.thread === compId);
      expect(compItem).toBeDefined();
      expect(compItem!.currentRole).toBe(null);
    }));

  // T7: the list view of a freshly started thread reports the first role.
  test("thread list — idle thread at start has correct currentRole", () =>
    withWorkflow("test-current-role.yaml", SIMPLE_WORKFLOW_YAML, async (wf) => {
      const { thread } = await cmdThreadStart(storageRoot, wf, "test", tmpDir);
      const list = await cmdThreadList(storageRoot, null, null, null, 0, 100);
      const item = list.find((i) => i.thread === (thread as ThreadId));
      expect(item).toBeDefined();
      expect(item!.currentRole).toBe("roleA");
    }));

  // T8: the $status of the last output picks the branch.
  test("thread show — conditional routing selects correct next role", () =>
    withWorkflow("test-conditional-role.yaml", CONDITIONAL_WORKFLOW_YAML, async (wf) => {
      // "pass" branch
      const passThread = await cmdThreadStart(storageRoot, wf, "pass test", tmpDir);
      await insertStepNode(storageRoot, passThread.thread as ThreadId, "roleA", {
        $status: "pass",
      });
      const passShown = await cmdThreadShow(storageRoot, passThread.thread as ThreadId);
      expect(passShown.currentRole).toBe("roleB");

      // "fail" branch
      const failThread = await cmdThreadStart(storageRoot, wf, "fail test", tmpDir);
      await insertStepNode(storageRoot, failThread.thread as ThreadId, "roleA", {
        $status: "fail",
      });
      const failShown = await cmdThreadShow(storageRoot, failThread.thread as ThreadId);
      expect(failShown.currentRole).toBe("roleC");
    }));

  // T9: when the graph routes to $END there is no current role.
  test("thread show — when next is $END, currentRole is null", () =>
    withWorkflow("test-single-role.yaml", SINGLE_ROLE_WORKFLOW_YAML, async (wf) => {
      const { thread } = await cmdThreadStart(storageRoot, wf, "test", tmpDir);
      // The graph maps worker's "_" edge to $END.
      await insertStepNode(storageRoot, thread as ThreadId, "worker", {});
      const shown = await cmdThreadShow(storageRoot, thread as ThreadId);
      expect(shown.currentRole).toBe(null);
    }));
});
+1 -33
View File
@@ -57,21 +57,6 @@ const END_ROLE = "$END";
const START_ROLE = "$START";
export const THREAD_READ_DEFAULT_QUOTA = 4000;
/**
 * Work out which role a thread is on, or will run next.
 *
 * Walks the chain from `head`, derives the last role and last output from it,
 * and evaluates the workflow's graph with those two values.
 *
 * @param uwf         Store handle passed through to the chain/workflow helpers.
 * @param head        Head reference the chain walk starts from.
 * @param workflowRef Reference of the workflow whose graph is evaluated.
 * @returns The role named by the evaluation, or null when the evaluation is
 *          not ok or names $END.
 */
function resolveCurrentRole(uwf: UwfStore, head: CasRef, workflowRef: CasRef): string | null {
  const { lastRole, lastOutput } = resolveEvaluateArgs(uwf, walkChain(uwf, head));
  const { graph } = loadWorkflowPayload(uwf, workflowRef);

  const outcome = evaluate(graph, lastRole, lastOutput);
  if (!outcome.ok) {
    return null;
  }

  const nextRole = outcome.value.role;
  if (nextRole === END_ROLE) {
    return null;
  }
  return nextRole;
}
const PL_THREAD_START = "7HNQ4B2X";
const PL_MODERATOR = "M3K8V9T1";
const PL_AGENT_SPAWN = "R5J2W8N4";
@@ -336,14 +321,12 @@ export async function cmdThreadShow(storageRoot: string, threadId: ThreadId): Pr
// Check if thread is running
const runningMarker = await isThreadRunning(storageRoot, threadId);
const status: ThreadStatus = runningMarker !== null ? "running" : "idle";
const currentRole = resolveCurrentRole(uwf, activeHead, workflow);
return {
workflow,
thread: threadId,
head: activeHead,
status,
currentRole,
done: false,
background: null,
};
@@ -358,7 +341,6 @@ export async function cmdThreadShow(storageRoot: string, threadId: ThreadId): Pr
thread: threadId,
head: hist.head,
status,
currentRole: null,
done: true,
background: null,
};
@@ -369,7 +351,6 @@ export async function cmdThreadShow(storageRoot: string, threadId: ThreadId): Pr
export type ThreadListItemWithStatus = ThreadListItem & {
status: ThreadStatus;
currentRole: string | null;
};
async function threadListItemFromActive(
@@ -387,13 +368,7 @@ async function threadListItemFromActive(
const runningMarker = await isThreadRunning(storageRoot, threadId);
const status: ThreadStatus = runningMarker !== null ? "running" : "idle";
return {
thread: threadId,
workflow,
head,
status,
currentRole: resolveCurrentRole(uwf, head, workflow),
};
return { thread: threadId, workflow, head, status };
}
async function collectActiveThreads(
@@ -431,7 +406,6 @@ async function collectCompletedThreads(
workflow: entry.workflow,
head: entry.head,
status: entry.reason === "cancelled" ? "cancelled" : "completed",
currentRole: null,
});
}
}
@@ -964,8 +938,6 @@ async function cmdThreadStepBackground(
failStep(plog, `thread not active: ${threadId}`);
}
const uwf = await createUwfStore(storageRoot);
// Spawn detached background process
const scriptPath = process.argv[1];
if (scriptPath === undefined) {
@@ -997,7 +969,6 @@ async function cmdThreadStepBackground(
thread: threadId,
head: headHash,
status: "running",
currentRole: resolveCurrentRole(uwf, headHash, workflowHash),
done: false,
background: true,
},
@@ -1041,7 +1012,6 @@ async function cmdThreadStepOnce(
thread: threadId,
head: headHash,
status: "completed",
currentRole: null,
done: true,
background: null,
};
@@ -1097,14 +1067,12 @@ async function cmdThreadStepOnce(
// Determine status based on whether thread is done and running state
const status: ThreadStatus = done ? "completed" : "idle";
const currentRole = done ? null : afterResult.value.role;
return {
workflow: workflowHash,
thread: threadId,
head: newHead,
status,
currentRole,
done,
background: null,
};
-2
View File
@@ -97,8 +97,6 @@ export type StepOutput = {
thread: ThreadId;
head: CasRef;
status: ThreadStatus;
/** The current or next role. Null when completed, cancelled, or next is $END. */
currentRole: string | null;
done: boolean;
background: boolean | null;
};
@@ -5,13 +5,17 @@ import { tryFrontmatterFastPath } from "../src/frontmatter.js";
// ── Helpers ───────────────────────────────────────────────────────────────────
/** JSON Schema that matches the new status-only AgentFrontmatter. */
const STATUS_ONLY_SCHEMA = {
/** JSON Schema that exactly matches the AgentFrontmatter fields. */
const FRONTMATTER_SCHEMA = {
type: "object",
properties: {
status: { anyOf: [{ type: "string" }, { type: "null" }] },
next: { anyOf: [{ type: "string" }, { type: "null" }] },
confidence: { anyOf: [{ type: "number" }, { type: "null" }] },
artifacts: { type: "array", items: { type: "string" } },
scope: { type: "string" },
},
required: ["status"],
required: ["status", "next", "confidence", "artifacts", "scope"],
additionalProperties: false,
};
@@ -52,41 +56,24 @@ async function makeStoreWithSchema(schema: Record<string, unknown>) {
return { store, schemaHash };
}
// ── STANDARD_KEYS ────────────────────────────────────────────────────────────
describe("STANDARD_KEYS contains only status", () => {
  test("STANDARD_KEYS is ['status']", async () => {
    // Indirect check: with a schema that only declares `status`, the stored
    // payload is expected to carry `status` and none of the legacy keys.
    const statusOnlySchema = {
      type: "object",
      properties: {
        status: { anyOf: [{ type: "string" }, { type: "null" }] },
      },
    };
    const { store, schemaHash } = await makeStoreWithSchema(statusOnlySchema);

    const raw = "---\nstatus: done\n---\n\nBody.";
    const result = await tryFrontmatterFastPath(raw, schemaHash, store);
    expect(result).not.toBeNull();

    const node = store.get(result!.outputHash);
    expect(node).not.toBeNull();

    const payload = node!.payload as Record<string, unknown>;
    expect(payload.status).toBe("done");

    // None of the legacy fields may show up on the stored payload.
    for (const legacyKey of ["next", "confidence", "artifacts", "scope"]) {
      expect(payload[legacyKey]).toBeUndefined();
    }
  });
});
// ── Happy path ─────────────────────────────────────────────────────────────────
describe("tryFrontmatterFastPath — happy path", () => {
test("parses valid frontmatter and returns outputHash + stripped body", async () => {
const { store, schemaHash } = await makeStoreWithSchema(STATUS_ONLY_SCHEMA);
const { store, schemaHash } = await makeStoreWithSchema(FRONTMATTER_SCHEMA);
const raw = ["---", "status: done", "---", "", "## Summary", "Work is complete."].join("\n");
const raw = [
"---",
"status: done",
"next: reviewer",
"confidence: 0.9",
"artifacts: [src/foo.ts]",
"scope: role",
"---",
"",
"## Summary",
"Work is complete.",
].join("\n");
const result = await tryFrontmatterFastPath(raw, schemaHash, store);
@@ -98,10 +85,11 @@ describe("tryFrontmatterFastPath — happy path", () => {
expect((result?.outputHash ?? "").length).toBeGreaterThan(0);
});
test("stored CAS node payload has only status", async () => {
const { store, schemaHash } = await makeStoreWithSchema(STATUS_ONLY_SCHEMA);
test("stored CAS node payload matches frontmatter fields", async () => {
const { store, schemaHash } = await makeStoreWithSchema(FRONTMATTER_SCHEMA);
const raw = "---\nstatus: done\n---\n\nBody.";
const raw =
"---\nstatus: done\nnext: null\nconfidence: null\nartifacts: []\nscope: role\n---\n\nBody.";
const result = await tryFrontmatterFastPath(raw, schemaHash, store);
expect(result).not.toBeNull();
@@ -110,29 +98,10 @@ describe("tryFrontmatterFastPath — happy path", () => {
expect(node).not.toBeNull();
const payload = node!.payload as Record<string, unknown>;
expect(payload.status).toBe("done");
expect(Object.keys(payload)).toEqual(["status"]);
});
});
// ── Legacy fields in input are ignored ──────────────────────────────────────
describe("tryFrontmatterFastPath — legacy fields ignored", () => {
test("legacy fields in input do not appear in CAS output", async () => {
const { store, schemaHash } = await makeStoreWithSchema(STATUS_ONLY_SCHEMA);
const raw =
"---\nstatus: done\nnext: reviewer\nconfidence: 0.9\nartifacts: [a.ts]\nscope: thread\n---\n\nBody.";
const result = await tryFrontmatterFastPath(raw, schemaHash, store);
expect(result).not.toBeNull();
const node = store.get(result!.outputHash);
const payload = node!.payload as Record<string, unknown>;
expect(payload.status).toBe("done");
expect(payload.next).toBeUndefined();
expect(payload.confidence).toBeUndefined();
expect(payload.artifacts).toBeUndefined();
expect(payload.scope).toBeUndefined();
expect(payload.next).toBeNull();
expect(payload.confidence).toBeNull();
expect(payload.artifacts).toEqual([]);
expect(payload.scope).toBe("role");
});
});
@@ -140,7 +109,7 @@ describe("tryFrontmatterFastPath — legacy fields ignored", () => {
describe("tryFrontmatterFastPath — fallback: no frontmatter", () => {
test("returns null for plain markdown without frontmatter block", async () => {
const { store, schemaHash } = await makeStoreWithSchema(STATUS_ONLY_SCHEMA);
const { store, schemaHash } = await makeStoreWithSchema(FRONTMATTER_SCHEMA);
const result = await tryFrontmatterFastPath(
"This is plain markdown without any frontmatter.",
@@ -152,13 +121,35 @@ describe("tryFrontmatterFastPath — fallback: no frontmatter", () => {
});
});
// ── Fallback: invalid frontmatter ─────────────────────────────────────────────
describe("tryFrontmatterFastPath — fallback: invalid frontmatter", () => {
  /** Runs the fast path over `raw` with a fresh store built from FRONTMATTER_SCHEMA. */
  async function runFastPath(raw: string) {
    const { store, schemaHash } = await makeStoreWithSchema(FRONTMATTER_SCHEMA);
    return tryFrontmatterFastPath(raw, schemaHash, store);
  }

  test("returns null when confidence is out of range [0, 1]", async () => {
    const outcome = await runFastPath(
      "---\nstatus: done\nconfidence: 1.5\nscope: role\n---\n\nBody.",
    );
    expect(outcome).toBeNull();
  });

  test("returns null when next contains whitespace", async () => {
    const outcome = await runFastPath(
      "---\nstatus: done\nnext: some role\nscope: role\n---\n\nBody.",
    );
    expect(outcome).toBeNull();
  });
});
// ── Fallback: schema mismatch ─────────────────────────────────────────────────
describe("tryFrontmatterFastPath — fallback: schema mismatch", () => {
test("returns null when outputSchema requires fields not in frontmatter", async () => {
const { store, schemaHash } = await makeStoreWithSchema(STRICT_SCHEMA);
const raw = "---\nstatus: done\n---\n\nBody.";
const raw = "---\nstatus: done\nscope: role\n---\n\nBody.";
const result = await tryFrontmatterFastPath(raw, schemaHash, store);
expect(result).toBeNull();
@@ -203,7 +194,7 @@ describe("tryFrontmatterFastPath — role-specific fields", () => {
test("returns null when required role-specific field is missing", async () => {
const { store, schemaHash } = await makeStoreWithSchema(REVIEWER_SCHEMA);
const raw = "---\nstatus: done\n---\n\nBody.";
const raw = "---\nstatus: done\nscope: role\n---\n\nBody.";
const result = await tryFrontmatterFastPath(raw, schemaHash, store);
expect(result).toBeNull();
@@ -13,7 +13,7 @@ import { extractSchemaFields } from "./build-output-format-instruction.js";
const log = createLogger({ sink: { kind: "stderr" } });
const STANDARD_KEYS = ["status"] as const;
const STANDARD_KEYS = ["status", "next", "confidence", "artifacts", "scope"] as const;
type StandardKey = (typeof STANDARD_KEYS)[number];
@@ -63,6 +63,10 @@ function parseRawFrontmatterFields(raw: string): Record<string, unknown> {
/**
 * Build a plain candidate object from the five frontmatter fields.
 *
 * `artifacts` is copied into a new array, so the returned object does not
 * share that array with the input frontmatter.
 *
 * @param frontmatter Parsed frontmatter to read the fields from.
 * @returns An object with keys status, next, confidence, artifacts, scope (in that order).
 */
function defaultCandidate(frontmatter: AgentFrontmatter): Record<string, unknown> {
  const { status, next, confidence, artifacts, scope } = frontmatter;
  const candidate: Record<string, unknown> = {
    status,
    next,
    confidence,
    artifacts: [...artifacts],
    scope,
  };
  return candidate;
}
@@ -70,6 +74,14 @@ function pickStandardField(frontmatter: AgentFrontmatter, key: StandardKey): unk
switch (key) {
case "status":
return frontmatter.status;
case "next":
return frontmatter.next;
case "confidence":
return frontmatter.confidence;
case "artifacts":
return [...frontmatter.artifacts];
case "scope":
return frontmatter.scope;
}
}
@@ -87,6 +99,9 @@ function pickFieldValue(
}
const coerced = pickStandardField(frontmatter, field);
if (field === "artifacts" || field === "scope") {
return coerced;
}
if (coerced !== null) {
return coerced;
}
@@ -96,8 +111,8 @@ function pickFieldValue(
/**
* Build a CAS candidate object from schema property keys and parsed frontmatter.
*
* When the schema has no inspectable properties, falls back to the standard
* agent frontmatter field (status only).
* When the schema has no inspectable properties, falls back to the five standard
* agent frontmatter fields for backward compatibility.
*/
function buildCandidate(
frontmatter: AgentFrontmatter,
-2
View File
@@ -151,7 +151,6 @@ export function createAgent(options: AgentOptions): () => Promise<void> {
const startedAtMs = Date.now();
let agentResult = await runWithMessage("agent run failed", () => options.run(ctx));
agentResult.output = agentResult.output.trimStart();
// Preserve the primary detail from the first run — it contains the full
// tool-call turn history. Continuation retries only fix frontmatter
@@ -170,7 +169,6 @@ export function createAgent(options: AgentOptions): () => Promise<void> {
agentResult = await runWithMessage("agent continue failed", () =>
options.continue(agentResult.sessionId, correctionMessage, ctx.meta.store),
);
agentResult.output = agentResult.output.trimStart();
extracted = await tryExtractOutput(agentResult.output, roleDef.frontmatter, ctx);
}
@@ -41,13 +41,31 @@ describe("parseFrontmatterMarkdown", () => {
});
});
describe("status-only frontmatter", () => {
it("parses status-only frontmatter", () => {
const raw = "---\nstatus: done\n---\nbody";
describe("full frontmatter document", () => {
it("parses all fields from a well-formed document", () => {
const raw = `---
status: done
next: reviewer
confidence: 0.9
artifacts:
- src/foo.ts
- src/bar.ts
scope: thread
---
## Summary
Everything looks good.`;
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter).not.toBeNull();
expect(result.frontmatter).toEqual({ status: "done" });
expect(result.body).toBe("body");
const fm = result.frontmatter!;
expect(fm.status).toBe("done");
expect(fm.next).toBe("reviewer");
expect(fm.confidence).toBe(0.9);
expect(fm.artifacts).toEqual(["src/foo.ts", "src/bar.ts"]);
expect(fm.scope).toBe("thread");
expect(result.body).toBe("## Summary\n\nEverything looks good.");
});
it("strips leading newline from body", () => {
@@ -69,22 +87,6 @@ describe("parseFrontmatterMarkdown", () => {
});
});
describe("ignores legacy fields", () => {
  it("legacy fields next/confidence/artifacts/scope are NOT present on result", () => {
    const raw =
      "---\nstatus: done\nnext: reviewer\nconfidence: 0.9\nartifacts:\n - src/foo.ts\nscope: thread\n---\n\nBody.";

    const { frontmatter } = parseFrontmatterMarkdown(raw);
    expect(frontmatter).not.toBeNull();

    const fm = frontmatter!;
    expect(fm.status).toBe("done");

    // The legacy keys must be absent from the object, not merely undefined.
    for (const legacyKey of ["next", "confidence", "artifacts", "scope"]) {
      expect(legacyKey in fm).toBe(false);
    }
  });
});
describe("status field", () => {
it.each([
"done",
@@ -104,18 +106,109 @@ describe("parseFrontmatterMarkdown", () => {
});
it("returns null status when omitted", () => {
const raw = "---\nfoo: bar\n---\nbody";
const raw = "---\nconfidence: 0.5\n---\nbody";
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter?.status).toBeNull();
});
});
describe("confidence field", () => {
  /** Parses `raw` and returns the `confidence` of its frontmatter. */
  const confidenceOf = (raw: string) => parseFrontmatterMarkdown(raw).frontmatter?.confidence;

  it("parses integer as number", () => {
    expect(confidenceOf("---\nconfidence: 1\n---\nbody")).toBe(1);
  });

  it("parses decimal", () => {
    expect(confidenceOf("---\nconfidence: 0.75\n---\nbody")).toBe(0.75);
  });

  it("returns null when omitted", () => {
    expect(confidenceOf("---\nstatus: done\n---\nbody")).toBeNull();
  });

  it("returns null for non-numeric value", () => {
    expect(confidenceOf("---\nconfidence: high\n---\nbody")).toBeNull();
  });
});
describe("artifacts field", () => {
  /** Parses `raw` and returns the `artifacts` of its frontmatter. */
  const artifactsOf = (raw: string) => parseFrontmatterMarkdown(raw).frontmatter?.artifacts;

  it("parses block sequence", () => {
    expect(artifactsOf("---\nartifacts:\n - a.ts\n - b.ts\n---\nbody")).toEqual(["a.ts", "b.ts"]);
  });

  it("parses inline sequence", () => {
    expect(artifactsOf("---\nartifacts: [a.ts, b.ts]\n---\nbody")).toEqual(["a.ts", "b.ts"]);
  });

  it("returns empty array when omitted", () => {
    expect(artifactsOf("---\nstatus: done\n---\nbody")).toEqual([]);
  });

  it("wraps single scalar in array", () => {
    expect(artifactsOf("---\nartifacts: only-one.ts\n---\nbody")).toEqual(["only-one.ts"]);
  });
});
describe("scope field", () => {
  /** Parses `raw` and returns the `scope` of its frontmatter. */
  const scopeOf = (raw: string) => parseFrontmatterMarkdown(raw).frontmatter?.scope;

  it('parses scope "role"', () => {
    expect(scopeOf("---\nscope: role\n---\nbody")).toBe("role");
  });

  it('parses scope "thread"', () => {
    expect(scopeOf("---\nscope: thread\n---\nbody")).toBe("thread");
  });

  it('defaults to "role" when omitted', () => {
    expect(scopeOf("---\nstatus: done\n---\nbody")).toBe("role");
  });

  it('defaults to "role" for unknown scope value', () => {
    expect(scopeOf("---\nscope: global\n---\nbody")).toBe("role");
  });
});
describe("next field", () => {
  /** Parses `raw` and returns the `next` of its frontmatter. */
  const nextOf = (raw: string) => parseFrontmatterMarkdown(raw).frontmatter?.next;

  it("parses a role name", () => {
    expect(nextOf("---\nnext: planner\n---\nbody")).toBe("planner");
  });

  it("returns null when omitted", () => {
    expect(nextOf("---\nstatus: done\n---\nbody")).toBeNull();
  });
});
describe("unknown fields", () => {
  it("ignores unknown keys silently", () => {
    const { frontmatter } = parseFrontmatterMarkdown(
      "---\nunknown_field: some_value\nstatus: done\n---\nbody",
    );
    // The recognised key is still read ...
    expect(frontmatter?.status).toBe("done");
    // ... and the unrecognised one leaves no trace on the result.
    const presentKeys = Object.keys(frontmatter!);
    expect(presentKeys).toEqual(["status"]);
  });
});
@@ -128,58 +221,123 @@ describe("parseFrontmatterMarkdown", () => {
});
describe("empty frontmatter block", () => {
it("parses empty frontmatter with status null", () => {
it("parses empty frontmatter and uses all defaults", () => {
const raw = "---\n---\nbody";
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter).not.toBeNull();
const fm = result.frontmatter!;
expect(fm.status).toBeNull();
expect(Object.keys(fm)).toEqual(["status"]);
expect(fm.next).toBeNull();
expect(fm.confidence).toBeNull();
expect(fm.artifacts).toEqual([]);
expect(fm.scope).toBe("role");
expect(result.body).toBe("body");
});
});
describe("AgentFrontmatter has exactly one field", () => {
  it("has only status key", () => {
    // A value annotated as AgentFrontmatter with just `status` must type-check,
    // and at runtime it carries exactly that one key.
    const fm: AgentFrontmatter = { status: null };
    const keys = Object.keys(fm);
    expect(keys).toEqual(["status"]);
  });
});
describe("FrontmatterValidationError only has status variant", () => {
  // Local alias for the error type exported from the package entry point.
  type ValidationError = import("../src/index.js").FrontmatterValidationError;

  it("status variant is valid", () => {
    const err: ValidationError = { field: "status", message: "test" };
    expect(err.field).toBe("status");
  });
});
});
// ── validateFrontmatter ──────────────────────────────────────────────────────
/**
 * Fixture factory: returns a frontmatter object with status "done", null
 * `next`/`confidence`, no artifacts and scope "role", with any fields given in
 * `overrides` replacing those defaults.
 *
 * A fresh object (and a fresh default `artifacts` array) is produced per call.
 */
function validFm(overrides: Partial<AgentFrontmatter> = {}): AgentFrontmatter {
  const defaults: AgentFrontmatter = {
    status: "done",
    next: null,
    confidence: null,
    artifacts: [],
    scope: "role",
  };
  return { ...defaults, ...overrides };
}
describe("validateFrontmatter", () => {
it("returns no errors for a valid status", () => {
const errors = validateFrontmatter({ status: "done" });
it("returns no errors for a fully valid frontmatter", () => {
const errors = validateFrontmatter(validFm());
expect(errors).toHaveLength(0);
});
it("returns no errors when status is null", () => {
const errors = validateFrontmatter({ status: null });
expect(errors).toHaveLength(0);
});
it("returns error for invalid status", () => {
const errors = validateFrontmatter({ status: "bogus" as never });
expect(errors).toHaveLength(1);
expect(errors[0]?.field).toBe("status");
});
it("no validation for next/confidence/artifacts/scope — fields do not exist", () => {
// AgentFrontmatter only has status — verify at runtime
const fm: AgentFrontmatter = { status: "done" };
expect(Object.keys(fm)).toEqual(["status"]);
it("returns no errors when all nullable fields are null", () => {
const fm: AgentFrontmatter = {
status: null,
next: null,
confidence: null,
artifacts: [],
scope: "role",
};
expect(validateFrontmatter(fm)).toHaveLength(0);
});
// `confidence` range checks: both endpoints of [0, 1] are accepted, and any
// value outside the interval produces exactly one error tagged "confidence".
describe("confidence validation", () => {
  const validateWith = (confidence: number) => validateFrontmatter(validFm({ confidence }));

  it("accepts 0.0", () => {
    const result = validateWith(0);
    expect(result).toHaveLength(0);
  });

  it("accepts 1.0", () => {
    const result = validateWith(1);
    expect(result).toHaveLength(0);
  });

  it("rejects value below 0", () => {
    const result = validateWith(-0.1);
    expect(result).toHaveLength(1);
    expect(result[0]?.field).toBe("confidence");
  });

  it("rejects value above 1", () => {
    const result = validateWith(1.01);
    expect(result).toHaveLength(1);
    expect(result[0]?.field).toBe("confidence");
  });
});
// `next` checks: plain and kebab-case role names pass; a name containing
// whitespace produces exactly one error tagged "next".
describe("next validation", () => {
  const validateWith = (next: string) => validateFrontmatter(validFm({ next }));

  it("accepts a simple role name", () => {
    const result = validateWith("reviewer");
    expect(result).toHaveLength(0);
  });

  it("accepts kebab-case role name", () => {
    const result = validateWith("code-reviewer");
    expect(result).toHaveLength(0);
  });

  it("rejects role name with whitespace", () => {
    const result = validateWith("role name");
    expect(result).toHaveLength(1);
    expect(result[0]?.field).toBe("next");
  });
});
// `artifacts` checks: non-empty path strings pass; an empty or whitespace-only
// entry produces exactly one error tagged "artifacts".
describe("artifacts validation", () => {
  const validateWith = (artifacts: readonly string[]) =>
    validateFrontmatter(validFm({ artifacts }));

  it("accepts non-empty path strings", () => {
    const result = validateWith(["src/foo.ts", "src/bar.ts"]);
    expect(result).toHaveLength(0);
  });

  it("rejects empty string artifact entries", () => {
    const result = validateWith([""]);
    expect(result).toHaveLength(1);
    expect(result[0]?.field).toBe("artifacts");
  });

  it("rejects whitespace-only artifact entries", () => {
    const result = validateWith([" "]);
    expect(result).toHaveLength(1);
    expect(result[0]?.field).toBe("artifacts");
  });
});
// Validation does not stop at the first failure: a frontmatter with a bad
// `next`, `confidence` and `artifacts` reports all three fields in one pass.
describe("multiple errors", () => {
  it("reports multiple violations at once", () => {
    const invalid: AgentFrontmatter = {
      status: "done",
      next: "bad role",
      confidence: 2,
      artifacts: [""],
      scope: "role",
    };

    const reportedFields = validateFrontmatter(invalid).map((error) => error.field);

    for (const expected of ["next", "confidence", "artifacts"]) {
      expect(reportedFields).toContain(expected);
    }
  });
});
});
@@ -1,5 +1,6 @@
import type {
AgentFrontmatter,
FrontmatterScope,
FrontmatterStatus,
FrontmatterValidationError,
ParsedFrontmatterMarkdown,
@@ -158,12 +159,40 @@ function parseMinimalYaml(yaml: string): Record<string, YamlValue> {
/** Accepted `status` literals; `coerceStatus` matches trimmed, lower-cased input against this list. */
const VALID_STATUS: readonly FrontmatterStatus[] = ["done", "needs_input", "in_progress", "failed"];
/** Accepted `scope` literals; consulted by `coerceScope` and by `validateFrontmatter`. */
const VALID_SCOPE: readonly FrontmatterScope[] = ["role", "thread"];
/**
 * Coerces a raw YAML value into a known status literal.
 *
 * The value is stringified, trimmed and lower-cased before being compared
 * against `VALID_STATUS`.
 *
 * @returns the matching status, or `null` when the value is absent or is not
 *          one of the known literals.
 */
function coerceStatus(raw: YamlValue): FrontmatterStatus | null {
  if (raw == null) return null; // `== null` covers both null and undefined
  const candidate = String(raw).trim().toLowerCase();
  const matched = VALID_STATUS.find((status) => status === candidate);
  return matched ?? null;
}
/**
 * Coerces a raw YAML value into a suggested next-role name.
 *
 * @returns the stringified, trimmed value, or `null` when the value is absent
 *          or trims down to an empty string.
 */
function coerceNext(raw: YamlValue): string | null {
  if (raw == null) return null; // `== null` covers both null and undefined
  const trimmed = String(raw).trim();
  return trimmed.length > 0 ? trimmed : null;
}
/**
 * Coerces a raw YAML value into a confidence number.
 *
 * Range is intentionally not enforced here: out-of-range numbers are returned
 * unchanged so that `validateFrontmatter` can report them.
 *
 * @returns the numeric value, or `null` when the value is absent, blank after
 *          trimming, or not parseable as a number.
 */
function coerceConfidence(raw: YamlValue): number | null {
  if (raw === null || raw === undefined) return null;
  if (typeof raw === "number") return Number.isNaN(raw) ? null : raw;

  const text = String(raw).trim();
  // Number("") evaluates to 0, so without this guard a blank value would be
  // silently read as "confidence: 0" instead of "no confidence given".
  if (text === "") return null;

  const parsed = Number(text);
  return Number.isNaN(parsed) ? null : parsed;
}
/**
 * Coerces a raw YAML value into a list of artifact strings.
 *
 * - absent value → empty list
 * - array → each entry stringified; entries that stringify to `""` are dropped
 *   (entries are not trimmed)
 * - any other value → stringified and trimmed; a single-element list, or an
 *   empty list when nothing is left after trimming
 */
function coerceArtifacts(raw: YamlValue): readonly string[] {
  if (raw == null) return []; // `== null` covers both null and undefined

  if (!Array.isArray(raw)) {
    const single = String(raw).trim();
    return single === "" ? [] : [single];
  }

  const entries: string[] = [];
  raw.forEach((item) => {
    const text = String(item);
    if (text !== "") entries.push(text);
  });
  return entries;
}
/**
 * Coerces a raw YAML value into a scope literal.
 *
 * The value is stringified, trimmed and lower-cased before being compared
 * against `VALID_SCOPE`.
 *
 * @returns the matching scope, or `"role"` when the value is absent or is not
 *          one of the known literals.
 */
function coerceScope(raw: YamlValue): FrontmatterScope {
  if (raw == null) return "role"; // `== null` covers both null and undefined
  const candidate = String(raw).trim().toLowerCase();
  const matched = VALID_SCOPE.find((scope) => scope === candidate);
  return matched ?? "role";
}
// ── Public API ───────────────────────────────────────────────────────────────
/**
@@ -191,6 +220,10 @@ export function parseFrontmatterMarkdown(raw: string): ParsedFrontmatterMarkdown
const frontmatter: AgentFrontmatter = {
status: coerceStatus(fields.status ?? null),
next: coerceNext(fields.next ?? null),
confidence: coerceConfidence(fields.confidence ?? null),
artifacts: coerceArtifacts(fields.artifacts ?? null),
scope: coerceScope(fields.scope ?? null),
};
return { frontmatter, body };
@@ -202,7 +235,11 @@ export function parseFrontmatterMarkdown(raw: string): ParsedFrontmatterMarkdown
* An empty array means the frontmatter is valid.
*
* Validated constraints:
* - `status` — must be one of the FrontmatterStatus literals (if non-null)
* - `status` — must be one of the FrontmatterStatus literals (if non-null)
* - `confidence` — must be in [0.0, 1.0] (if non-null)
* - `next` — must be a non-empty string with no whitespace (if non-null)
* - `artifacts` — each entry must be a non-empty string
* - `scope` — must be one of the FrontmatterScope literals
*/
export function validateFrontmatter(
frontmatter: AgentFrontmatter,
@@ -216,5 +253,39 @@ export function validateFrontmatter(
});
}
if (frontmatter.confidence !== null) {
if (frontmatter.confidence < 0 || frontmatter.confidence > 1) {
errors.push({
field: "confidence",
message: `confidence ${frontmatter.confidence} is out of range; must be between 0.0 and 1.0 inclusive`,
});
}
}
if (frontmatter.next !== null) {
if (frontmatter.next.trim() === "") {
errors.push({ field: "next", message: "next must be a non-empty string when present" });
} else if (/\s/.test(frontmatter.next)) {
errors.push({
field: "next",
message: `next "${frontmatter.next}" must not contain whitespace`,
});
}
}
for (const artifact of frontmatter.artifacts) {
if (artifact.trim() === "") {
errors.push({ field: "artifacts", message: "artifact entries must be non-empty strings" });
break;
}
}
if (!VALID_SCOPE.includes(frontmatter.scope)) {
errors.push({
field: "scope",
message: `invalid scope "${frontmatter.scope}"; must be one of: ${VALID_SCOPE.join(", ")}`,
});
}
return errors;
}
@@ -1,6 +1,7 @@
export { parseFrontmatterMarkdown, validateFrontmatter } from "./frontmatter-markdown.js";
export type {
AgentFrontmatter,
FrontmatterScope,
FrontmatterStatus,
FrontmatterValidationError,
ParsedFrontmatterMarkdown,
@@ -1,5 +1,5 @@
/**
* Frontmatter Markdown — agent output format.
* Frontmatter Markdown — agent output format (RFC #351 Phase 1).
*
* An agent response is a Markdown document with an optional YAML frontmatter
* block at the top. The frontmatter carries structured signals that the
@@ -9,12 +9,17 @@
*
* ---
* status: done
* next: reviewer
* confidence: 0.9
* artifacts:
* - src/foo.ts
* scope: role
* ---
*
* ... free-form markdown body ...
*
* Only `status` is a standard frontmatter field. All other fields are
* role-specific and defined by the output schema.
* All frontmatter fields are optional at the parse level. `validateFrontmatter`
* enforces the constraints documented on each field below.
*/
// ── Vocabulary types ─────────────────────────────────────────────────────────
@@ -29,12 +34,20 @@
*/
export type FrontmatterStatus = "done" | "needs_input" | "in_progress" | "failed";
/**
* Scope of frontmatter signals.
*
* - `role` — signals apply to the current role execution only (default)
* - `thread` — signals are suggestions for the entire thread moderator
*
* When parsing, `coerceScope` maps a missing or unrecognised value to `role`.
*/
export type FrontmatterScope = "role" | "thread";
// ── Core frontmatter schema ──────────────────────────────────────────────────
/**
* Parsed and validated frontmatter from an agent response.
*
* Only `status` is a standard field. All other fields are role-specific.
* All fields use explicit `T | null` (no optional `?:` per convention).
*/
export type AgentFrontmatter = {
/**
@@ -42,6 +55,32 @@ export type AgentFrontmatter = {
* Null when omitted — engine treats it as "done" for backward compatibility.
*/
status: FrontmatterStatus | null;
/**
* Suggested next role name for the moderator.
* The moderator is NOT obligated to follow this — it is advisory only.
* Null when the agent has no preference.
*/
next: string | null;
/**
* Agent's self-assessed confidence in its output (0.0 – 1.0 inclusive).
* Null when omitted.
*/
confidence: number | null;
/**
* Relative file paths or CAS hashes the agent considers its primary outputs.
* Used for GC ref-tracing and human-readable summaries.
* Empty array when omitted (never null — an absent list is an empty list).
*/
artifacts: readonly string[];
/**
* Scope of the frontmatter signals.
* Defaults to "role" when omitted.
*/
scope: FrontmatterScope;
};
// ── Parse output ─────────────────────────────────────────────────────────────
@@ -64,4 +103,9 @@ export type ParsedFrontmatterMarkdown = {
// ── Validation error ─────────────────────────────────────────────────────────
export type FrontmatterValidationError = { field: "status"; message: string };
/**
 * A single validation failure reported by `validateFrontmatter`.
 *
 * `field` names the frontmatter field that violated its constraint and
 * `message` is a human-readable description of the violation. One variant is
 * declared per field so callers can discriminate on `field`.
 */
export type FrontmatterValidationError =
| { field: "status"; message: string }
| { field: "next"; message: string }
| { field: "confidence"; message: string }
| { field: "artifacts"; message: string }
| { field: "scope"; message: string };
+1
View File
@@ -8,6 +8,7 @@ export { generateDeveloperReference } from "./developer-reference.js";
export { env } from "./env.js";
export type {
AgentFrontmatter,
FrontmatterScope,
FrontmatterStatus,
FrontmatterValidationError,
ParsedFrontmatterMarkdown,