diff --git a/packages/eval/__tests__/builtin-judges.test.ts b/packages/eval/__tests__/builtin-judges.test.ts index 9b4768d..92e4f5e 100644 --- a/packages/eval/__tests__/builtin-judges.test.ts +++ b/packages/eval/__tests__/builtin-judges.test.ts @@ -91,6 +91,29 @@ describe("frontmatter-compliance judge", () => { const result = await runFrontmatterJudge("T4"); expect(result.score).toBe(0); }); + + test("parsed object output with $status → score 1.0", async () => { + mockedReadSteps.mockReturnValue([ + makeStep({ role: "a", output: { $status: "done", summary: "fixed" } as unknown as string }), + makeStep({ role: "b", output: { $status: "reviewed" } as unknown as string }), + ]); + + const result = await runFrontmatterJudge("T5"); + const data = result.data as { stepsTotal: number; stepsValid: number; invalidSteps: unknown[] }; + + expect(result.score).toBe(1.0); + expect(data.stepsTotal).toBe(2); + expect(data.stepsValid).toBe(2); + }); + + test("parsed object output missing $status → score 0", async () => { + mockedReadSteps.mockReturnValue([ + makeStep({ role: "a", output: { summary: "no status field" } as unknown as string }), + ]); + + const result = await runFrontmatterJudge("T6"); + expect(result.score).toBe(0); + }); }); describe("token-stats judge", () => { diff --git a/packages/eval/src/judge/builtin/frontmatter.ts b/packages/eval/src/judge/builtin/frontmatter.ts index 46ab5a3..0c869e3 100644 --- a/packages/eval/src/judge/builtin/frontmatter.ts +++ b/packages/eval/src/judge/builtin/frontmatter.ts @@ -39,6 +39,16 @@ function extractFrontmatterYaml(output: unknown): string | null { /** Validate a single step's frontmatter, returning a list of errors (empty = valid). */ function validateStepFrontmatter(output: unknown): string[] { + // CAS stores the extracted output as a JSON object after the extract pipeline. + // Accept both: parsed object (from step.output) or raw markdown string. 
+ if (typeof output === "object" && output !== null && !Array.isArray(output)) { + const status = (output as Record<string, unknown>).$status; + if (typeof status !== "string" || status.trim() === "") { + return ["$status field is missing or not a non-empty string"]; + } + return []; + } + const yaml = extractFrontmatterYaml(output); if (yaml === null) { return ["output does not begin with a valid '---' frontmatter block"];