feat(workflow-util): add frontmatter markdown parser and validator

Phase 1 of RFC #351 — define AgentFrontmatter type, parseFrontmatterMarkdown()
and validateFrontmatter() with 45 tests.

- Built-in minimal YAML parser (no new deps)
- Never throws on malformed input — degrades gracefully
- All fields use T | null (no optional properties)

Refs #351
This commit is contained in:
2026-05-19 04:41:56 +00:00
parent ba90214af6
commit 43978360ff
5 changed files with 764 additions and 0 deletions
@@ -0,0 +1,343 @@
import { describe, expect, it } from "vitest";
import type { AgentFrontmatter } from "../src/index.js";
import { parseFrontmatterMarkdown, validateFrontmatter } from "../src/index.js";
// ── parseFrontmatterMarkdown ─────────────────────────────────────────────────
// Behavioural spec for parseFrontmatterMarkdown(): fence detection, body
// extraction, and per-field coercion/defaults. Every case feeds a raw string to
// the parser and inspects only the returned `frontmatter` and `body`.
describe("parseFrontmatterMarkdown", () => {
// Inputs that lack a usable `---` ... `---` block. The parser must report a null
// frontmatter and hand the input back unchanged as the body.
describe("no frontmatter", () => {
it("returns null frontmatter and full text as body when no fence", () => {
const raw = "Just some markdown text.\n\n## Section\n\nContent.";
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter).toBeNull();
expect(result.body).toBe(raw);
});
// A `---` that is not at the very start of the document is a horizontal rule,
// not a frontmatter fence.
it("returns null frontmatter when --- appears mid-document", () => {
const raw = "# Heading\n\n---\n\nContent.";
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter).toBeNull();
expect(result.body).toBe(raw);
});
it("returns null frontmatter when opening fence is not followed by newline", () => {
const raw = "--- inline content ---\nbody";
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter).toBeNull();
expect(result.body).toBe(raw);
});
it("returns null frontmatter when no closing fence", () => {
const raw = "---\nstatus: done\nbody without close";
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter).toBeNull();
expect(result.body).toBe(raw);
});
it("handles empty string", () => {
const result = parseFrontmatterMarkdown("");
expect(result.frontmatter).toBeNull();
expect(result.body).toBe("");
});
});
// Well-formed documents: every field is read and the body starts after the
// closing fence with leading newlines removed.
describe("full frontmatter document", () => {
// NOTE(review): the expected body below contains a blank line between the
// heading and the paragraph; confirm the template literal's blank lines and
// list indentation are intact in the checked-in file.
it("parses all fields from a well-formed document", () => {
const raw = `---
status: done
next: reviewer
confidence: 0.9
artifacts:
- src/foo.ts
- src/bar.ts
scope: thread
---
## Summary
Everything looks good.`;
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter).not.toBeNull();
const fm = result.frontmatter!;
expect(fm.status).toBe("done");
expect(fm.next).toBe("reviewer");
expect(fm.confidence).toBe(0.9);
expect(fm.artifacts).toEqual(["src/foo.ts", "src/bar.ts"]);
expect(fm.scope).toBe("thread");
expect(result.body).toBe("## Summary\n\nEverything looks good.");
});
it("strips leading newline from body", () => {
const raw = "---\nstatus: done\n---\n\nbody here";
const result = parseFrontmatterMarkdown(raw);
expect(result.body).toBe("body here");
});
it("body is empty string when nothing after closing fence", () => {
const raw = "---\nstatus: done\n---\n";
const result = parseFrontmatterMarkdown(raw);
expect(result.body).toBe("");
});
it("body is empty string when document ends exactly at closing fence", () => {
const raw = "---\nstatus: done\n---";
const result = parseFrontmatterMarkdown(raw);
expect(result.body).toBe("");
});
});
// `status`: only the four known literals survive; anything else becomes null.
describe("status field", () => {
it.each([
"done",
"needs_input",
"in_progress",
"failed",
] as const)('parses status "%s"', (status) => {
const raw = `---\nstatus: ${status}\n---\nbody`;
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter?.status).toBe(status);
});
it("returns null status for unknown value", () => {
const raw = "---\nstatus: unknown_value\n---\nbody";
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter?.status).toBeNull();
});
it("returns null status when omitted", () => {
const raw = "---\nconfidence: 0.5\n---\nbody";
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter?.status).toBeNull();
});
});
// `confidence`: numeric text is parsed to a number; missing or non-numeric
// values become null. Range checking is covered by the validateFrontmatter suite.
describe("confidence field", () => {
it("parses integer as number", () => {
const raw = "---\nconfidence: 1\n---\nbody";
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter?.confidence).toBe(1);
});
it("parses decimal", () => {
const raw = "---\nconfidence: 0.75\n---\nbody";
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter?.confidence).toBe(0.75);
});
it("returns null when omitted", () => {
const raw = "---\nstatus: done\n---\nbody";
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter?.confidence).toBeNull();
});
it("returns null for non-numeric value", () => {
const raw = "---\nconfidence: high\n---\nbody";
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter?.confidence).toBeNull();
});
});
// `artifacts`: accepts a block sequence, an inline `[a, b]` sequence, or a
// single scalar; always yields an array (empty when the key is absent).
describe("artifacts field", () => {
it("parses block sequence", () => {
const raw = "---\nartifacts:\n - a.ts\n - b.ts\n---\nbody";
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter?.artifacts).toEqual(["a.ts", "b.ts"]);
});
it("parses inline sequence", () => {
const raw = "---\nartifacts: [a.ts, b.ts]\n---\nbody";
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter?.artifacts).toEqual(["a.ts", "b.ts"]);
});
it("returns empty array when omitted", () => {
const raw = "---\nstatus: done\n---\nbody";
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter?.artifacts).toEqual([]);
});
it("wraps single scalar in array", () => {
const raw = "---\nartifacts: only-one.ts\n---\nbody";
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter?.artifacts).toEqual(["only-one.ts"]);
});
});
// `scope`: never null; missing or unrecognised values fall back to "role".
describe("scope field", () => {
it('parses scope "role"', () => {
const raw = "---\nscope: role\n---\nbody";
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter?.scope).toBe("role");
});
it('parses scope "thread"', () => {
const raw = "---\nscope: thread\n---\nbody";
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter?.scope).toBe("thread");
});
it('defaults to "role" when omitted', () => {
const raw = "---\nstatus: done\n---\nbody";
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter?.scope).toBe("role");
});
it('defaults to "role" for unknown scope value', () => {
const raw = "---\nscope: global\n---\nbody";
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter?.scope).toBe("role");
});
});
// `next`: a free-form role name, null when absent.
describe("next field", () => {
it("parses a role name", () => {
const raw = "---\nnext: planner\n---\nbody";
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter?.next).toBe("planner");
});
it("returns null when omitted", () => {
const raw = "---\nstatus: done\n---\nbody";
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter?.next).toBeNull();
});
});
// Keys outside the schema must not disturb parsing of the known keys.
describe("unknown fields", () => {
it("ignores unknown keys silently", () => {
const raw = "---\nunknown_field: some_value\nstatus: done\n---\nbody";
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter?.status).toBe("done");
});
});
// Full-line `#` comments inside the frontmatter block are skipped.
describe("YAML comments", () => {
it("ignores YAML comment lines", () => {
const raw = "---\n# this is a comment\nstatus: done\n---\nbody";
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter?.status).toBe("done");
});
});
// `---\n---` is a present-but-empty block: frontmatter is non-null and every
// field takes its default.
describe("empty frontmatter block", () => {
it("parses empty frontmatter and uses all defaults", () => {
const raw = "---\n---\nbody";
const result = parseFrontmatterMarkdown(raw);
expect(result.frontmatter).not.toBeNull();
const fm = result.frontmatter!;
expect(fm.status).toBeNull();
expect(fm.next).toBeNull();
expect(fm.confidence).toBeNull();
expect(fm.artifacts).toEqual([]);
expect(fm.scope).toBe("role");
expect(result.body).toBe("body");
});
});
});
// ── validateFrontmatter ──────────────────────────────────────────────────────
/**
 * Test fixture factory: builds a frontmatter object that passes validation and
 * lets each test override only the field(s) it wants to break.
 *
 * @param overrides - Fields to replace on top of the valid baseline.
 * @returns A fresh `AgentFrontmatter` (a new object and a new `artifacts` array per call).
 */
function validFm(overrides: Partial<AgentFrontmatter> = {}): AgentFrontmatter {
  const baseline: AgentFrontmatter = {
    status: "done",
    next: null,
    confidence: null,
    artifacts: [],
    scope: "role",
  };
  // Later spread wins, so anything supplied by the caller replaces the baseline value.
  return { ...baseline, ...overrides };
}
// Behavioural spec for validateFrontmatter(): an empty result means "valid";
// otherwise each error names the offending field. All cases start from the
// valid baseline produced by validFm() and break one field at a time.
describe("validateFrontmatter", () => {
it("returns no errors for a fully valid frontmatter", () => {
const errors = validateFrontmatter(validFm());
expect(errors).toHaveLength(0);
});
// Null is a legal "not provided" value for status, next and confidence.
it("returns no errors when all nullable fields are null", () => {
const fm: AgentFrontmatter = {
status: null,
next: null,
confidence: null,
artifacts: [],
scope: "role",
};
expect(validateFrontmatter(fm)).toHaveLength(0);
});
// The accepted range is the closed interval [0, 1]: both endpoints are valid.
describe("confidence validation", () => {
it("accepts 0.0", () => {
expect(validateFrontmatter(validFm({ confidence: 0 }))).toHaveLength(0);
});
it("accepts 1.0", () => {
expect(validateFrontmatter(validFm({ confidence: 1 }))).toHaveLength(0);
});
it("rejects value below 0", () => {
const errors = validateFrontmatter(validFm({ confidence: -0.1 }));
expect(errors).toHaveLength(1);
expect(errors[0]?.field).toBe("confidence");
});
it("rejects value above 1", () => {
const errors = validateFrontmatter(validFm({ confidence: 1.01 }));
expect(errors).toHaveLength(1);
expect(errors[0]?.field).toBe("confidence");
});
});
// `next` must be a single token: hyphens are fine, whitespace is not.
describe("next validation", () => {
it("accepts a simple role name", () => {
expect(validateFrontmatter(validFm({ next: "reviewer" }))).toHaveLength(0);
});
it("accepts kebab-case role name", () => {
expect(validateFrontmatter(validFm({ next: "code-reviewer" }))).toHaveLength(0);
});
it("rejects role name with whitespace", () => {
const errors = validateFrontmatter(validFm({ next: "role name" }));
expect(errors).toHaveLength(1);
expect(errors[0]?.field).toBe("next");
});
});
// Each artifact entry must contain at least one non-whitespace character.
describe("artifacts validation", () => {
it("accepts non-empty path strings", () => {
expect(
validateFrontmatter(validFm({ artifacts: ["src/foo.ts", "src/bar.ts"] })),
).toHaveLength(0);
});
it("rejects empty string artifact entries", () => {
const errors = validateFrontmatter(validFm({ artifacts: [""] }));
expect(errors).toHaveLength(1);
expect(errors[0]?.field).toBe("artifacts");
});
it("rejects whitespace-only artifact entries", () => {
const errors = validateFrontmatter(validFm({ artifacts: [" "] }));
expect(errors).toHaveLength(1);
expect(errors[0]?.field).toBe("artifacts");
});
});
// Validation does not stop at the first problem: independent violations are
// all reported in one pass.
describe("multiple errors", () => {
it("reports multiple violations at once", () => {
const fm: AgentFrontmatter = {
status: "done",
next: "bad role",
confidence: 2,
artifacts: [""],
scope: "role",
};
const errors = validateFrontmatter(fm);
const fields = errors.map((e) => e.field);
expect(fields).toContain("next");
expect(fields).toContain("confidence");
expect(fields).toContain("artifacts");
});
});
});
@@ -0,0 +1,291 @@
import type {
AgentFrontmatter,
FrontmatterScope,
FrontmatterStatus,
FrontmatterValidationError,
ParsedFrontmatterMarkdown,
} from "./types.js";
// ── YAML frontmatter extractor ───────────────────────────────────────────────
const FENCE = "---";

// Opening fence: `---` at offset 0, followed immediately by LF or CRLF.
const OPENING_FENCE_LINE = new RegExp(`^${FENCE}\\r?\\n`);

// Closing fence: a line consisting solely of `---` (trailing blanks and a CR
// from CRLF input are tolerated). The `m` flag makes `^`/`$` match per line.
const CLOSING_FENCE_LINE = new RegExp(`^${FENCE}[ \\t]*\\r?$`, "m");

// Blank separator lines (LF or CRLF) between the closing fence and the body.
const LEADING_LINE_BREAKS = /^(?:\r?\n)+/;

/**
 * Split a raw agent response into a YAML string (or null) and a markdown body.
 *
 * A frontmatter block MUST:
 *   1. Start at character position 0 with `---` (no leading whitespace / BOM),
 *      followed immediately by a line break.
 *   2. Be closed by a second `---` on its own line. Lines that merely start
 *      with `---` (such as `----` or `---text`) do not close the block.
 *
 * The first line that qualifies as a closing fence wins, so an empty block
 * (`---` directly followed by `---`) is recognised even when the body later
 * contains a `---` horizontal rule. Both LF and CRLF line endings are accepted.
 *
 * Anything that doesn't match this shape is returned verbatim as the body.
 *
 * @param raw - Full text of the agent response.
 * @returns `yaml` is the text between the fences without its trailing line
 *   break (`""` for an empty block), or `null` when no block was found.
 *   `body` is the text after the closing fence with leading blank lines
 *   removed, or `raw` unchanged when `yaml` is `null`.
 */
function splitFrontmatter(raw: string): { yaml: string | null; body: string } {
  const opening = OPENING_FENCE_LINE.exec(raw);
  if (opening === null) {
    return { yaml: null, body: raw };
  }

  // `afterOpen` starts at the first line following the opening fence.
  const afterOpen = raw.slice(opening[0].length);

  const closing = CLOSING_FENCE_LINE.exec(afterOpen);
  if (closing === null) {
    return { yaml: null, body: raw };
  }

  // Everything before the closing fence line, minus the line break that ends
  // the last YAML line. An index of 0 means the block is empty.
  const yaml = afterOpen.slice(0, closing.index).replace(/\r?\n$/, "");

  // Skip past the fence line and any blank separator lines before the body.
  const body = afterOpen
    .slice(closing.index + closing[0].length)
    .replace(LEADING_LINE_BREAKS, "");

  return { yaml, body };
}
// ── Minimal YAML scalar parser ───────────────────────────────────────────────
//
// We intentionally avoid a full YAML library dependency inside workflow-util.
// The frontmatter schema is flat and uses only scalars + simple string lists.
// This parser handles exactly what the spec needs and nothing more.
/** Every value shape the minimal YAML parser can produce. */
type YamlValue = string | number | boolean | null | string[];

/**
 * Interpret the text of a single YAML scalar.
 *
 * Resolution order:
 *   1. Text wrapped in a matching pair of `"` or `'` → the inner text, verbatim
 *      (no escape processing). A lone quote character is not a quoted string
 *      and falls through to the plain-scalar rules.
 *   2. `true` / `false` (any letter case) → boolean.
 *   3. `null`, `~` or empty text → `null`.
 *   4. Anything `Number()` can convert → number.
 *   5. Otherwise the trimmed text itself.
 *
 * @param raw - Scalar text as it appears after the `key:` prefix.
 * @returns The interpreted value; never an array.
 */
function parseYamlScalar(raw: string): YamlValue {
  const text = raw.trim();

  // Quoted string: needs two distinct characters, an opening and a closing
  // quote of the same kind.
  if (text.length >= 2) {
    const first = text[0];
    const last = text[text.length - 1];
    if ((first === '"' || first === "'") && first === last) {
      return text.slice(1, -1);
    }
  }

  switch (text.toLowerCase()) {
    case "true":
      return true;
    case "false":
      return false;
    case "":
    case "null":
    case "~":
      return null;
    default:
      break;
  }

  // Empty text was handled above, so Number("") === 0 cannot sneak through here.
  const num = Number(text);
  return Number.isNaN(num) ? text : num;
}
/**
 * Collect the items of a YAML block sequence (`- item` lines) starting at
 * `startIdx`.
 *
 * Scanning stops at the first line that is neither a sequence item, a blank
 * line, nor a full-line `#` comment. Blank and comment lines inside the
 * sequence are skipped, mirroring how top-level lines are treated. Items
 * wrapped in a matching pair of quotes are unquoted, mirroring scalar values.
 * A bare `-` yields an empty-string item.
 *
 * @param lines - All lines of the YAML block.
 * @param startIdx - Index of the first candidate item line.
 * @returns The collected items and the index of the first line not consumed.
 */
function collectBlockSequence(
  lines: string[],
  startIdx: number,
): { items: string[]; nextIdx: number } {
  // Drop one pair of matching surrounding quotes, if present.
  const unquote = (text: string): string => {
    if (text.length < 2) return text;
    const first = text[0];
    const isQuote = first === '"' || first === "'";
    return isQuote && text[text.length - 1] === first ? text.slice(1, -1) : text;
  };

  const items: string[] = [];
  let i = startIdx;
  while (i < lines.length) {
    const entry = (lines[i] ?? "").trim();
    if (entry === "" || entry.startsWith("#")) {
      // Not an item, but not the end of the sequence either.
      i++;
      continue;
    }
    if (entry === "-") {
      // Item marker with no content (trailing space already trimmed away).
      items.push("");
      i++;
      continue;
    }
    if (!entry.startsWith("- ")) break;
    items.push(unquote(entry.slice(2).trim()));
    i++;
  }
  return { items, nextIdx: i };
}
/**
 * Parse a YAML flow sequence such as `[a.ts, "b.ts"]` into its string items.
 *
 * The caller guarantees that `restTrimmed` starts with `[` and ends with `]`.
 * Items are split on commas, trimmed, and stripped of one pair of matching
 * surrounding quotes; empty items are dropped. Commas inside quoted items are
 * not supported by this minimal parser.
 *
 * @param restTrimmed - The bracketed value text, already trimmed.
 * @returns The non-empty items in source order.
 */
function parseInlineSequence(restTrimmed: string): string[] {
  // Drop one pair of matching surrounding quotes, if present.
  const unquote = (text: string): string => {
    if (text.length < 2) return text;
    const first = text[0];
    const isQuote = first === '"' || first === "'";
    return isQuote && text[text.length - 1] === first ? text.slice(1, -1) : text;
  };

  return restTrimmed
    .slice(1, -1) // remove the enclosing brackets
    .split(",")
    .map((item) => unquote(item.trim()))
    .filter((item) => item !== "");
}
/**
 * Parse the `key: value` entry that starts at line `i`.
 *
 * Value forms, in order of precedence:
 *   - empty value → the block sequence on the following lines, or `null` when
 *     no sequence items follow (a key with no value is a YAML null, not an
 *     empty list);
 *   - `[a, b]` → inline sequence;
 *   - anything else → scalar.
 *
 * @param lines - All lines of the YAML block.
 * @param i - Index of the line to parse.
 * @returns The parsed entry plus the index of the next unread line, or `null`
 *   when line `i` is blank, a full-line comment, or has no `:` separator.
 */
function parseKeyValue(
  lines: string[],
  i: number,
): { key: string; value: YamlValue; nextIdx: number } | null {
  const line = lines[i] ?? "";
  const stripped = line.trim();
  if (stripped === "" || stripped.startsWith("#")) {
    return null;
  }

  // Only the first colon separates key from value, so values may contain `:`.
  const colonIdx = line.indexOf(":");
  if (colonIdx === -1) {
    return null;
  }
  const key = line.slice(0, colonIdx).trim();
  const rest = line.slice(colonIdx + 1).trim();

  if (rest === "") {
    const { items, nextIdx } = collectBlockSequence(lines, i + 1);
    // Without items there is no list: report null so scalar coercers do not
    // see an empty array (which would stringify to "" and, e.g., read as 0).
    return { key, value: items.length > 0 ? items : null, nextIdx };
  }

  if (rest.startsWith("[") && rest.endsWith("]")) {
    return { key, value: parseInlineSequence(rest), nextIdx: i + 1 };
  }

  return { key, value: parseYamlScalar(rest), nextIdx: i + 1 };
}
/**
 * Parse a minimal flat YAML document. Only supports:
 *   - Scalar `key: value` pairs
 *   - Inline sequences (`key: [a, b]`)
 *   - Block sequences under a key (items prefixed with `- `)
 *
 * Lines that cannot be read as an entry (blank lines, comments, lines without
 * a `:`) are skipped rather than reported; this function does not throw on
 * malformed input. When a key appears more than once, the last occurrence wins.
 *
 * @param yaml - Text between the frontmatter fences.
 * @returns A prototype-less dictionary of the parsed entries, so that document
 *   keys such as `__proto__` or `constructor` are stored as ordinary data.
 */
function parseMinimalYaml(yaml: string): Record<string, YamlValue> {
  // No prototype: keys come from untrusted text and must never touch
  // Object.prototype accessors.
  const result: Record<string, YamlValue> = Object.create(null);
  const lines = yaml.split("\n");
  let i = 0;
  while (i < lines.length) {
    const entry = parseKeyValue(lines, i);
    if (entry === null) {
      // Unreadable line: skip it and carry on.
      i++;
      continue;
    }
    result[entry.key] = entry.value;
    // An entry may span several lines (block sequence); resume after it.
    i = entry.nextIdx;
  }
  return result;
}
// ── Field coercers ───────────────────────────────────────────────────────────
/** Every accepted `status` literal. */
const VALID_STATUS: readonly FrontmatterStatus[] = [
  "done",
  "needs_input",
  "in_progress",
  "failed",
];

/** Every accepted `scope` literal. */
const VALID_SCOPE: readonly FrontmatterScope[] = ["role", "thread"];

/**
 * Turn a raw YAML value into a status literal.
 *
 * The value is stringified, trimmed and lower-cased before being matched
 * against `VALID_STATUS`.
 *
 * @param raw - Value read from the `status` key (null when the key is absent).
 * @returns The matching literal, or `null` when absent or unrecognised.
 */
function coerceStatus(raw: YamlValue): FrontmatterStatus | null {
  if (raw == null) {
    return null;
  }
  const candidate = String(raw).trim().toLowerCase();
  const match = VALID_STATUS.find((known) => known === candidate);
  return match ?? null;
}
/**
 * Turn a raw YAML value into a next-role suggestion.
 *
 * @param raw - Value read from the `next` key (null when the key is absent).
 * @returns The stringified, trimmed value, or `null` when absent or blank.
 */
function coerceNext(raw: YamlValue): string | null {
  if (raw == null) {
    return null;
  }
  const name = String(raw).trim();
  return name.length > 0 ? name : null;
}
/**
 * Turn a raw YAML value into a confidence number.
 *
 * Only numbers and numeric strings are accepted. Blank strings, booleans and
 * lists yield `null`; in particular an empty value must not be read as `0`
 * (which is what `Number("")` evaluates to). No range check happens here:
 * out-of-range numbers are passed through so that `validateFrontmatter` can
 * report them.
 *
 * @param raw - Value read from the `confidence` key (null when the key is absent).
 * @returns The numeric value, or `null` when absent or not numeric.
 */
function coerceConfidence(raw: YamlValue): number | null {
  if (typeof raw === "number") {
    return Number.isNaN(raw) ? null : raw;
  }
  if (typeof raw !== "string") {
    // null/undefined (absent), booleans and lists are never a confidence.
    return null;
  }
  const text = raw.trim();
  if (text === "") {
    return null;
  }
  const n = Number(text);
  return Number.isNaN(n) ? null : n;
}
/**
 * Turn a raw YAML value into the artifact list.
 *
 * A list is kept item by item (each stringified, empty strings dropped); a
 * single scalar is stringified, trimmed and wrapped in a one-element list.
 *
 * @param raw - Value read from the `artifacts` key (null when the key is absent).
 * @returns The artifact entries; an empty list when absent or blank.
 */
function coerceArtifacts(raw: YamlValue): readonly string[] {
  if (raw == null) {
    return [];
  }

  if (!Array.isArray(raw)) {
    const single = String(raw).trim();
    return single.length === 0 ? [] : [single];
  }

  const kept: string[] = [];
  for (const entry of raw) {
    const text = String(entry);
    if (text !== "") {
      kept.push(text);
    }
  }
  return kept;
}
/**
 * Turn a raw YAML value into a scope literal.
 *
 * The value is stringified, trimmed and lower-cased before being matched
 * against `VALID_SCOPE`.
 *
 * @param raw - Value read from the `scope` key (null when the key is absent).
 * @returns The matching literal; `"role"` when absent or unrecognised.
 */
function coerceScope(raw: YamlValue): FrontmatterScope {
  const fallback: FrontmatterScope = "role";
  if (raw == null) {
    return fallback;
  }
  const candidate = String(raw).trim().toLowerCase();
  const match = VALID_SCOPE.find((known) => known === candidate);
  return match ?? fallback;
}
// ── Public API ───────────────────────────────────────────────────────────────
/**
 * Parse a raw agent response string into structured frontmatter + body.
 *
 * - `frontmatter` is `null` when no `---…---` block was found; `body` is then
 *   the input unchanged.
 * - Should the YAML step throw, the error is swallowed and the input is
 *   treated as having no frontmatter (full `raw` returned as body).
 * - Unknown YAML keys are ignored.
 * - Missing or invalid values for known keys become that field's null/default
 *   (`scope` defaults to `"role"`, `artifacts` to an empty list).
 *
 * @param raw - Full text of the agent response.
 * @returns The parsed frontmatter (or `null`) and the markdown body.
 */
export function parseFrontmatterMarkdown(raw: string): ParsedFrontmatterMarkdown {
  const split = splitFrontmatter(raw);
  if (split.yaml === null) {
    return { frontmatter: null, body: split.body };
  }

  let parsed: Record<string, YamlValue>;
  try {
    parsed = parseMinimalYaml(split.yaml);
  } catch {
    // Unparseable YAML → treat as no frontmatter; keep full raw as body.
    return { frontmatter: null, body: raw };
  }

  // Absent keys are handed to the coercers as null.
  const read = (key: string): YamlValue => parsed[key] ?? null;

  return {
    frontmatter: {
      status: coerceStatus(read("status")),
      next: coerceNext(read("next")),
      confidence: coerceConfidence(read("confidence")),
      artifacts: coerceArtifacts(read("artifacts")),
      scope: coerceScope(read("scope")),
    },
    body: split.body,
  };
}
/**
 * Validate a parsed `AgentFrontmatter` and return a list of violations.
 *
 * An empty array means the frontmatter is valid. All checks run on every call,
 * so several fields can be reported at once; `artifacts` contributes at most
 * one error no matter how many entries are bad.
 *
 * Validated constraints:
 * - `status`     — must be one of the FrontmatterStatus literals (if non-null)
 * - `confidence` — must be a number in [0.0, 1.0]; `NaN` is rejected (if non-null)
 * - `next`       — must be a non-empty string with no whitespace (if non-null)
 * - `artifacts`  — each entry must be a non-empty string
 * - `scope`      — must be one of the FrontmatterScope literals
 *
 * @param frontmatter - The frontmatter to check.
 * @returns The violations found, in the field order listed above.
 */
export function validateFrontmatter(
  frontmatter: AgentFrontmatter,
): readonly FrontmatterValidationError[] {
  const { status, confidence, next, artifacts, scope } = frontmatter;
  const errors: FrontmatterValidationError[] = [];

  if (status !== null && !VALID_STATUS.includes(status)) {
    errors.push({
      field: "status",
      message: `invalid status "${status}"; must be one of: ${VALID_STATUS.join(", ")}`,
    });
  }

  // Written as a negated "in range" test so that NaN, for which every
  // comparison is false, is reported instead of slipping through.
  if (confidence !== null && !(confidence >= 0 && confidence <= 1)) {
    errors.push({
      field: "confidence",
      message: `confidence ${confidence} is out of range; must be between 0.0 and 1.0 inclusive`,
    });
  }

  if (next !== null) {
    if (next.trim() === "") {
      errors.push({ field: "next", message: "next must be a non-empty string when present" });
    } else if (/\s/.test(next)) {
      errors.push({
        field: "next",
        message: `next "${next}" must not contain whitespace`,
      });
    }
  }

  // One error is enough to flag the list; the message does not name entries.
  if (artifacts.some((artifact) => artifact.trim() === "")) {
    errors.push({ field: "artifacts", message: "artifact entries must be non-empty strings" });
  }

  if (!VALID_SCOPE.includes(scope)) {
    errors.push({
      field: "scope",
      message: `invalid scope "${scope}"; must be one of: ${VALID_SCOPE.join(", ")}`,
    });
  }

  return errors;
}
@@ -0,0 +1,8 @@
export { parseFrontmatterMarkdown, validateFrontmatter } from "./frontmatter-markdown.js";
export type {
AgentFrontmatter,
FrontmatterScope,
FrontmatterStatus,
FrontmatterValidationError,
ParsedFrontmatterMarkdown,
} from "./types.js";
@@ -0,0 +1,111 @@
/**
* Frontmatter Markdown — agent output format (RFC #351 Phase 1).
*
* An agent response is a Markdown document with an optional YAML frontmatter
* block at the top. The frontmatter carries structured signals that the
* moderator and engine can consume without running a full LLM extract pass.
*
* Wire format:
*
* ---
* status: done
* next: reviewer
* confidence: 0.9
* artifacts:
* - src/foo.ts
* scope: role
* ---
*
* ... free-form markdown body ...
*
* All frontmatter fields are optional at the parse level. `validateFrontmatter`
* enforces the constraints documented on each field below.
*/
// ── Vocabulary types ─────────────────────────────────────────────────────────
/**
 * High-level signal from the agent about where work stands.
 *
 * - `done`        — role completed its objective; moderator may advance
 * - `needs_input` — agent is blocked and requires human or peer clarification
 * - `in_progress` — work is underway but the agent chose to yield early
 * - `failed`      — agent cannot complete the task and explains why in the body
 *
 * The parser matches these literals after trimming and lower-casing the raw
 * value; any other value is parsed as `null` rather than rejected.
 */
export type FrontmatterStatus = "done" | "needs_input" | "in_progress" | "failed";
/**
 * Scope of frontmatter signals.
 *
 * - `role`   — signals apply to the current role execution only (default)
 * - `thread` — signals are suggestions for the entire thread moderator
 *
 * The parser falls back to `role` when the value is missing or is not one of
 * these literals.
 */
export type FrontmatterScope = "role" | "thread";
// ── Core frontmatter schema ──────────────────────────────────────────────────
/**
 * Frontmatter fields extracted from an agent response.
 *
 * This is the shape produced by `parseFrontmatterMarkdown`. Parsing only
 * coerces values to the right type; constraints such as the confidence range
 * are checked separately by `validateFrontmatter`.
 *
 * All fields use explicit `T | null` (no optional `?:` per convention).
 */
export type AgentFrontmatter = {
/**
 * Completion status signal from the agent.
 * Null when omitted or unrecognised — engine treats it as "done" for
 * backward compatibility.
 */
status: FrontmatterStatus | null;
/**
 * Suggested next role name for the moderator.
 * The moderator is NOT obligated to follow this — it is advisory only.
 * Null when the agent has no preference.
 * Validation requires a non-empty value without whitespace.
 */
next: string | null;
/**
 * Agent's self-assessed confidence in its output (0.0 – 1.0 inclusive).
 * Null when omitted or not numeric. The range is enforced by validation,
 * not by the parser, so an out-of-range number can appear here.
 */
confidence: number | null;
/**
 * Relative file paths or CAS hashes the agent considers its primary outputs.
 * Used for GC ref-tracing and human-readable summaries.
 * Empty array when omitted (never null — an absent list is an empty list).
 */
artifacts: readonly string[];
/**
 * Scope of the frontmatter signals.
 * Defaults to "role" when omitted or unrecognised.
 */
scope: FrontmatterScope;
};
// ── Parse output ─────────────────────────────────────────────────────────────
/**
 * Result of `parseFrontmatterMarkdown`: the structured frontmatter (if present)
 * and the body (everything after the closing `---` fence, or the whole input
 * if no frontmatter was found).
 */
export type ParsedFrontmatterMarkdown = {
/**
 * Parsed frontmatter fields. Null when no frontmatter block was detected:
 * the document does not start with a `---` line, or the opening fence is
 * never closed. An empty block (`---` followed directly by `---`) is still a
 * block and yields a non-null value with every field at its default.
 */
frontmatter: AgentFrontmatter | null;
/**
 * Markdown body with the frontmatter block stripped and the blank lines that
 * directly follow the closing fence removed. When `frontmatter` is null this
 * is the input, unchanged.
 */
body: string;
};
// ── Validation error ─────────────────────────────────────────────────────────
/**
 * One violation reported by `validateFrontmatter`.
 *
 * `field` names the `AgentFrontmatter` property that failed its check and
 * `message` is a human-readable explanation. The union has one member per
 * field so callers can switch on `field`.
 */
export type FrontmatterValidationError =
| { field: "status"; message: string }
| { field: "next"; message: string }
| { field: "confidence"; message: string }
| { field: "artifacts"; message: string }
| { field: "scope"; message: string };
+11
View File
@@ -1,6 +1,17 @@
export { err, ok } from "@uncaged/workflow-protocol";
export { encodeUint64AsCrockford } from "./base32.js";
export { env } from "./env.js";
export {
parseFrontmatterMarkdown,
validateFrontmatter,
} from "./frontmatter-markdown/index.js";
export type {
AgentFrontmatter,
FrontmatterScope,
FrontmatterStatus,
FrontmatterValidationError,
ParsedFrontmatterMarkdown,
} from "./frontmatter-markdown/index.js";
export { createLogger } from "./logger.js";
export { normalizeRefsField } from "./refs-field.js";
export { getDefaultWorkflowStorageRoot, getGlobalCasDir } from "./storage-root.js";