Compare commits
11 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| f61b395727 | |||
| abc9dcfc5a | |||
| 080b37c2be | |||
| 7935b73374 | |||
| cfa890f83c | |||
| 81c08ac7e2 | |||
| fdcfcc7eba | |||
| 4972f99ca0 | |||
| d8cba5eea0 | |||
| d9f7648fdd | |||
| a2e9dd9785 |
@@ -0,0 +1,174 @@
|
||||
import { execFileSync } from "node:child_process";
|
||||
import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { putSchema } from "@uncaged/json-cas";
|
||||
import { createFsStore } from "@uncaged/json-cas-fs";
|
||||
import type { CasRef, StepNodePayload, ThreadId } from "@uncaged/workflow-protocol";
|
||||
import { afterEach, beforeEach, describe, expect, test } from "vitest";
|
||||
import { registerUwfSchemas } from "../schemas.js";
|
||||
import { saveThreadsIndex } from "../store.js";
|
||||
|
||||
// ── schemas ──────────────────────────────────────────────────────────────────
|
||||
|
||||
const OUTPUT_SCHEMA = {
|
||||
type: "object" as const,
|
||||
properties: {
|
||||
$status: { type: "string" as const, enum: ["done", "failed"] },
|
||||
result: { type: "string" as const },
|
||||
},
|
||||
required: ["$status"],
|
||||
additionalProperties: false,
|
||||
};
|
||||
|
||||
// ── fixture ──────────────────────────────────────────────────────────────────
|
||||
|
||||
let tmpDir: string;
|
||||
|
||||
beforeEach(async () => {
|
||||
tmpDir = await mkdtemp(join(tmpdir(), "cli-uwf-roundtrip-test-"));
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await rm(tmpDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
describe("C1: adapter JSON round-trip integration", () => {
|
||||
test("mock agent outputs JSON, CLI parses it and updates thread head in CAS", async () => {
|
||||
// 1. Set up CAS store with workflow, start node, and output schema
|
||||
const casDir = join(tmpDir, "cas");
|
||||
await mkdir(casDir, { recursive: true });
|
||||
const store = createFsStore(casDir);
|
||||
const schemas = await registerUwfSchemas(store);
|
||||
|
||||
const outputSchemaHash = await putSchema(store, OUTPUT_SCHEMA);
|
||||
|
||||
const workflowHash = await store.put(schemas.workflow, {
|
||||
name: "test-roundtrip",
|
||||
description: "roundtrip integration test",
|
||||
roles: {
|
||||
worker: {
|
||||
description: "Worker role",
|
||||
goal: "Do work",
|
||||
capabilities: [],
|
||||
procedure: "work",
|
||||
output: "result",
|
||||
frontmatter: outputSchemaHash,
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: { _: { role: "worker", prompt: "Do the work", location: null } },
|
||||
worker: { done: { role: "$END", prompt: "completed", location: null } },
|
||||
},
|
||||
});
|
||||
|
||||
const startHash = await store.put(schemas.startNode, {
|
||||
workflow: workflowHash,
|
||||
prompt: "Test round-trip task",
|
||||
});
|
||||
|
||||
const threadId = "01ROUNDTRIPTEST0000000000" as ThreadId;
|
||||
await saveThreadsIndex(tmpDir, { [threadId]: startHash });
|
||||
|
||||
// 2. Pre-create CAS nodes that the mock agent would produce
|
||||
const outputHash = await store.put(outputSchemaHash, {
|
||||
$status: "done",
|
||||
result: "test-ok",
|
||||
});
|
||||
|
||||
// Use text schema for detail (simple placeholder)
|
||||
const detailHash = await store.put(schemas.text, "mock detail");
|
||||
|
||||
const startedAtMs = 1716600000000;
|
||||
const completedAtMs = 1716600001500;
|
||||
|
||||
const stepHash = await store.put(schemas.stepNode, {
|
||||
start: startHash,
|
||||
prev: null,
|
||||
role: "worker",
|
||||
output: outputHash,
|
||||
detail: detailHash,
|
||||
agent: "uwf-mock",
|
||||
edgePrompt: "Do the work",
|
||||
startedAtMs,
|
||||
completedAtMs,
|
||||
cwd: tmpDir,
|
||||
});
|
||||
|
||||
// 3. Create a minimal mock agent shell script that just outputs JSON
|
||||
// The step node is already in CAS — the agent just needs to print the JSON line
|
||||
const mockAgentPath = join(tmpDir, "mock-agent.sh");
|
||||
const adapterJson = JSON.stringify({
|
||||
stepHash,
|
||||
detailHash,
|
||||
role: "worker",
|
||||
frontmatter: { $status: "done", result: "test-ok" },
|
||||
body: "",
|
||||
startedAtMs,
|
||||
completedAtMs,
|
||||
});
|
||||
await writeFile(mockAgentPath, `#!/bin/sh\necho '${adapterJson}'\n`, { mode: 0o755 });
|
||||
|
||||
// 4. Write config.yaml
|
||||
const configPath = join(tmpDir, "config.yaml");
|
||||
await writeFile(
|
||||
configPath,
|
||||
`defaultAgent: uwf-hermes\ndefaultModel: test-model\nagentOverrides: null\nagents: {}\nproviders: {}\nmodels: {}\n`,
|
||||
);
|
||||
|
||||
// 5. Run CLI with agent override pointing to our mock
|
||||
const cliPath = join(import.meta.dirname, "..", "cli.js");
|
||||
let stdout: string;
|
||||
let stderr: string;
|
||||
let exitCode: number;
|
||||
|
||||
try {
|
||||
stdout = execFileSync(
|
||||
"bun",
|
||||
["run", cliPath, "thread", "exec", threadId, "--agent", mockAgentPath],
|
||||
{
|
||||
encoding: "utf8",
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
env: { ...process.env, WORKFLOW_STORAGE_ROOT: tmpDir },
|
||||
cwd: tmpDir,
|
||||
timeout: 30000,
|
||||
},
|
||||
);
|
||||
stderr = "";
|
||||
exitCode = 0;
|
||||
} catch (e: unknown) {
|
||||
const err = e as NodeJS.ErrnoException & {
|
||||
stdout?: string;
|
||||
stderr?: string;
|
||||
status?: number;
|
||||
};
|
||||
stdout = err.stdout ?? "";
|
||||
stderr = err.stderr ?? "";
|
||||
exitCode = err.status ?? 1;
|
||||
}
|
||||
|
||||
// 6. Verify
|
||||
if (exitCode !== 0) {
|
||||
throw new Error(`CLI exited with code ${exitCode}\nstdout: ${stdout}\nstderr: ${stderr}`);
|
||||
}
|
||||
|
||||
// Parse CLI output
|
||||
const cliOutput = JSON.parse(stdout.trim());
|
||||
expect(cliOutput).toHaveProperty("thread", threadId);
|
||||
expect(cliOutput).toHaveProperty("head", stepHash);
|
||||
expect(cliOutput.head).toMatch(/^[0-9A-HJ-NP-TV-Z]{13}$/);
|
||||
|
||||
// Verify the CAS step node exists and has correct metadata
|
||||
const storeAfter = createFsStore(casDir);
|
||||
const stepNode = storeAfter.get(cliOutput.head as CasRef);
|
||||
expect(stepNode).not.toBeNull();
|
||||
|
||||
const payload = stepNode!.payload as StepNodePayload;
|
||||
expect(payload.role).toBe("worker");
|
||||
expect(payload.agent).toBe("uwf-mock");
|
||||
expect(payload.startedAtMs).toBe(1716600000000);
|
||||
expect(payload.completedAtMs).toBe(1716600001500);
|
||||
expect(payload.output).toBe(outputHash);
|
||||
expect(payload.detail).toBe(detailHash);
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,442 @@
|
||||
import { mkdir, rm, writeFile } from "node:fs/promises";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { putSchema } from "@uncaged/json-cas";
|
||||
import type { CasRef, ThreadId } from "@uncaged/workflow-protocol";
|
||||
import { describe, expect, test } from "vitest";
|
||||
import { createMarker, deleteMarker } from "../background/index.js";
|
||||
import { cmdThreadList, cmdThreadShow, cmdThreadStart } from "../commands/thread.js";
|
||||
import {
|
||||
appendThreadHistory,
|
||||
createUwfStore,
|
||||
loadThreadsIndex,
|
||||
saveThreadsIndex,
|
||||
} from "../store.js";
|
||||
|
||||
const OUTPUT_SCHEMA = {
|
||||
type: "object" as const,
|
||||
properties: {
|
||||
$status: { type: "string" as const },
|
||||
},
|
||||
};
|
||||
|
||||
const SIMPLE_WORKFLOW_YAML = `
|
||||
name: test-current-role
|
||||
description: Test workflow for currentRole
|
||||
roles:
|
||||
roleA:
|
||||
description: First role
|
||||
goal: Do A
|
||||
capabilities: ["coding"]
|
||||
procedure: Do A
|
||||
output: |
|
||||
$status: "ready"
|
||||
frontmatter:
|
||||
type: object
|
||||
required: ["$status"]
|
||||
properties:
|
||||
$status: { type: string, enum: ["ready", "not-ready"] }
|
||||
roleB:
|
||||
description: Second role
|
||||
goal: Do B
|
||||
capabilities: ["coding"]
|
||||
procedure: Do B
|
||||
output: |
|
||||
$status: "done"
|
||||
frontmatter:
|
||||
type: object
|
||||
required: ["$status"]
|
||||
properties:
|
||||
$status: { type: string }
|
||||
graph:
|
||||
$START:
|
||||
_:
|
||||
role: roleA
|
||||
prompt: "Do A"
|
||||
location: null
|
||||
roleA:
|
||||
ready:
|
||||
role: roleB
|
||||
prompt: "Do B"
|
||||
location: null
|
||||
not-ready:
|
||||
role: roleA
|
||||
prompt: "Try again"
|
||||
location: null
|
||||
roleB:
|
||||
_:
|
||||
role: $END
|
||||
prompt: "Done"
|
||||
location: null
|
||||
`;
|
||||
|
||||
const CONDITIONAL_WORKFLOW_YAML = `
|
||||
name: test-conditional-role
|
||||
description: Conditional routing workflow
|
||||
roles:
|
||||
roleA:
|
||||
description: First role
|
||||
goal: Do A
|
||||
capabilities: ["coding"]
|
||||
procedure: Do A
|
||||
output: |
|
||||
$status: "pass"
|
||||
frontmatter:
|
||||
type: object
|
||||
required: ["$status"]
|
||||
properties:
|
||||
$status: { type: string, enum: ["pass", "fail"] }
|
||||
roleB:
|
||||
description: Pass role
|
||||
goal: Do B
|
||||
capabilities: ["coding"]
|
||||
procedure: Do B
|
||||
output: |
|
||||
$status: "done"
|
||||
frontmatter:
|
||||
type: object
|
||||
required: ["$status"]
|
||||
properties:
|
||||
$status: { type: string }
|
||||
roleC:
|
||||
description: Fail role
|
||||
goal: Do C
|
||||
capabilities: ["coding"]
|
||||
procedure: Do C
|
||||
output: |
|
||||
$status: "done"
|
||||
frontmatter:
|
||||
type: object
|
||||
required: ["$status"]
|
||||
properties:
|
||||
$status: { type: string }
|
||||
graph:
|
||||
$START:
|
||||
_:
|
||||
role: roleA
|
||||
prompt: "Do A"
|
||||
location: null
|
||||
roleA:
|
||||
pass:
|
||||
role: roleB
|
||||
prompt: "Do B (pass)"
|
||||
location: null
|
||||
fail:
|
||||
role: roleC
|
||||
prompt: "Do C (fail)"
|
||||
location: null
|
||||
roleB:
|
||||
_:
|
||||
role: $END
|
||||
prompt: "Done"
|
||||
location: null
|
||||
roleC:
|
||||
_:
|
||||
role: $END
|
||||
prompt: "Done"
|
||||
location: null
|
||||
`;
|
||||
|
||||
const SINGLE_ROLE_WORKFLOW_YAML = `
|
||||
name: test-single-role
|
||||
description: Single role that goes to END
|
||||
roles:
|
||||
worker:
|
||||
description: Worker
|
||||
goal: Work
|
||||
capabilities: ["coding"]
|
||||
procedure: Work
|
||||
output: |
|
||||
$status: "done"
|
||||
frontmatter:
|
||||
type: object
|
||||
required: ["$status"]
|
||||
properties:
|
||||
$status: { type: string }
|
||||
graph:
|
||||
$START:
|
||||
_:
|
||||
role: worker
|
||||
prompt: "Work"
|
||||
location: null
|
||||
worker:
|
||||
_:
|
||||
role: $END
|
||||
prompt: "Done"
|
||||
location: null
|
||||
`;
|
||||
|
||||
/** Helper: insert a completed step node after the current head. */
|
||||
async function insertStepNode(
|
||||
storageRoot: string,
|
||||
threadId: ThreadId,
|
||||
role: string,
|
||||
outputPayload: Record<string, unknown>,
|
||||
): Promise<void> {
|
||||
const uwf = await createUwfStore(storageRoot);
|
||||
const index = await loadThreadsIndex(storageRoot);
|
||||
const head = index[threadId];
|
||||
if (head === undefined) throw new Error(`thread ${threadId} not in index`);
|
||||
|
||||
const outputSchemaHash = await putSchema(uwf.store, OUTPUT_SCHEMA);
|
||||
const outputHash = await uwf.store.put(outputSchemaHash, outputPayload);
|
||||
|
||||
// Use text schema for detail (simple placeholder)
|
||||
const detailHash = await uwf.store.put(uwf.schemas.text, "detail-placeholder");
|
||||
|
||||
// Resolve start hash from head
|
||||
const headNode = uwf.store.get(head);
|
||||
if (headNode === null) throw new Error(`head ${head} not found`);
|
||||
const isStart = headNode.type === uwf.schemas.startNode;
|
||||
const startHash = isStart ? head : (headNode.payload as { start: CasRef }).start;
|
||||
|
||||
const stepHash = (await uwf.store.put(uwf.schemas.stepNode, {
|
||||
start: startHash,
|
||||
prev: isStart ? null : head,
|
||||
role,
|
||||
prompt: `Do ${role}`,
|
||||
output: outputHash,
|
||||
detail: detailHash,
|
||||
})) as CasRef;
|
||||
|
||||
index[threadId] = stepHash;
|
||||
await saveThreadsIndex(storageRoot, index);
|
||||
}
|
||||
|
||||
describe("currentRole field", () => {
|
||||
let tmpDir: string;
|
||||
let storageRoot: string;
|
||||
|
||||
async function setup() {
|
||||
tmpDir = join(
|
||||
tmpdir(),
|
||||
`uwf-test-current-role-${Date.now()}-${Math.random().toString(36).slice(2)}`,
|
||||
);
|
||||
storageRoot = join(tmpDir, "storage");
|
||||
await mkdir(storageRoot, { recursive: true });
|
||||
}
|
||||
|
||||
async function teardown() {
|
||||
if (tmpDir) {
|
||||
await rm(tmpDir, { recursive: true, force: true });
|
||||
}
|
||||
}
|
||||
|
||||
// T1: idle at start — currentRole = first role from graph
|
||||
test("thread show — idle at start returns first role as currentRole", async () => {
|
||||
await setup();
|
||||
try {
|
||||
const wf = join(tmpDir, "test-current-role.yaml");
|
||||
await writeFile(wf, SIMPLE_WORKFLOW_YAML, "utf8");
|
||||
const { thread } = await cmdThreadStart(storageRoot, wf, "test", tmpDir);
|
||||
|
||||
const result = await cmdThreadShow(storageRoot, thread as ThreadId);
|
||||
expect(result.status).toBe("idle");
|
||||
expect(result.currentRole).toBe("roleA");
|
||||
} finally {
|
||||
await teardown();
|
||||
}
|
||||
});
|
||||
|
||||
// T2: idle after one step — currentRole = next role
|
||||
test("thread show — idle after step returns next role as currentRole", async () => {
|
||||
await setup();
|
||||
try {
|
||||
const wf = join(tmpDir, "test-current-role.yaml");
|
||||
await writeFile(wf, SIMPLE_WORKFLOW_YAML, "utf8");
|
||||
const { thread } = await cmdThreadStart(storageRoot, wf, "test", tmpDir);
|
||||
|
||||
await insertStepNode(storageRoot, thread as ThreadId, "roleA", { $status: "ready" });
|
||||
|
||||
const result = await cmdThreadShow(storageRoot, thread as ThreadId);
|
||||
expect(result.status).toBe("idle");
|
||||
expect(result.currentRole).toBe("roleB");
|
||||
} finally {
|
||||
await teardown();
|
||||
}
|
||||
});
|
||||
|
||||
// T3: completed → currentRole = null
|
||||
test("thread show — completed thread returns null currentRole", async () => {
|
||||
await setup();
|
||||
try {
|
||||
const wf = join(tmpDir, "test-current-role.yaml");
|
||||
await writeFile(wf, SIMPLE_WORKFLOW_YAML, "utf8");
|
||||
const { thread, workflow } = await cmdThreadStart(storageRoot, wf, "test", tmpDir);
|
||||
const tid = thread as ThreadId;
|
||||
|
||||
const index = await loadThreadsIndex(storageRoot);
|
||||
const head = index[tid]!;
|
||||
delete index[tid];
|
||||
await saveThreadsIndex(storageRoot, index);
|
||||
await appendThreadHistory(storageRoot, {
|
||||
thread: tid,
|
||||
workflow,
|
||||
head,
|
||||
completedAt: Date.now(),
|
||||
reason: "completed",
|
||||
});
|
||||
|
||||
const result = await cmdThreadShow(storageRoot, tid);
|
||||
expect(result.status).toBe("completed");
|
||||
expect(result.currentRole).toBe(null);
|
||||
} finally {
|
||||
await teardown();
|
||||
}
|
||||
});
|
||||
|
||||
// T4: cancelled → currentRole = null
|
||||
test("thread show — cancelled thread returns null currentRole", async () => {
|
||||
await setup();
|
||||
try {
|
||||
const wf = join(tmpDir, "test-current-role.yaml");
|
||||
await writeFile(wf, SIMPLE_WORKFLOW_YAML, "utf8");
|
||||
const { thread, workflow } = await cmdThreadStart(storageRoot, wf, "test", tmpDir);
|
||||
const tid = thread as ThreadId;
|
||||
|
||||
const index = await loadThreadsIndex(storageRoot);
|
||||
const head = index[tid]!;
|
||||
delete index[tid];
|
||||
await saveThreadsIndex(storageRoot, index);
|
||||
await appendThreadHistory(storageRoot, {
|
||||
thread: tid,
|
||||
workflow,
|
||||
head,
|
||||
completedAt: Date.now(),
|
||||
reason: "cancelled",
|
||||
});
|
||||
|
||||
const result = await cmdThreadShow(storageRoot, tid);
|
||||
expect(result.status).toBe("cancelled");
|
||||
expect(result.currentRole).toBe(null);
|
||||
} finally {
|
||||
await teardown();
|
||||
}
|
||||
});
|
||||
|
||||
// T5: running → currentRole = role being executed
|
||||
test("thread show — running thread returns current role", async () => {
|
||||
await setup();
|
||||
try {
|
||||
const wf = join(tmpDir, "test-current-role.yaml");
|
||||
await writeFile(wf, SIMPLE_WORKFLOW_YAML, "utf8");
|
||||
const { thread, workflow } = await cmdThreadStart(storageRoot, wf, "test", tmpDir);
|
||||
const tid = thread as ThreadId;
|
||||
|
||||
await createMarker(storageRoot, {
|
||||
thread: tid,
|
||||
workflow,
|
||||
pid: process.pid,
|
||||
startedAt: Date.now(),
|
||||
});
|
||||
|
||||
try {
|
||||
const result = await cmdThreadShow(storageRoot, tid);
|
||||
expect(result.status).toBe("running");
|
||||
expect(result.currentRole).toBe("roleA");
|
||||
} finally {
|
||||
await deleteMarker(storageRoot, tid);
|
||||
}
|
||||
} finally {
|
||||
await teardown();
|
||||
}
|
||||
});
|
||||
|
||||
// T6: thread list — mixed statuses with correct currentRole
|
||||
test("thread list — returns correct currentRole for each status", async () => {
|
||||
await setup();
|
||||
try {
|
||||
const wf = join(tmpDir, "test-current-role.yaml");
|
||||
await writeFile(wf, SIMPLE_WORKFLOW_YAML, "utf8");
|
||||
|
||||
// idle thread
|
||||
const idle = await cmdThreadStart(storageRoot, wf, "idle", tmpDir);
|
||||
const idleId = idle.thread as ThreadId;
|
||||
|
||||
// completed thread
|
||||
const comp = await cmdThreadStart(storageRoot, wf, "completed", tmpDir);
|
||||
const compId = comp.thread as ThreadId;
|
||||
const index = await loadThreadsIndex(storageRoot);
|
||||
const compHead = index[compId]!;
|
||||
delete index[compId];
|
||||
await saveThreadsIndex(storageRoot, index);
|
||||
await appendThreadHistory(storageRoot, {
|
||||
thread: compId,
|
||||
workflow: comp.workflow,
|
||||
head: compHead,
|
||||
completedAt: Date.now(),
|
||||
reason: "completed",
|
||||
});
|
||||
|
||||
const list = await cmdThreadList(storageRoot, null, null, null, 0, 100);
|
||||
|
||||
const idleItem = list.find((i) => i.thread === idleId);
|
||||
expect(idleItem).toBeDefined();
|
||||
expect(idleItem!.currentRole).toBe("roleA");
|
||||
|
||||
const compItem = list.find((i) => i.thread === compId);
|
||||
expect(compItem).toBeDefined();
|
||||
expect(compItem!.currentRole).toBe(null);
|
||||
} finally {
|
||||
await teardown();
|
||||
}
|
||||
});
|
||||
|
||||
// T7: thread list — idle at start has correct currentRole
|
||||
test("thread list — idle thread at start has correct currentRole", async () => {
|
||||
await setup();
|
||||
try {
|
||||
const wf = join(tmpDir, "test-current-role.yaml");
|
||||
await writeFile(wf, SIMPLE_WORKFLOW_YAML, "utf8");
|
||||
const { thread } = await cmdThreadStart(storageRoot, wf, "test", tmpDir);
|
||||
|
||||
const list = await cmdThreadList(storageRoot, null, null, null, 0, 100);
|
||||
const item = list.find((i) => i.thread === (thread as ThreadId));
|
||||
expect(item).toBeDefined();
|
||||
expect(item!.currentRole).toBe("roleA");
|
||||
} finally {
|
||||
await teardown();
|
||||
}
|
||||
});
|
||||
|
||||
// T8: conditional routing — $status=pass vs fail
|
||||
test("thread show — conditional routing selects correct next role", async () => {
|
||||
await setup();
|
||||
try {
|
||||
const wf = join(tmpDir, "test-conditional-role.yaml");
|
||||
await writeFile(wf, CONDITIONAL_WORKFLOW_YAML, "utf8");
|
||||
|
||||
// pass path
|
||||
const t1 = await cmdThreadStart(storageRoot, wf, "pass test", tmpDir);
|
||||
await insertStepNode(storageRoot, t1.thread as ThreadId, "roleA", { $status: "pass" });
|
||||
const r1 = await cmdThreadShow(storageRoot, t1.thread as ThreadId);
|
||||
expect(r1.currentRole).toBe("roleB");
|
||||
|
||||
// fail path
|
||||
const t2 = await cmdThreadStart(storageRoot, wf, "fail test", tmpDir);
|
||||
await insertStepNode(storageRoot, t2.thread as ThreadId, "roleA", { $status: "fail" });
|
||||
const r2 = await cmdThreadShow(storageRoot, t2.thread as ThreadId);
|
||||
expect(r2.currentRole).toBe("roleC");
|
||||
} finally {
|
||||
await teardown();
|
||||
}
|
||||
});
|
||||
|
||||
// T9: next role is $END → currentRole = null
|
||||
test("thread show — when next is $END, currentRole is null", async () => {
|
||||
await setup();
|
||||
try {
|
||||
const wf = join(tmpDir, "test-single-role.yaml");
|
||||
await writeFile(wf, SINGLE_ROLE_WORKFLOW_YAML, "utf8");
|
||||
|
||||
const { thread } = await cmdThreadStart(storageRoot, wf, "test", tmpDir);
|
||||
// worker → _ maps to $END
|
||||
await insertStepNode(storageRoot, thread as ThreadId, "worker", {});
|
||||
|
||||
const result = await cmdThreadShow(storageRoot, thread as ThreadId);
|
||||
expect(result.currentRole).toBe(null);
|
||||
} finally {
|
||||
await teardown();
|
||||
}
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,100 @@
|
||||
import { describe, expect, test } from "vitest";
|
||||
|
||||
/**
|
||||
* B-group tests: validate JSON parsing logic used by spawnAgent.
|
||||
*
|
||||
* We test the parsing logic inline since spawnAgent is a private function.
|
||||
* These tests verify the contract: last line of stdout must be valid JSON
|
||||
* with a valid stepHash CasRef.
|
||||
*/
|
||||
|
||||
const CASREF_PATTERN = /^[0-9A-HJ-NP-TV-Z]{13}$/;
|
||||
|
||||
function isCasRef(s: string): boolean {
|
||||
return CASREF_PATTERN.test(s);
|
||||
}
|
||||
|
||||
type AdapterOutput = {
|
||||
stepHash: string;
|
||||
detailHash: string;
|
||||
role: string;
|
||||
frontmatter: Record<string, unknown>;
|
||||
body: string;
|
||||
startedAtMs: number;
|
||||
completedAtMs: number;
|
||||
};
|
||||
|
||||
function parseAgentStdout(stdout: string): AdapterOutput {
|
||||
const line = stdout.trim().split("\n").pop()?.trim() ?? "";
|
||||
let parsed: unknown;
|
||||
try {
|
||||
parsed = JSON.parse(line);
|
||||
} catch {
|
||||
throw new Error(`agent stdout last line is not valid JSON: ${line || "(empty)"}`);
|
||||
}
|
||||
const obj = parsed as Record<string, unknown>;
|
||||
if (
|
||||
typeof obj !== "object" ||
|
||||
obj === null ||
|
||||
typeof obj.stepHash !== "string" ||
|
||||
!isCasRef(obj.stepHash as string)
|
||||
) {
|
||||
throw new Error(`agent stdout JSON missing valid stepHash: ${line}`);
|
||||
}
|
||||
return obj as unknown as AdapterOutput;
|
||||
}
|
||||
|
||||
const VALID_OUTPUT: AdapterOutput = {
|
||||
stepHash: "0123456789ABC",
|
||||
detailHash: "DEFGH12345678",
|
||||
role: "planner",
|
||||
frontmatter: { $status: "ready", plan: "somehash" },
|
||||
body: "Plan body",
|
||||
startedAtMs: 1000,
|
||||
completedAtMs: 2000,
|
||||
};
|
||||
|
||||
describe("spawnAgent JSON parsing", () => {
|
||||
test("B1. parses valid JSON from agent stdout", () => {
|
||||
const stdout = JSON.stringify(VALID_OUTPUT) + "\n";
|
||||
const result = parseAgentStdout(stdout);
|
||||
expect(result.stepHash).toBe("0123456789ABC");
|
||||
expect(result.detailHash).toBe("DEFGH12345678");
|
||||
expect(result.role).toBe("planner");
|
||||
expect(result.frontmatter).toEqual({ $status: "ready", plan: "somehash" });
|
||||
expect(result.body).toBe("Plan body");
|
||||
expect(result.startedAtMs).toBe(1000);
|
||||
expect(result.completedAtMs).toBe(2000);
|
||||
});
|
||||
|
||||
test("B2. extracts stepHash for head pointer", () => {
|
||||
const stdout = JSON.stringify(VALID_OUTPUT) + "\n";
|
||||
const result = parseAgentStdout(stdout);
|
||||
expect(result.stepHash).toBe("0123456789ABC");
|
||||
expect(isCasRef(result.stepHash)).toBe(true);
|
||||
});
|
||||
|
||||
test("B3. handles debug lines before JSON", () => {
|
||||
const debugLines = "[debug] loading context...\n[debug] running agent...\n";
|
||||
const stdout = debugLines + JSON.stringify(VALID_OUTPUT) + "\n";
|
||||
const result = parseAgentStdout(stdout);
|
||||
expect(result.stepHash).toBe("0123456789ABC");
|
||||
});
|
||||
|
||||
test("B4. rejects non-JSON last line", () => {
|
||||
const stdout = "not-json-at-all\n";
|
||||
expect(() => parseAgentStdout(stdout)).toThrow("not valid JSON");
|
||||
});
|
||||
|
||||
test("B5. rejects JSON missing stepHash", () => {
|
||||
const incomplete = { detailHash: "DEFGH12345678", role: "planner" };
|
||||
const stdout = JSON.stringify(incomplete) + "\n";
|
||||
expect(() => parseAgentStdout(stdout)).toThrow("missing valid stepHash");
|
||||
});
|
||||
|
||||
test("B6. rejects JSON with invalid stepHash", () => {
|
||||
const bad = { ...VALID_OUTPUT, stepHash: "not-a-hash" };
|
||||
const stdout = JSON.stringify(bad) + "\n";
|
||||
expect(() => parseAgentStdout(stdout)).toThrow("missing valid stepHash");
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,363 @@
|
||||
import { mkdir, mkdtemp, rm } from "node:fs/promises";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { bootstrap, type Hash, type JSONSchema, putSchema } from "@uncaged/json-cas";
|
||||
import { createFsStore } from "@uncaged/json-cas-fs";
|
||||
import type { CasRef, StepNodePayload } from "@uncaged/workflow-protocol";
|
||||
import { afterEach, beforeEach, describe, expect, test } from "vitest";
|
||||
import { cmdStepShow } from "../commands/step.js";
|
||||
import { formatOutput } from "../format.js";
|
||||
import { registerUwfSchemas } from "../schemas.js";
|
||||
|
||||
const TURN_SCHEMA: JSONSchema = {
|
||||
title: "test-turn",
|
||||
type: "object",
|
||||
required: ["index", "role", "content"],
|
||||
properties: {
|
||||
index: { type: "integer" },
|
||||
role: { type: "string", enum: ["assistant", "tool"] },
|
||||
content: { type: "string" },
|
||||
toolCalls: {
|
||||
anyOf: [
|
||||
{
|
||||
type: "array",
|
||||
items: {
|
||||
type: "object",
|
||||
required: ["name", "args"],
|
||||
properties: {
|
||||
name: { type: "string" },
|
||||
args: { type: "string" },
|
||||
},
|
||||
additionalProperties: false,
|
||||
},
|
||||
},
|
||||
{ type: "null" },
|
||||
],
|
||||
},
|
||||
},
|
||||
additionalProperties: false,
|
||||
};
|
||||
|
||||
const DETAIL_SCHEMA: JSONSchema = {
|
||||
title: "test-detail",
|
||||
type: "object",
|
||||
required: ["turns"],
|
||||
properties: {
|
||||
turns: {
|
||||
type: "array",
|
||||
items: { type: "string", format: "cas_ref" },
|
||||
},
|
||||
},
|
||||
additionalProperties: false,
|
||||
};
|
||||
|
||||
type TestSetup = {
|
||||
store: ReturnType<typeof createFsStore>;
|
||||
schemas: {
|
||||
workflow: Hash;
|
||||
startNode: Hash;
|
||||
stepNode: Hash;
|
||||
text: Hash;
|
||||
};
|
||||
turnType: Hash;
|
||||
detailType: Hash;
|
||||
};
|
||||
|
||||
async function setupTest(casDir: string): Promise<TestSetup> {
|
||||
const store = createFsStore(casDir);
|
||||
await bootstrap(store);
|
||||
const schemas = await registerUwfSchemas(store);
|
||||
const [turnType, detailType] = await Promise.all([
|
||||
putSchema(store, TURN_SCHEMA),
|
||||
putSchema(store, DETAIL_SCHEMA),
|
||||
]);
|
||||
return { store, schemas, turnType, detailType };
|
||||
}
|
||||
|
||||
async function createTestStep(
|
||||
setup: TestSetup,
|
||||
turnPayloads: Array<{
|
||||
index: number;
|
||||
role: string;
|
||||
content: string;
|
||||
toolCalls: Array<{ name: string; args: string }> | null;
|
||||
}>,
|
||||
): Promise<CasRef> {
|
||||
const { store, schemas, turnType, detailType } = setup;
|
||||
|
||||
// Create turn nodes
|
||||
const turnHashes: CasRef[] = [];
|
||||
for (const payload of turnPayloads) {
|
||||
const turnHash = await store.put(turnType, payload);
|
||||
turnHashes.push(turnHash);
|
||||
}
|
||||
|
||||
// Create detail node
|
||||
const detailHash = await store.put(detailType, { turns: turnHashes });
|
||||
|
||||
// Create dummy start node
|
||||
const startHash = await store.put(schemas.startNode, {
|
||||
workflow: "0000000000000" as CasRef,
|
||||
prompt: "test prompt",
|
||||
cwd: "/tmp",
|
||||
});
|
||||
|
||||
// Create dummy output node
|
||||
const outputHash = await store.put(schemas.text, { $status: "done" });
|
||||
|
||||
// Create step node
|
||||
const stepPayload: StepNodePayload = {
|
||||
prev: null,
|
||||
start: startHash,
|
||||
role: "test-role",
|
||||
agent: "test-agent",
|
||||
output: outputHash,
|
||||
detail: detailHash,
|
||||
edgePrompt: "",
|
||||
startedAtMs: Date.now(),
|
||||
completedAtMs: Date.now() + 1000,
|
||||
cwd: "/tmp",
|
||||
};
|
||||
return store.put(schemas.stepNode, stepPayload);
|
||||
}
|
||||
|
||||
describe("cmdStepShow JSON serialization", () => {
|
||||
let testDir: string;
|
||||
let casDir: string;
|
||||
|
||||
beforeEach(async () => {
|
||||
testDir = await mkdtemp(join(tmpdir(), "uwf-test-"));
|
||||
casDir = join(testDir, "cas");
|
||||
await mkdir(casDir, { recursive: true });
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await rm(testDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
test("escapes newlines in tool call args", async () => {
|
||||
const setup = await setupTest(casDir);
|
||||
const stepHash = await createTestStep(setup, [
|
||||
{
|
||||
index: 0,
|
||||
role: "assistant",
|
||||
content: "Running command",
|
||||
toolCalls: [
|
||||
{
|
||||
name: "Bash",
|
||||
args: "echo 'line1'\necho 'line2'",
|
||||
},
|
||||
],
|
||||
},
|
||||
]);
|
||||
|
||||
const result = await cmdStepShow(testDir, stepHash);
|
||||
const jsonOutput = formatOutput(result, "json");
|
||||
|
||||
expect(() => JSON.parse(jsonOutput)).not.toThrow();
|
||||
expect(jsonOutput).toContain("\\n");
|
||||
|
||||
const parsed = JSON.parse(jsonOutput);
|
||||
expect(parsed.turns[0].toolCalls[0].args).toContain("\n");
|
||||
});
|
||||
|
||||
test("escapes tabs in tool call args", async () => {
|
||||
const setup = await setupTest(casDir);
|
||||
const stepHash = await createTestStep(setup, [
|
||||
{
|
||||
index: 0,
|
||||
role: "assistant",
|
||||
content: "",
|
||||
toolCalls: [
|
||||
{
|
||||
name: "Bash",
|
||||
args: "cat <<EOF\nfield1\tfield2\tfield3\nEOF",
|
||||
},
|
||||
],
|
||||
},
|
||||
]);
|
||||
|
||||
const result = await cmdStepShow(testDir, stepHash);
|
||||
const jsonOutput = formatOutput(result, "json");
|
||||
|
||||
expect(() => JSON.parse(jsonOutput)).not.toThrow();
|
||||
expect(jsonOutput).toContain("\\t");
|
||||
});
|
||||
|
||||
test("escapes carriage returns", async () => {
|
||||
const setup = await setupTest(casDir);
|
||||
const stepHash = await createTestStep(setup, [
|
||||
{
|
||||
index: 0,
|
||||
role: "assistant",
|
||||
content: "Committing changes",
|
||||
toolCalls: [
|
||||
{
|
||||
name: "Bash",
|
||||
args: 'git commit -m "First line\r\nSecond line"',
|
||||
},
|
||||
],
|
||||
},
|
||||
]);
|
||||
|
||||
const result = await cmdStepShow(testDir, stepHash);
|
||||
const jsonOutput = formatOutput(result, "json");
|
||||
|
||||
expect(() => JSON.parse(jsonOutput)).not.toThrow();
|
||||
expect(jsonOutput).toContain("\\r\\n");
|
||||
});
|
||||
|
||||
test("escapes backslashes and quotes", async () => {
|
||||
const setup = await setupTest(casDir);
|
||||
const stepHash = await createTestStep(setup, [
|
||||
{
|
||||
index: 0,
|
||||
role: "assistant",
|
||||
content: "",
|
||||
toolCalls: [
|
||||
{
|
||||
name: "Bash",
|
||||
args: 'echo "He said \\"hello\\""',
|
||||
},
|
||||
],
|
||||
},
|
||||
]);
|
||||
|
||||
const result = await cmdStepShow(testDir, stepHash);
|
||||
const jsonOutput = formatOutput(result, "json");
|
||||
|
||||
expect(() => JSON.parse(jsonOutput)).not.toThrow();
|
||||
const parsed = JSON.parse(jsonOutput);
|
||||
expect(parsed.turns).toBeDefined();
|
||||
});
|
||||
|
||||
test("handles Unicode control characters", async () => {
|
||||
const setup = await setupTest(casDir);
|
||||
const stepHash = await createTestStep(setup, [
|
||||
{
|
||||
index: 0,
|
||||
role: "assistant",
|
||||
content: "",
|
||||
toolCalls: [
|
||||
{
|
||||
name: "Bash",
|
||||
args: "echo '\u0001\u001F'",
|
||||
},
|
||||
],
|
||||
},
|
||||
]);
|
||||
|
||||
const result = await cmdStepShow(testDir, stepHash);
|
||||
const jsonOutput = formatOutput(result, "json");
|
||||
|
||||
expect(() => JSON.parse(jsonOutput)).not.toThrow();
|
||||
});
|
||||
|
||||
test("handles nested CAS refs with control characters", async () => {
|
||||
const setup = await setupTest(casDir);
|
||||
const stepHash = await createTestStep(setup, [
|
||||
{
|
||||
index: 0,
|
||||
role: "assistant",
|
||||
content: "First turn\nwith newline",
|
||||
toolCalls: [
|
||||
{
|
||||
name: "Bash",
|
||||
args: "cmd1\nline2",
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
index: 1,
|
||||
role: "assistant",
|
||||
content: "Second turn\twith tab",
|
||||
toolCalls: null,
|
||||
},
|
||||
]);
|
||||
|
||||
const result = await cmdStepShow(testDir, stepHash);
|
||||
const jsonOutput = formatOutput(result, "json");
|
||||
|
||||
expect(() => JSON.parse(jsonOutput)).not.toThrow();
|
||||
const parsed = JSON.parse(jsonOutput);
|
||||
expect(parsed.turns).toHaveLength(2);
|
||||
});
|
||||
|
||||
test("YAML output format is unaffected", async () => {
|
||||
const setup = await setupTest(casDir);
|
||||
const stepHash = await createTestStep(setup, [
|
||||
{
|
||||
index: 0,
|
||||
role: "assistant",
|
||||
content: "Running command",
|
||||
toolCalls: [
|
||||
{
|
||||
name: "Bash",
|
||||
args: "echo 'line1'\necho 'line2'",
|
||||
},
|
||||
],
|
||||
},
|
||||
]);
|
||||
|
||||
const result = await cmdStepShow(testDir, stepHash);
|
||||
const yamlOutput = formatOutput(result, "yaml");
|
||||
|
||||
expect(yamlOutput).toContain("turns:");
|
||||
expect(yamlOutput.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
test("handles empty and null values", async () => {
|
||||
const setup = await setupTest(casDir);
|
||||
const stepHash = await createTestStep(setup, [
|
||||
{
|
||||
index: 0,
|
||||
role: "assistant",
|
||||
content: "",
|
||||
toolCalls: null,
|
||||
},
|
||||
]);
|
||||
|
||||
const result = await cmdStepShow(testDir, stepHash);
|
||||
const jsonOutput = formatOutput(result, "json");
|
||||
|
||||
expect(() => JSON.parse(jsonOutput)).not.toThrow();
|
||||
const parsed = JSON.parse(jsonOutput);
|
||||
expect(parsed.turns).toBeDefined();
|
||||
});
|
||||
|
||||
test("handles large step with multiple tool calls", async () => {
|
||||
const setup = await setupTest(casDir);
|
||||
|
||||
const turns = [];
|
||||
for (let i = 0; i < 25; i++) {
|
||||
turns.push({
|
||||
index: i,
|
||||
role: "assistant" as const,
|
||||
content: `Turn ${i}\nwith newline`,
|
||||
toolCalls: [
|
||||
{
|
||||
name: "Bash",
|
||||
args: `command${i}\nline2\tfield${i}`,
|
||||
},
|
||||
{
|
||||
name: "Read",
|
||||
args: `/path/to/file${i}`,
|
||||
},
|
||||
],
|
||||
});
|
||||
}
|
||||
|
||||
const stepHash = await createTestStep(setup, turns);
|
||||
|
||||
const startTime = Date.now();
|
||||
const result = await cmdStepShow(testDir, stepHash);
|
||||
const jsonOutput = formatOutput(result, "json");
|
||||
const duration = Date.now() - startTime;
|
||||
|
||||
expect(duration).toBeLessThan(2000);
|
||||
expect(() => JSON.parse(jsonOutput)).not.toThrow();
|
||||
|
||||
const parsed = JSON.parse(jsonOutput);
|
||||
expect(parsed.turns).toHaveLength(25);
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,227 @@
|
||||
import { mkdir, rm, writeFile } from "node:fs/promises";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import type { ThreadId } from "@uncaged/workflow-protocol";
|
||||
import { describe, expect, test } from "vitest";
|
||||
import { createMarker, deleteMarker } from "../background/index.js";
|
||||
import { cmdThreadShow, cmdThreadStart } from "../commands/thread.js";
|
||||
import { appendThreadHistory, loadThreadsIndex } from "../store.js";
|
||||
|
||||
const TEST_WORKFLOW_YAML = `
|
||||
name: test-status
|
||||
description: Test workflow for status field
|
||||
roles:
|
||||
planner:
|
||||
description: Plans the work
|
||||
goal: Plan implementation
|
||||
capabilities: ["planning"]
|
||||
procedure: Plan
|
||||
output: |
|
||||
$status: "ready"
|
||||
frontmatter:
|
||||
type: object
|
||||
required: ["$status"]
|
||||
properties:
|
||||
$status: { type: string }
|
||||
graph:
|
||||
$START:
|
||||
_:
|
||||
role: planner
|
||||
prompt: "Plan the work"
|
||||
location: null
|
||||
planner:
|
||||
_:
|
||||
role: $END
|
||||
prompt: "Done"
|
||||
location: null
|
||||
`;
|
||||
|
||||
describe("thread show status field", () => {
|
||||
let tmpDir: string;
|
||||
let storageRoot: string;
|
||||
|
||||
async function setupTestEnv() {
|
||||
tmpDir = join(tmpdir(), `uwf-test-status-${Date.now()}`);
|
||||
storageRoot = join(tmpDir, "storage");
|
||||
await mkdir(storageRoot, { recursive: true });
|
||||
}
|
||||
|
||||
async function teardown() {
|
||||
if (tmpDir) {
|
||||
await rm(tmpDir, { recursive: true, force: true });
|
||||
}
|
||||
}
|
||||
|
||||
test("active idle thread shows status 'idle'", async () => {
|
||||
await setupTestEnv();
|
||||
|
||||
const workflowPath = join(tmpDir, "test-status.yaml");
|
||||
await writeFile(workflowPath, TEST_WORKFLOW_YAML, "utf8");
|
||||
|
||||
// Create a thread
|
||||
const startResult = await cmdThreadStart(storageRoot, workflowPath, "test prompt", tmpDir);
|
||||
const threadId = startResult.thread as ThreadId;
|
||||
|
||||
// Show the thread (should be idle)
|
||||
const result = await cmdThreadShow(storageRoot, threadId);
|
||||
|
||||
expect(result.status).toBe("idle");
|
||||
expect(result.done).toBe(false);
|
||||
expect(result.background).toBe(null);
|
||||
expect(result.thread).toBe(threadId);
|
||||
|
||||
await teardown();
|
||||
});
|
||||
|
||||
test("active running thread shows status 'running'", async () => {
|
||||
await setupTestEnv();
|
||||
|
||||
const workflowPath = join(tmpDir, "test-status.yaml");
|
||||
await writeFile(workflowPath, TEST_WORKFLOW_YAML, "utf8");
|
||||
|
||||
// Create a thread
|
||||
const startResult = await cmdThreadStart(storageRoot, workflowPath, "test prompt", tmpDir);
|
||||
const threadId = startResult.thread as ThreadId;
|
||||
const workflow = startResult.workflow;
|
||||
|
||||
// Create a running marker
|
||||
await createMarker(storageRoot, {
|
||||
thread: threadId,
|
||||
workflow,
|
||||
pid: process.pid,
|
||||
startedAt: Date.now(),
|
||||
});
|
||||
|
||||
try {
|
||||
const result = await cmdThreadShow(storageRoot, threadId);
|
||||
|
||||
expect(result.status).toBe("running");
|
||||
expect(result.done).toBe(false);
|
||||
expect(result.background).toBe(null);
|
||||
expect(result.thread).toBe(threadId);
|
||||
} finally {
|
||||
// Cleanup: delete marker
|
||||
await deleteMarker(storageRoot, threadId);
|
||||
await teardown();
|
||||
}
|
||||
});
|
||||
|
||||
test("completed thread shows status 'completed'", async () => {
|
||||
await setupTestEnv();
|
||||
|
||||
const workflowPath = join(tmpDir, "test-status.yaml");
|
||||
await writeFile(workflowPath, TEST_WORKFLOW_YAML, "utf8");
|
||||
|
||||
// Create a thread
|
||||
const startResult = await cmdThreadStart(storageRoot, workflowPath, "test prompt", tmpDir);
|
||||
const threadId = startResult.thread as ThreadId;
|
||||
const workflow = startResult.workflow;
|
||||
|
||||
// Get the head hash before moving to history
|
||||
const index = await loadThreadsIndex(storageRoot);
|
||||
const head = index[threadId];
|
||||
if (!head) throw new Error("Thread not found in index");
|
||||
|
||||
// Move thread to history with reason 'completed'
|
||||
const { saveThreadsIndex } = await import("../store.js");
|
||||
const newIndex = { ...index };
|
||||
delete newIndex[threadId];
|
||||
await saveThreadsIndex(storageRoot, newIndex);
|
||||
|
||||
await appendThreadHistory(storageRoot, {
|
||||
thread: threadId,
|
||||
workflow,
|
||||
head,
|
||||
completedAt: Date.now(),
|
||||
reason: "completed",
|
||||
});
|
||||
|
||||
const result = await cmdThreadShow(storageRoot, threadId);
|
||||
|
||||
expect(result.status).toBe("completed");
|
||||
expect(result.done).toBe(true);
|
||||
expect(result.background).toBe(null);
|
||||
expect(result.thread).toBe(threadId);
|
||||
|
||||
await teardown();
|
||||
});
|
||||
|
||||
test("cancelled thread shows status 'cancelled'", async () => {
|
||||
await setupTestEnv();
|
||||
|
||||
const workflowPath = join(tmpDir, "test-status.yaml");
|
||||
await writeFile(workflowPath, TEST_WORKFLOW_YAML, "utf8");
|
||||
|
||||
// Create a thread
|
||||
const startResult = await cmdThreadStart(storageRoot, workflowPath, "test prompt", tmpDir);
|
||||
const threadId = startResult.thread as ThreadId;
|
||||
const workflow = startResult.workflow;
|
||||
|
||||
// Get the head hash before moving to history
|
||||
const index = await loadThreadsIndex(storageRoot);
|
||||
const head = index[threadId];
|
||||
if (!head) throw new Error("Thread not found in index");
|
||||
|
||||
// Move thread to history with reason 'cancelled'
|
||||
const { saveThreadsIndex } = await import("../store.js");
|
||||
const newIndex = { ...index };
|
||||
delete newIndex[threadId];
|
||||
await saveThreadsIndex(storageRoot, newIndex);
|
||||
|
||||
await appendThreadHistory(storageRoot, {
|
||||
thread: threadId,
|
||||
workflow,
|
||||
head,
|
||||
completedAt: Date.now(),
|
||||
reason: "cancelled",
|
||||
});
|
||||
|
||||
const result = await cmdThreadShow(storageRoot, threadId);
|
||||
|
||||
expect(result.status).toBe("cancelled");
|
||||
expect(result.done).toBe(true);
|
||||
expect(result.background).toBe(null);
|
||||
expect(result.thread).toBe(threadId);
|
||||
|
||||
await teardown();
|
||||
});
|
||||
|
||||
test("legacy completed thread without reason shows status 'completed'", async () => {
|
||||
await setupTestEnv();
|
||||
|
||||
const workflowPath = join(tmpDir, "test-status.yaml");
|
||||
await writeFile(workflowPath, TEST_WORKFLOW_YAML, "utf8");
|
||||
|
||||
// Create a thread
|
||||
const startResult = await cmdThreadStart(storageRoot, workflowPath, "test prompt", tmpDir);
|
||||
const threadId = startResult.thread as ThreadId;
|
||||
const workflow = startResult.workflow;
|
||||
|
||||
// Get the head hash before moving to history
|
||||
const index = await loadThreadsIndex(storageRoot);
|
||||
const head = index[threadId];
|
||||
if (!head) throw new Error("Thread not found in index");
|
||||
|
||||
// Move thread to history with reason null (legacy format)
|
||||
const { saveThreadsIndex } = await import("../store.js");
|
||||
const newIndex = { ...index };
|
||||
delete newIndex[threadId];
|
||||
await saveThreadsIndex(storageRoot, newIndex);
|
||||
|
||||
await appendThreadHistory(storageRoot, {
|
||||
thread: threadId,
|
||||
workflow,
|
||||
head,
|
||||
completedAt: Date.now(),
|
||||
reason: null,
|
||||
});
|
||||
|
||||
const result = await cmdThreadShow(storageRoot, threadId);
|
||||
|
||||
expect(result.status).toBe("completed");
|
||||
expect(result.done).toBe(true);
|
||||
expect(result.background).toBe(null);
|
||||
|
||||
await teardown();
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,148 @@
|
||||
import { execFileSync } from "node:child_process";
|
||||
import { mkdir, rm, writeFile } from "node:fs/promises";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import type { CasRef, StartNodePayload, ThreadId } from "@uncaged/workflow-protocol";
|
||||
import { describe, expect, test } from "vitest";
|
||||
import { cmdThreadStart } from "../commands/thread.js";
|
||||
import { createUwfStore, loadThreadsIndex } from "../store.js";
|
||||
|
||||
describe("thread start --cwd CLI option", () => {
|
||||
let tmpDir: string;
|
||||
let storageRoot: string;
|
||||
|
||||
async function setupTestEnv() {
|
||||
tmpDir = join(tmpdir(), `uwf-test-cwd-cli-${Date.now()}`);
|
||||
storageRoot = join(tmpDir, "storage");
|
||||
await mkdir(storageRoot, { recursive: true });
|
||||
}
|
||||
|
||||
async function teardown() {
|
||||
if (tmpDir) {
|
||||
await rm(tmpDir, { recursive: true, force: true });
|
||||
}
|
||||
}
|
||||
|
||||
async function createTestWorkflow(): Promise<string> {
|
||||
const workflowYaml = `
|
||||
name: test-cwd-cli
|
||||
description: Test workflow for CLI cwd option
|
||||
roles:
|
||||
planner:
|
||||
description: Plans the work
|
||||
goal: Plan implementation
|
||||
capabilities: ["planning"]
|
||||
procedure: Plan
|
||||
output: |
|
||||
$status: "ready"
|
||||
frontmatter:
|
||||
type: object
|
||||
required: ["$status"]
|
||||
properties:
|
||||
$status: { type: string }
|
||||
graph:
|
||||
$START:
|
||||
_:
|
||||
role: planner
|
||||
prompt: "Plan the work"
|
||||
location: null
|
||||
planner:
|
||||
_:
|
||||
role: $END
|
||||
prompt: "Done"
|
||||
location: null
|
||||
`;
|
||||
|
||||
const workflowPath = join(tmpDir, "test-cwd-cli.yaml");
|
||||
await writeFile(workflowPath, workflowYaml, "utf8");
|
||||
return workflowPath;
|
||||
}
|
||||
|
||||
async function getStartNodeCwd(threadId: string): Promise<string> {
|
||||
const uwf = await createUwfStore(storageRoot);
|
||||
const index = await loadThreadsIndex(storageRoot);
|
||||
const headHash = index[threadId as ThreadId];
|
||||
expect(headHash).toBeDefined();
|
||||
|
||||
const startNode = uwf.store.get(headHash as CasRef);
|
||||
expect(startNode).not.toBe(null);
|
||||
expect(startNode?.type).toBe(uwf.schemas.startNode);
|
||||
|
||||
const startPayload = startNode?.payload as StartNodePayload;
|
||||
return startPayload.cwd;
|
||||
}
|
||||
|
||||
test("thread start with custom cwd via cmdThreadStart", async () => {
|
||||
await setupTestEnv();
|
||||
|
||||
const workflowPath = await createTestWorkflow();
|
||||
const testCwd = "/test/custom/path";
|
||||
|
||||
const result = await cmdThreadStart(storageRoot, workflowPath, "test prompt", tmpDir, testCwd);
|
||||
|
||||
expect(result.thread).toBeDefined();
|
||||
const actualCwd = await getStartNodeCwd(result.thread);
|
||||
expect(actualCwd).toBe(testCwd);
|
||||
|
||||
await teardown();
|
||||
});
|
||||
|
||||
test("thread start without cwd defaults to process.cwd()", async () => {
|
||||
await setupTestEnv();
|
||||
|
||||
const workflowPath = await createTestWorkflow();
|
||||
|
||||
// Call without cwd parameter (it defaults to process.cwd())
|
||||
const result = await cmdThreadStart(storageRoot, workflowPath, "test prompt", tmpDir);
|
||||
|
||||
expect(result.thread).toBeDefined();
|
||||
const actualCwd = await getStartNodeCwd(result.thread);
|
||||
expect(actualCwd).toBe(process.cwd());
|
||||
|
||||
await teardown();
|
||||
});
|
||||
|
||||
test("thread start with relative path fails", async () => {
|
||||
await setupTestEnv();
|
||||
|
||||
const workflowPath = await createTestWorkflow();
|
||||
|
||||
await expect(
|
||||
cmdThreadStart(storageRoot, workflowPath, "test", tmpDir, "relative/path"),
|
||||
).rejects.toThrow();
|
||||
|
||||
await teardown();
|
||||
});
|
||||
|
||||
test("CLI accepts --cwd option without error", async () => {
|
||||
await setupTestEnv();
|
||||
|
||||
const workflowPath = await createTestWorkflow();
|
||||
const testCwd = "/test/cli/path";
|
||||
const uwfBin = join(process.cwd(), "dist", "cli.js");
|
||||
|
||||
// Register the workflow
|
||||
execFileSync("node", [uwfBin, "workflow", "add", workflowPath], {
|
||||
env: { ...process.env, UWF_STORAGE_ROOT: storageRoot },
|
||||
encoding: "utf8",
|
||||
});
|
||||
|
||||
// Verify CLI accepts --cwd option (no error thrown)
|
||||
const output = execFileSync(
|
||||
"node",
|
||||
[uwfBin, "thread", "start", "test-cwd-cli", "-p", "test prompt", "--cwd", testCwd],
|
||||
{
|
||||
env: { ...process.env, UWF_STORAGE_ROOT: storageRoot },
|
||||
encoding: "utf8",
|
||||
},
|
||||
);
|
||||
|
||||
const result = JSON.parse(output);
|
||||
expect(result.thread).toBeDefined();
|
||||
expect(result.workflow).toBeDefined();
|
||||
|
||||
// The fact that we got here without throwing means CLI accepted the --cwd option
|
||||
// The actual cwd functionality is tested by the other tests using cmdThreadStart directly
|
||||
await teardown();
|
||||
});
|
||||
});
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import type { CasRef, ThreadId } from "@uncaged/workflow-protocol";
|
||||
import type { CasRef, ThreadId, ThreadStatus } from "@uncaged/workflow-protocol";
|
||||
import { Command } from "commander";
|
||||
import {
|
||||
cmdCasGet,
|
||||
@@ -38,7 +38,6 @@ import {
|
||||
cmdThreadStart,
|
||||
cmdThreadStop,
|
||||
THREAD_READ_DEFAULT_QUOTA,
|
||||
type ThreadStatus,
|
||||
} from "./commands/thread.js";
|
||||
import { parseTimeInput } from "./commands/thread-time-parser.js";
|
||||
import { cmdWorkflowAdd, cmdWorkflowList, cmdWorkflowShow } from "./commands/workflow.js";
|
||||
@@ -118,10 +117,17 @@ thread
|
||||
.description("Create a thread without executing")
|
||||
.argument("<workflow>", "Workflow name or hash")
|
||||
.requiredOption("-p, --prompt <text>", "User prompt")
|
||||
.action((workflow: string, opts: { prompt: string }) => {
|
||||
.option("--cwd <path>", "Working directory for thread execution (default: process.cwd())")
|
||||
.action((workflow: string, opts: { prompt: string; cwd: string | undefined }) => {
|
||||
const storageRoot = resolveStorageRoot();
|
||||
runAction(async () => {
|
||||
const result = await cmdThreadStart(storageRoot, workflow, opts.prompt, process.cwd());
|
||||
const result = await cmdThreadStart(
|
||||
storageRoot,
|
||||
workflow,
|
||||
opts.prompt,
|
||||
process.cwd(),
|
||||
opts.cwd ?? process.cwd(),
|
||||
);
|
||||
writeOutput(result);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -12,6 +12,7 @@ import type {
|
||||
StepOutput,
|
||||
ThreadId,
|
||||
ThreadListItem,
|
||||
ThreadStatus,
|
||||
ThreadsIndex,
|
||||
WorkflowConfig,
|
||||
WorkflowPayload,
|
||||
@@ -22,6 +23,7 @@ import {
|
||||
generateUlid,
|
||||
type ProcessLogger,
|
||||
} from "@uncaged/workflow-util";
|
||||
import type { AdapterOutput } from "@uncaged/workflow-util-agent";
|
||||
import { getEnvPath, loadWorkflowConfig } from "@uncaged/workflow-util-agent";
|
||||
import { config as loadDotenv } from "dotenv";
|
||||
import { parse } from "yaml";
|
||||
@@ -55,6 +57,21 @@ const END_ROLE = "$END";
|
||||
const START_ROLE = "$START";
|
||||
export const THREAD_READ_DEFAULT_QUOTA = 4000;
|
||||
|
||||
/**
|
||||
* Derive the current/next role from the workflow graph and chain state.
|
||||
* Returns null when the next role is $END or evaluation fails.
|
||||
*/
|
||||
function resolveCurrentRole(uwf: UwfStore, head: CasRef, workflowRef: CasRef): string | null {
|
||||
const chain = walkChain(uwf, head);
|
||||
const { lastRole, lastOutput } = resolveEvaluateArgs(uwf, chain);
|
||||
const workflow = loadWorkflowPayload(uwf, workflowRef);
|
||||
const result = evaluate(workflow.graph, lastRole, lastOutput);
|
||||
if (!result.ok) {
|
||||
return null;
|
||||
}
|
||||
return result.value.role === END_ROLE ? null : result.value.role;
|
||||
}
|
||||
|
||||
const PL_THREAD_START = "7HNQ4B2X";
|
||||
const PL_MODERATOR = "M3K8V9T1";
|
||||
const PL_AGENT_SPAWN = "R5J2W8N4";
|
||||
@@ -315,10 +332,18 @@ export async function cmdThreadShow(storageRoot: string, threadId: ThreadId): Pr
|
||||
if (workflow === null) {
|
||||
fail(`failed to resolve workflow from head: ${activeHead}`);
|
||||
}
|
||||
|
||||
// Check if thread is running
|
||||
const runningMarker = await isThreadRunning(storageRoot, threadId);
|
||||
const status: ThreadStatus = runningMarker !== null ? "running" : "idle";
|
||||
const currentRole = resolveCurrentRole(uwf, activeHead, workflow);
|
||||
|
||||
return {
|
||||
workflow,
|
||||
thread: threadId,
|
||||
head: activeHead,
|
||||
status,
|
||||
currentRole,
|
||||
done: false,
|
||||
background: null,
|
||||
};
|
||||
@@ -326,10 +351,14 @@ export async function cmdThreadShow(storageRoot: string, threadId: ThreadId): Pr
|
||||
|
||||
const hist = await findThreadInHistory(storageRoot, threadId);
|
||||
if (hist !== null) {
|
||||
const status: ThreadStatus = hist.reason === "cancelled" ? "cancelled" : "completed";
|
||||
|
||||
return {
|
||||
workflow: hist.workflow,
|
||||
thread: threadId,
|
||||
head: hist.head,
|
||||
status,
|
||||
currentRole: null,
|
||||
done: true,
|
||||
background: null,
|
||||
};
|
||||
@@ -338,10 +367,9 @@ export async function cmdThreadShow(storageRoot: string, threadId: ThreadId): Pr
|
||||
fail(`thread not found: ${threadId}`);
|
||||
}
|
||||
|
||||
export type ThreadStatus = "idle" | "running" | "completed" | "cancelled";
|
||||
|
||||
export type ThreadListItemWithStatus = ThreadListItem & {
|
||||
status: ThreadStatus;
|
||||
currentRole: string | null;
|
||||
};
|
||||
|
||||
async function threadListItemFromActive(
|
||||
@@ -359,7 +387,13 @@ async function threadListItemFromActive(
|
||||
const runningMarker = await isThreadRunning(storageRoot, threadId);
|
||||
const status: ThreadStatus = runningMarker !== null ? "running" : "idle";
|
||||
|
||||
return { thread: threadId, workflow, head, status };
|
||||
return {
|
||||
thread: threadId,
|
||||
workflow,
|
||||
head,
|
||||
status,
|
||||
currentRole: resolveCurrentRole(uwf, head, workflow),
|
||||
};
|
||||
}
|
||||
|
||||
async function collectActiveThreads(
|
||||
@@ -397,6 +431,7 @@ async function collectCompletedThreads(
|
||||
workflow: entry.workflow,
|
||||
head: entry.head,
|
||||
status: entry.reason === "cancelled" ? "cancelled" : "completed",
|
||||
currentRole: null,
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -780,7 +815,7 @@ function spawnAgent(
|
||||
role: string,
|
||||
edgePrompt: string,
|
||||
cwd: string,
|
||||
): CasRef {
|
||||
): AdapterOutput {
|
||||
const argv = [...agent.args, "--thread", threadId, "--role", role, "--prompt", edgePrompt];
|
||||
let stdout: string;
|
||||
try {
|
||||
@@ -803,10 +838,22 @@ function spawnAgent(
|
||||
}
|
||||
|
||||
const line = stdout.trim().split("\n").pop()?.trim() ?? "";
|
||||
if (!isCasRef(line)) {
|
||||
failStep(plog, `agent stdout is not a valid CAS hash: ${line || "(empty)"}`);
|
||||
let parsed: unknown;
|
||||
try {
|
||||
parsed = JSON.parse(line);
|
||||
} catch {
|
||||
failStep(plog, `agent stdout last line is not valid JSON: ${line || "(empty)"}`);
|
||||
}
|
||||
return line;
|
||||
const obj = parsed as Record<string, unknown>;
|
||||
if (
|
||||
typeof obj !== "object" ||
|
||||
obj === null ||
|
||||
typeof obj.stepHash !== "string" ||
|
||||
!isCasRef(obj.stepHash as string)
|
||||
) {
|
||||
failStep(plog, `agent stdout JSON missing valid stepHash: ${line}`);
|
||||
}
|
||||
return obj as unknown as AdapterOutput;
|
||||
}
|
||||
|
||||
async function archiveThread(
|
||||
@@ -917,6 +964,8 @@ async function cmdThreadStepBackground(
|
||||
failStep(plog, `thread not active: ${threadId}`);
|
||||
}
|
||||
|
||||
const uwf = await createUwfStore(storageRoot);
|
||||
|
||||
// Spawn detached background process
|
||||
const scriptPath = process.argv[1];
|
||||
if (scriptPath === undefined) {
|
||||
@@ -947,6 +996,8 @@ async function cmdThreadStepBackground(
|
||||
workflow: workflowHash,
|
||||
thread: threadId,
|
||||
head: headHash,
|
||||
status: "running",
|
||||
currentRole: resolveCurrentRole(uwf, headHash, workflowHash),
|
||||
done: false,
|
||||
background: true,
|
||||
},
|
||||
@@ -989,6 +1040,8 @@ async function cmdThreadStepOnce(
|
||||
workflow: workflowHash,
|
||||
thread: threadId,
|
||||
head: headHash,
|
||||
status: "completed",
|
||||
currentRole: null,
|
||||
done: true,
|
||||
background: null,
|
||||
};
|
||||
@@ -1009,7 +1062,8 @@ async function cmdThreadStepOnce(
|
||||
});
|
||||
|
||||
loadDotenv({ path: getEnvPath(storageRoot) });
|
||||
const newHead = spawnAgent(plog, agent, threadId, role, edgePrompt, effectiveCwd);
|
||||
const agentResult = spawnAgent(plog, agent, threadId, role, edgePrompt, effectiveCwd);
|
||||
const newHead = agentResult.stepHash as CasRef;
|
||||
|
||||
plog.log(PL_AGENT_DONE, `agent returned head=${newHead}`, null);
|
||||
|
||||
@@ -1041,10 +1095,16 @@ async function cmdThreadStepOnce(
|
||||
await archiveThread(storageRoot, threadId, workflowHash, newHead);
|
||||
}
|
||||
|
||||
// Determine status based on whether thread is done and running state
|
||||
const status: ThreadStatus = done ? "completed" : "idle";
|
||||
const currentRole = done ? null : afterResult.value.role;
|
||||
|
||||
return {
|
||||
workflow: workflowHash,
|
||||
thread: threadId,
|
||||
head: newHead,
|
||||
status,
|
||||
currentRole,
|
||||
done,
|
||||
background: null,
|
||||
};
|
||||
|
||||
@@ -29,6 +29,7 @@ export type {
|
||||
ThreadForkOutput,
|
||||
ThreadId,
|
||||
ThreadListItem,
|
||||
ThreadStatus,
|
||||
ThreadStepsOutput,
|
||||
ThreadsIndex,
|
||||
WorkflowConfig,
|
||||
|
||||
@@ -76,17 +76,29 @@ export type ModeratorContext = {
|
||||
|
||||
// ── 4.5 CLI 输出 ────────────────────────────────────────────────────
|
||||
|
||||
/** Thread status — unified status representation */
|
||||
export type ThreadStatus = "idle" | "running" | "completed" | "cancelled";
|
||||
|
||||
/** uwf thread start */
|
||||
export type StartOutput = {
|
||||
workflow: CasRef;
|
||||
thread: ThreadId;
|
||||
};
|
||||
|
||||
/** uwf thread step / uwf thread show */
|
||||
/**
|
||||
* Output from thread show and thread exec commands.
|
||||
*
|
||||
* @property status - Current thread status (idle/running/completed/cancelled)
|
||||
* @property done - @deprecated Use status field instead. True if thread is completed or cancelled.
|
||||
* @property background - @deprecated Use status field instead. Always null in current implementation.
|
||||
*/
|
||||
export type StepOutput = {
|
||||
workflow: CasRef;
|
||||
thread: ThreadId;
|
||||
head: CasRef;
|
||||
status: ThreadStatus;
|
||||
/** The current or next role. Null when completed, cancelled, or next is $END. */
|
||||
currentRole: string | null;
|
||||
done: boolean;
|
||||
background: boolean | null;
|
||||
};
|
||||
|
||||
@@ -0,0 +1,72 @@
|
||||
import { createMemoryStore, putSchema } from "@uncaged/json-cas";
|
||||
import { describe, expect, test } from "vitest";
|
||||
|
||||
import { tryFrontmatterFastPath } from "../src/frontmatter.js";
|
||||
|
||||
// ── Helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
const PLANNER_SCHEMA = {
|
||||
type: "object",
|
||||
properties: {
|
||||
$status: { type: "string", enum: ["ready", "failed"] },
|
||||
plan: { type: "string" },
|
||||
},
|
||||
required: ["$status"],
|
||||
additionalProperties: false,
|
||||
};
|
||||
|
||||
describe("adapter-stdout: A4 retry loop survives JSON output", () => {
|
||||
test("A4. first extraction fails, second succeeds — final result has correct data", async () => {
|
||||
const store = createMemoryStore();
|
||||
const schemaHash = await putSchema(store, PLANNER_SCHEMA);
|
||||
|
||||
// Simulate the retry loop from createAgent (run.ts lines 163-173):
|
||||
// First attempt: agent outputs garbage (no frontmatter)
|
||||
const badOutput = "Here is my response without frontmatter.\nJust plain text.";
|
||||
const firstAttempt = await tryFrontmatterFastPath(badOutput, schemaHash, store);
|
||||
expect(firstAttempt).toBeNull();
|
||||
|
||||
// Second attempt (after correction message): agent outputs valid frontmatter
|
||||
const goodOutput = `---\n$status: ready\nplan: corrected-hash\n---\nCorrected body with valid frontmatter.`;
|
||||
const secondAttempt = await tryFrontmatterFastPath(goodOutput, schemaHash, store);
|
||||
|
||||
expect(secondAttempt).not.toBeNull();
|
||||
expect(secondAttempt!.outputHash).toMatch(/^[0-9A-Z]{13}$/);
|
||||
expect(secondAttempt!.frontmatter).toEqual({ $status: "ready", plan: "corrected-hash" });
|
||||
expect(secondAttempt!.body).toBe("Corrected body with valid frontmatter.");
|
||||
|
||||
// Verify the final AdapterOutput shape would be correct
|
||||
const adapterOutput = {
|
||||
stepHash: "MOCK_STEP_HASH",
|
||||
detailHash: "MOCK_DETAIL_HA",
|
||||
role: "planner",
|
||||
frontmatter: secondAttempt!.frontmatter,
|
||||
body: secondAttempt!.body,
|
||||
startedAtMs: 1000,
|
||||
completedAtMs: 2000,
|
||||
};
|
||||
|
||||
const json = JSON.stringify(adapterOutput);
|
||||
const parsed = JSON.parse(json);
|
||||
expect(parsed.frontmatter).toEqual({ $status: "ready", plan: "corrected-hash" });
|
||||
expect(parsed.body).toBe("Corrected body with valid frontmatter.");
|
||||
expect(parsed.completedAtMs).toBeGreaterThanOrEqual(parsed.startedAtMs);
|
||||
});
|
||||
|
||||
test("A4. all retries fail — extraction returns null on every attempt", async () => {
|
||||
const store = createMemoryStore();
|
||||
const schemaHash = await putSchema(store, PLANNER_SCHEMA);
|
||||
|
||||
const MAX_RETRIES = 2;
|
||||
const badOutput = "No frontmatter here";
|
||||
|
||||
// Simulate MAX_FRONTMATTER_RETRIES iterations all failing
|
||||
let extracted = await tryFrontmatterFastPath(badOutput, schemaHash, store);
|
||||
for (let retry = 0; retry < MAX_RETRIES && extracted === null; retry++) {
|
||||
// Each retry also gets bad output
|
||||
extracted = await tryFrontmatterFastPath(badOutput, schemaHash, store);
|
||||
}
|
||||
|
||||
expect(extracted).toBeNull();
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,105 @@
|
||||
import { createMemoryStore, putSchema } from "@uncaged/json-cas";
|
||||
import { describe, expect, test } from "vitest";
|
||||
|
||||
import { tryFrontmatterFastPath } from "../src/frontmatter.js";
|
||||
|
||||
// ── Helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
const PLANNER_SCHEMA = {
|
||||
type: "object",
|
||||
properties: {
|
||||
$status: { type: "string", enum: ["ready", "failed"] },
|
||||
plan: { type: "string" },
|
||||
},
|
||||
required: ["$status"],
|
||||
additionalProperties: false,
|
||||
};
|
||||
|
||||
const FRONTMATTER_SCHEMA = {
|
||||
type: "object",
|
||||
properties: {
|
||||
status: { anyOf: [{ type: "string" }, { type: "null" }] },
|
||||
next: { anyOf: [{ type: "string" }, { type: "null" }] },
|
||||
confidence: { anyOf: [{ type: "number" }, { type: "null" }] },
|
||||
artifacts: { type: "array", items: { type: "string" } },
|
||||
scope: { type: "string" },
|
||||
},
|
||||
required: ["status", "next", "confidence", "artifacts", "scope"],
|
||||
additionalProperties: false,
|
||||
};
|
||||
|
||||
describe("adapter-stdout: FrontmatterFastPathResult includes frontmatter", () => {
|
||||
test("A2. frontmatter field contains the parsed YAML frontmatter object", async () => {
|
||||
const store = createMemoryStore();
|
||||
const schemaHash = await putSchema(store, PLANNER_SCHEMA);
|
||||
|
||||
const raw = `---\n$status: ready\nplan: abc123\n---\nSome body text`;
|
||||
const result = await tryFrontmatterFastPath(raw, schemaHash, store);
|
||||
|
||||
expect(result).not.toBeNull();
|
||||
expect(result!.frontmatter).toEqual({ $status: "ready", plan: "abc123" });
|
||||
});
|
||||
|
||||
test("A3. body field contains the markdown body after frontmatter", async () => {
|
||||
const store = createMemoryStore();
|
||||
const schemaHash = await putSchema(store, PLANNER_SCHEMA);
|
||||
|
||||
const raw = `---\n$status: ready\nplan: hash123\n---\nHere is the body.\n\nWith multiple paragraphs.`;
|
||||
const result = await tryFrontmatterFastPath(raw, schemaHash, store);
|
||||
|
||||
expect(result).not.toBeNull();
|
||||
expect(result!.body).toBe("Here is the body.\n\nWith multiple paragraphs.");
|
||||
});
|
||||
|
||||
test("A1. result contains outputHash as valid CasRef", async () => {
|
||||
const store = createMemoryStore();
|
||||
const schemaHash = await putSchema(store, FRONTMATTER_SCHEMA);
|
||||
|
||||
const raw = `---\nstatus: done\nnext: null\nconfidence: 0.9\nartifacts: []\nscope: test\n---\nBody`;
|
||||
const result = await tryFrontmatterFastPath(raw, schemaHash, store);
|
||||
|
||||
expect(result).not.toBeNull();
|
||||
expect(result!.outputHash).toMatch(/^[0-9A-Z]{13}$/);
|
||||
expect(result!.frontmatter).toBeDefined();
|
||||
expect(result!.body).toBe("Body");
|
||||
});
|
||||
});
|
||||
|
||||
describe("adapter-stdout: AdapterOutput JSON shape", () => {
|
||||
test("A5. JSON.stringify produces valid parseable JSON with all fields", () => {
|
||||
const output = {
|
||||
stepHash: "0123456789ABC",
|
||||
detailHash: "DEFGH12345678",
|
||||
role: "planner",
|
||||
frontmatter: { $status: "ready", plan: "somehash" },
|
||||
body: "Plan body text",
|
||||
startedAtMs: 1000,
|
||||
completedAtMs: 2000,
|
||||
};
|
||||
|
||||
const json = JSON.stringify(output);
|
||||
const parsed = JSON.parse(json);
|
||||
|
||||
expect(parsed.stepHash).toBe("0123456789ABC");
|
||||
expect(parsed.detailHash).toBe("DEFGH12345678");
|
||||
expect(parsed.role).toBe("planner");
|
||||
expect(parsed.frontmatter).toEqual({ $status: "ready", plan: "somehash" });
|
||||
expect(parsed.body).toBe("Plan body text");
|
||||
expect(parsed.startedAtMs).toBe(1000);
|
||||
expect(parsed.completedAtMs).toBe(2000);
|
||||
});
|
||||
|
||||
test("completedAtMs >= startedAtMs", () => {
|
||||
const output = {
|
||||
stepHash: "0123456789ABC",
|
||||
detailHash: "DEFGH12345678",
|
||||
role: "planner",
|
||||
frontmatter: {},
|
||||
body: "",
|
||||
startedAtMs: 1000,
|
||||
completedAtMs: 2000,
|
||||
};
|
||||
|
||||
expect(output.completedAtMs).toBeGreaterThanOrEqual(output.startedAtMs);
|
||||
});
|
||||
});
|
||||
@@ -5,17 +5,13 @@ import { tryFrontmatterFastPath } from "../src/frontmatter.js";
|
||||
|
||||
// ── Helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
/** JSON Schema that exactly matches the AgentFrontmatter fields. */
|
||||
const FRONTMATTER_SCHEMA = {
|
||||
/** JSON Schema that matches the new status-only AgentFrontmatter. */
|
||||
const STATUS_ONLY_SCHEMA = {
|
||||
type: "object",
|
||||
properties: {
|
||||
status: { anyOf: [{ type: "string" }, { type: "null" }] },
|
||||
next: { anyOf: [{ type: "string" }, { type: "null" }] },
|
||||
confidence: { anyOf: [{ type: "number" }, { type: "null" }] },
|
||||
artifacts: { type: "array", items: { type: "string" } },
|
||||
scope: { type: "string" },
|
||||
},
|
||||
required: ["status", "next", "confidence", "artifacts", "scope"],
|
||||
required: ["status"],
|
||||
additionalProperties: false,
|
||||
};
|
||||
|
||||
@@ -56,24 +52,41 @@ async function makeStoreWithSchema(schema: Record<string, unknown>) {
|
||||
return { store, schemaHash };
|
||||
}
|
||||
|
||||
// ── STANDARD_KEYS ────────────────────────────────────────────────────────────
|
||||
|
||||
describe("STANDARD_KEYS contains only status", () => {
|
||||
test("STANDARD_KEYS is ['status']", async () => {
|
||||
// We verify indirectly: defaultCandidate (no schema fields) returns only { status }
|
||||
const { store, schemaHash } = await makeStoreWithSchema({
|
||||
type: "object",
|
||||
properties: {
|
||||
status: { anyOf: [{ type: "string" }, { type: "null" }] },
|
||||
},
|
||||
});
|
||||
|
||||
const raw = "---\nstatus: done\n---\n\nBody.";
|
||||
const result = await tryFrontmatterFastPath(raw, schemaHash, store);
|
||||
expect(result).not.toBeNull();
|
||||
|
||||
const node = store.get(result!.outputHash);
|
||||
expect(node).not.toBeNull();
|
||||
const payload = node!.payload as Record<string, unknown>;
|
||||
expect(payload.status).toBe("done");
|
||||
// Legacy fields must NOT be present
|
||||
expect(payload.next).toBeUndefined();
|
||||
expect(payload.confidence).toBeUndefined();
|
||||
expect(payload.artifacts).toBeUndefined();
|
||||
expect(payload.scope).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
// ── Happy path ─────────────────────────────────────────────────────────────────
|
||||
|
||||
describe("tryFrontmatterFastPath — happy path", () => {
|
||||
test("parses valid frontmatter and returns outputHash + stripped body", async () => {
|
||||
const { store, schemaHash } = await makeStoreWithSchema(FRONTMATTER_SCHEMA);
|
||||
const { store, schemaHash } = await makeStoreWithSchema(STATUS_ONLY_SCHEMA);
|
||||
|
||||
const raw = [
|
||||
"---",
|
||||
"status: done",
|
||||
"next: reviewer",
|
||||
"confidence: 0.9",
|
||||
"artifacts: [src/foo.ts]",
|
||||
"scope: role",
|
||||
"---",
|
||||
"",
|
||||
"## Summary",
|
||||
"Work is complete.",
|
||||
].join("\n");
|
||||
const raw = ["---", "status: done", "---", "", "## Summary", "Work is complete."].join("\n");
|
||||
|
||||
const result = await tryFrontmatterFastPath(raw, schemaHash, store);
|
||||
|
||||
@@ -85,11 +98,10 @@ describe("tryFrontmatterFastPath — happy path", () => {
|
||||
expect((result?.outputHash ?? "").length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
test("stored CAS node payload matches frontmatter fields", async () => {
|
||||
const { store, schemaHash } = await makeStoreWithSchema(FRONTMATTER_SCHEMA);
|
||||
test("stored CAS node payload has only status", async () => {
|
||||
const { store, schemaHash } = await makeStoreWithSchema(STATUS_ONLY_SCHEMA);
|
||||
|
||||
const raw =
|
||||
"---\nstatus: done\nnext: null\nconfidence: null\nartifacts: []\nscope: role\n---\n\nBody.";
|
||||
const raw = "---\nstatus: done\n---\n\nBody.";
|
||||
|
||||
const result = await tryFrontmatterFastPath(raw, schemaHash, store);
|
||||
expect(result).not.toBeNull();
|
||||
@@ -98,10 +110,29 @@ describe("tryFrontmatterFastPath — happy path", () => {
|
||||
expect(node).not.toBeNull();
|
||||
const payload = node!.payload as Record<string, unknown>;
|
||||
expect(payload.status).toBe("done");
|
||||
expect(payload.next).toBeNull();
|
||||
expect(payload.confidence).toBeNull();
|
||||
expect(payload.artifacts).toEqual([]);
|
||||
expect(payload.scope).toBe("role");
|
||||
expect(Object.keys(payload)).toEqual(["status"]);
|
||||
});
|
||||
});
|
||||
|
||||
// ── Legacy fields in input are ignored ──────────────────────────────────────
|
||||
|
||||
describe("tryFrontmatterFastPath — legacy fields ignored", () => {
|
||||
test("legacy fields in input do not appear in CAS output", async () => {
|
||||
const { store, schemaHash } = await makeStoreWithSchema(STATUS_ONLY_SCHEMA);
|
||||
|
||||
const raw =
|
||||
"---\nstatus: done\nnext: reviewer\nconfidence: 0.9\nartifacts: [a.ts]\nscope: thread\n---\n\nBody.";
|
||||
|
||||
const result = await tryFrontmatterFastPath(raw, schemaHash, store);
|
||||
expect(result).not.toBeNull();
|
||||
|
||||
const node = store.get(result!.outputHash);
|
||||
const payload = node!.payload as Record<string, unknown>;
|
||||
expect(payload.status).toBe("done");
|
||||
expect(payload.next).toBeUndefined();
|
||||
expect(payload.confidence).toBeUndefined();
|
||||
expect(payload.artifacts).toBeUndefined();
|
||||
expect(payload.scope).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -109,7 +140,7 @@ describe("tryFrontmatterFastPath — happy path", () => {
|
||||
|
||||
describe("tryFrontmatterFastPath — fallback: no frontmatter", () => {
|
||||
test("returns null for plain markdown without frontmatter block", async () => {
|
||||
const { store, schemaHash } = await makeStoreWithSchema(FRONTMATTER_SCHEMA);
|
||||
const { store, schemaHash } = await makeStoreWithSchema(STATUS_ONLY_SCHEMA);
|
||||
|
||||
const result = await tryFrontmatterFastPath(
|
||||
"This is plain markdown without any frontmatter.",
|
||||
@@ -121,35 +152,13 @@ describe("tryFrontmatterFastPath — fallback: no frontmatter", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// ── Fallback: invalid frontmatter ─────────────────────────────────────────────
|
||||
|
||||
describe("tryFrontmatterFastPath — fallback: invalid frontmatter", () => {
|
||||
test("returns null when confidence is out of range [0, 1]", async () => {
|
||||
const { store, schemaHash } = await makeStoreWithSchema(FRONTMATTER_SCHEMA);
|
||||
|
||||
const raw = "---\nstatus: done\nconfidence: 1.5\nscope: role\n---\n\nBody.";
|
||||
|
||||
const result = await tryFrontmatterFastPath(raw, schemaHash, store);
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
test("returns null when next contains whitespace", async () => {
|
||||
const { store, schemaHash } = await makeStoreWithSchema(FRONTMATTER_SCHEMA);
|
||||
|
||||
const raw = "---\nstatus: done\nnext: some role\nscope: role\n---\n\nBody.";
|
||||
|
||||
const result = await tryFrontmatterFastPath(raw, schemaHash, store);
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
// ── Fallback: schema mismatch ─────────────────────────────────────────────────
|
||||
|
||||
describe("tryFrontmatterFastPath — fallback: schema mismatch", () => {
|
||||
test("returns null when outputSchema requires fields not in frontmatter", async () => {
|
||||
const { store, schemaHash } = await makeStoreWithSchema(STRICT_SCHEMA);
|
||||
|
||||
const raw = "---\nstatus: done\nscope: role\n---\n\nBody.";
|
||||
const raw = "---\nstatus: done\n---\n\nBody.";
|
||||
|
||||
const result = await tryFrontmatterFastPath(raw, schemaHash, store);
|
||||
expect(result).toBeNull();
|
||||
@@ -194,7 +203,7 @@ describe("tryFrontmatterFastPath — role-specific fields", () => {
|
||||
test("returns null when required role-specific field is missing", async () => {
|
||||
const { store, schemaHash } = await makeStoreWithSchema(REVIEWER_SCHEMA);
|
||||
|
||||
const raw = "---\nstatus: done\nscope: role\n---\n\nBody.";
|
||||
const raw = "---\nstatus: done\n---\n\nBody.";
|
||||
|
||||
const result = await tryFrontmatterFastPath(raw, schemaHash, store);
|
||||
expect(result).toBeNull();
|
||||
|
||||
@@ -13,13 +13,14 @@ import { extractSchemaFields } from "./build-output-format-instruction.js";
|
||||
|
||||
const log = createLogger({ sink: { kind: "stderr" } });
|
||||
|
||||
const STANDARD_KEYS = ["status", "next", "confidence", "artifacts", "scope"] as const;
|
||||
const STANDARD_KEYS = ["status"] as const;
|
||||
|
||||
type StandardKey = (typeof STANDARD_KEYS)[number];
|
||||
|
||||
export type FrontmatterFastPathResult = {
|
||||
body: string;
|
||||
outputHash: CasRef;
|
||||
frontmatter: Record<string, unknown>;
|
||||
};
|
||||
|
||||
function extractYamlBlock(raw: string): string | null {
|
||||
@@ -62,10 +63,6 @@ function parseRawFrontmatterFields(raw: string): Record<string, unknown> {
|
||||
function defaultCandidate(frontmatter: AgentFrontmatter): Record<string, unknown> {
|
||||
return {
|
||||
status: frontmatter.status,
|
||||
next: frontmatter.next,
|
||||
confidence: frontmatter.confidence,
|
||||
artifacts: [...frontmatter.artifacts],
|
||||
scope: frontmatter.scope,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -73,14 +70,6 @@ function pickStandardField(frontmatter: AgentFrontmatter, key: StandardKey): unk
|
||||
switch (key) {
|
||||
case "status":
|
||||
return frontmatter.status;
|
||||
case "next":
|
||||
return frontmatter.next;
|
||||
case "confidence":
|
||||
return frontmatter.confidence;
|
||||
case "artifacts":
|
||||
return [...frontmatter.artifacts];
|
||||
case "scope":
|
||||
return frontmatter.scope;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -98,9 +87,6 @@ function pickFieldValue(
|
||||
}
|
||||
|
||||
const coerced = pickStandardField(frontmatter, field);
|
||||
if (field === "artifacts" || field === "scope") {
|
||||
return coerced;
|
||||
}
|
||||
if (coerced !== null) {
|
||||
return coerced;
|
||||
}
|
||||
@@ -110,8 +96,8 @@ function pickFieldValue(
|
||||
/**
|
||||
* Build a CAS candidate object from schema property keys and parsed frontmatter.
|
||||
*
|
||||
* When the schema has no inspectable properties, falls back to the five standard
|
||||
* agent frontmatter fields for backward compatibility.
|
||||
* When the schema has no inspectable properties, falls back to the standard
|
||||
* agent frontmatter field (status only).
|
||||
*/
|
||||
function buildCandidate(
|
||||
frontmatter: AgentFrontmatter,
|
||||
@@ -191,5 +177,5 @@ export async function tryFrontmatterFastPath(
|
||||
return null;
|
||||
}
|
||||
|
||||
return { body, outputHash };
|
||||
return { body, outputHash, frontmatter: candidate };
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@ export { createAgent, parseArgv } from "./run.js";
|
||||
export { getCachedSessionId, getCachePath, setCachedSessionId } from "./session-cache.js";
|
||||
export { getConfigPath, getEnvPath, loadWorkflowConfig, resolveStorageRoot } from "./storage.js";
|
||||
export type {
|
||||
AdapterOutput,
|
||||
AgentContext,
|
||||
AgentContinueFn,
|
||||
AgentOptions,
|
||||
|
||||
@@ -6,7 +6,7 @@ import { buildContextWithMeta } from "./context.js";
|
||||
import { tryFrontmatterFastPath } from "./frontmatter.js";
|
||||
import type { AgentStore } from "./storage.js";
|
||||
import { getEnvPath, resolveStorageRoot } from "./storage.js";
|
||||
import type { AgentOptions } from "./types.js";
|
||||
import type { AdapterOutput, AgentOptions } from "./types.js";
|
||||
|
||||
const MAX_FRONTMATTER_RETRIES = 2;
|
||||
|
||||
@@ -85,14 +85,24 @@ async function writeStepNode(options: {
|
||||
return hash;
|
||||
}
|
||||
|
||||
type ExtractedOutput = {
|
||||
outputHash: CasRef;
|
||||
frontmatter: Record<string, unknown>;
|
||||
body: string;
|
||||
};
|
||||
|
||||
async function tryExtractOutput(
|
||||
rawOutput: string,
|
||||
outputSchema: CasRef,
|
||||
ctx: Awaited<ReturnType<typeof buildContextWithMeta>>,
|
||||
): Promise<CasRef | null> {
|
||||
): Promise<ExtractedOutput | null> {
|
||||
const fastPath = await tryFrontmatterFastPath(rawOutput, outputSchema, ctx.meta.store);
|
||||
if (fastPath !== null) {
|
||||
return fastPath.outputHash;
|
||||
return {
|
||||
outputHash: fastPath.outputHash,
|
||||
frontmatter: fastPath.frontmatter,
|
||||
body: fastPath.body,
|
||||
};
|
||||
}
|
||||
return null;
|
||||
}
|
||||
@@ -141,6 +151,7 @@ export function createAgent(options: AgentOptions): () => Promise<void> {
|
||||
|
||||
const startedAtMs = Date.now();
|
||||
let agentResult = await runWithMessage("agent run failed", () => options.run(ctx));
|
||||
agentResult.output = agentResult.output.trimStart();
|
||||
|
||||
// Preserve the primary detail from the first run — it contains the full
|
||||
// tool-call turn history. Continuation retries only fix frontmatter
|
||||
@@ -148,9 +159,9 @@ export function createAgent(options: AgentOptions): () => Promise<void> {
|
||||
const primaryDetailHash = agentResult.detailHash;
|
||||
|
||||
// Try to extract frontmatter; retry via continue if it fails
|
||||
let outputHash = await tryExtractOutput(agentResult.output, roleDef.frontmatter, ctx);
|
||||
let extracted = await tryExtractOutput(agentResult.output, roleDef.frontmatter, ctx);
|
||||
|
||||
for (let retry = 0; retry < MAX_FRONTMATTER_RETRIES && outputHash === null; retry++) {
|
||||
for (let retry = 0; retry < MAX_FRONTMATTER_RETRIES && extracted === null; retry++) {
|
||||
const correctionMessage =
|
||||
"Your previous response did not contain valid YAML frontmatter matching the role schema.\n" +
|
||||
"You MUST begin your response with a YAML frontmatter block (--- delimited).\n" +
|
||||
@@ -159,10 +170,11 @@ export function createAgent(options: AgentOptions): () => Promise<void> {
|
||||
agentResult = await runWithMessage("agent continue failed", () =>
|
||||
options.continue(agentResult.sessionId, correctionMessage, ctx.meta.store),
|
||||
);
|
||||
outputHash = await tryExtractOutput(agentResult.output, roleDef.frontmatter, ctx);
|
||||
agentResult.output = agentResult.output.trimStart();
|
||||
extracted = await tryExtractOutput(agentResult.output, roleDef.frontmatter, ctx);
|
||||
}
|
||||
|
||||
if (outputHash === null) {
|
||||
if (extracted === null) {
|
||||
fail(
|
||||
"Agent output does not contain valid YAML frontmatter matching the role schema " +
|
||||
`after ${MAX_FRONTMATTER_RETRIES} retries.\n` +
|
||||
@@ -172,13 +184,22 @@ export function createAgent(options: AgentOptions): () => Promise<void> {
|
||||
const completedAtMs = Date.now();
|
||||
const stepHash = await persistStep({
|
||||
ctx,
|
||||
outputHash,
|
||||
outputHash: extracted.outputHash,
|
||||
detailHash: primaryDetailHash,
|
||||
agentName: agentLabel(options.name),
|
||||
startedAtMs,
|
||||
completedAtMs,
|
||||
});
|
||||
|
||||
process.stdout.write(`${stepHash}\n`);
|
||||
const adapterOutput: AdapterOutput = {
|
||||
stepHash,
|
||||
detailHash: primaryDetailHash,
|
||||
role,
|
||||
frontmatter: extracted.frontmatter,
|
||||
body: extracted.body,
|
||||
startedAtMs,
|
||||
completedAtMs,
|
||||
};
|
||||
process.stdout.write(`${JSON.stringify(adapterOutput)}\n`);
|
||||
};
|
||||
}
|
||||
|
||||
@@ -37,6 +37,16 @@ export type AgentContinueFn = (
|
||||
|
||||
export type AgentRunFn = (ctx: AgentContext) => Promise<AgentRunResult>;
|
||||
|
||||
export type AdapterOutput = {
|
||||
stepHash: string;
|
||||
detailHash: string;
|
||||
role: string;
|
||||
frontmatter: Record<string, unknown>;
|
||||
body: string;
|
||||
startedAtMs: number;
|
||||
completedAtMs: number;
|
||||
};
|
||||
|
||||
export type AgentOptions = {
|
||||
name: string;
|
||||
run: AgentRunFn;
|
||||
|
||||
@@ -41,31 +41,13 @@ describe("parseFrontmatterMarkdown", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("full frontmatter document", () => {
|
||||
it("parses all fields from a well-formed document", () => {
|
||||
const raw = `---
|
||||
status: done
|
||||
next: reviewer
|
||||
confidence: 0.9
|
||||
artifacts:
|
||||
- src/foo.ts
|
||||
- src/bar.ts
|
||||
scope: thread
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
Everything looks good.`;
|
||||
|
||||
describe("status-only frontmatter", () => {
|
||||
it("parses status-only frontmatter", () => {
|
||||
const raw = "---\nstatus: done\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter).not.toBeNull();
|
||||
const fm = result.frontmatter!;
|
||||
expect(fm.status).toBe("done");
|
||||
expect(fm.next).toBe("reviewer");
|
||||
expect(fm.confidence).toBe(0.9);
|
||||
expect(fm.artifacts).toEqual(["src/foo.ts", "src/bar.ts"]);
|
||||
expect(fm.scope).toBe("thread");
|
||||
expect(result.body).toBe("## Summary\n\nEverything looks good.");
|
||||
expect(result.frontmatter).toEqual({ status: "done" });
|
||||
expect(result.body).toBe("body");
|
||||
});
|
||||
|
||||
it("strips leading newline from body", () => {
|
||||
@@ -87,6 +69,22 @@ Everything looks good.`;
|
||||
});
|
||||
});
|
||||
|
||||
describe("ignores legacy fields", () => {
|
||||
it("legacy fields next/confidence/artifacts/scope are NOT present on result", () => {
|
||||
const raw =
|
||||
"---\nstatus: done\nnext: reviewer\nconfidence: 0.9\nartifacts:\n - src/foo.ts\nscope: thread\n---\n\nBody.";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter).not.toBeNull();
|
||||
const fm = result.frontmatter!;
|
||||
expect(fm.status).toBe("done");
|
||||
// Legacy fields must not exist on the object at all
|
||||
expect("next" in fm).toBe(false);
|
||||
expect("confidence" in fm).toBe(false);
|
||||
expect("artifacts" in fm).toBe(false);
|
||||
expect("scope" in fm).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("status field", () => {
|
||||
it.each([
|
||||
"done",
|
||||
@@ -106,109 +104,18 @@ Everything looks good.`;
|
||||
});
|
||||
|
||||
it("returns null status when omitted", () => {
|
||||
const raw = "---\nconfidence: 0.5\n---\nbody";
|
||||
const raw = "---\nfoo: bar\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.status).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe("confidence field", () => {
|
||||
it("parses integer as number", () => {
|
||||
const raw = "---\nconfidence: 1\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.confidence).toBe(1);
|
||||
});
|
||||
|
||||
it("parses decimal", () => {
|
||||
const raw = "---\nconfidence: 0.75\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.confidence).toBe(0.75);
|
||||
});
|
||||
|
||||
it("returns null when omitted", () => {
|
||||
const raw = "---\nstatus: done\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.confidence).toBeNull();
|
||||
});
|
||||
|
||||
it("returns null for non-numeric value", () => {
|
||||
const raw = "---\nconfidence: high\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.confidence).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe("artifacts field", () => {
|
||||
it("parses block sequence", () => {
|
||||
const raw = "---\nartifacts:\n - a.ts\n - b.ts\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.artifacts).toEqual(["a.ts", "b.ts"]);
|
||||
});
|
||||
|
||||
it("parses inline sequence", () => {
|
||||
const raw = "---\nartifacts: [a.ts, b.ts]\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.artifacts).toEqual(["a.ts", "b.ts"]);
|
||||
});
|
||||
|
||||
it("returns empty array when omitted", () => {
|
||||
const raw = "---\nstatus: done\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.artifacts).toEqual([]);
|
||||
});
|
||||
|
||||
it("wraps single scalar in array", () => {
|
||||
const raw = "---\nartifacts: only-one.ts\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.artifacts).toEqual(["only-one.ts"]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("scope field", () => {
|
||||
it('parses scope "role"', () => {
|
||||
const raw = "---\nscope: role\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.scope).toBe("role");
|
||||
});
|
||||
|
||||
it('parses scope "thread"', () => {
|
||||
const raw = "---\nscope: thread\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.scope).toBe("thread");
|
||||
});
|
||||
|
||||
it('defaults to "role" when omitted', () => {
|
||||
const raw = "---\nstatus: done\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.scope).toBe("role");
|
||||
});
|
||||
|
||||
it('defaults to "role" for unknown scope value', () => {
|
||||
const raw = "---\nscope: global\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.scope).toBe("role");
|
||||
});
|
||||
});
|
||||
|
||||
describe("next field", () => {
|
||||
it("parses a role name", () => {
|
||||
const raw = "---\nnext: planner\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.next).toBe("planner");
|
||||
});
|
||||
|
||||
it("returns null when omitted", () => {
|
||||
const raw = "---\nstatus: done\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.next).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe("unknown fields", () => {
|
||||
it("ignores unknown keys silently", () => {
|
||||
const raw = "---\nunknown_field: some_value\nstatus: done\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.status).toBe("done");
|
||||
expect(Object.keys(result.frontmatter!)).toEqual(["status"]);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -221,123 +128,58 @@ Everything looks good.`;
|
||||
});
|
||||
|
||||
describe("empty frontmatter block", () => {
|
||||
it("parses empty frontmatter and uses all defaults", () => {
|
||||
it("parses empty frontmatter with status null", () => {
|
||||
const raw = "---\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter).not.toBeNull();
|
||||
const fm = result.frontmatter!;
|
||||
expect(fm.status).toBeNull();
|
||||
expect(fm.next).toBeNull();
|
||||
expect(fm.confidence).toBeNull();
|
||||
expect(fm.artifacts).toEqual([]);
|
||||
expect(fm.scope).toBe("role");
|
||||
expect(Object.keys(fm)).toEqual(["status"]);
|
||||
expect(result.body).toBe("body");
|
||||
});
|
||||
});
|
||||
|
||||
describe("AgentFrontmatter has exactly one field", () => {
|
||||
it("has only status key", () => {
|
||||
const fm: AgentFrontmatter = { status: null };
|
||||
expect(Object.keys(fm)).toEqual(["status"]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("FrontmatterValidationError only has status variant", () => {
|
||||
it("status variant is valid", () => {
|
||||
const err: import("../src/index.js").FrontmatterValidationError = {
|
||||
field: "status",
|
||||
message: "test",
|
||||
};
|
||||
expect(err.field).toBe("status");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// ── validateFrontmatter ──────────────────────────────────────────────────────
|
||||
|
||||
function validFm(overrides: Partial<AgentFrontmatter> = {}): AgentFrontmatter {
|
||||
return {
|
||||
status: "done",
|
||||
next: null,
|
||||
confidence: null,
|
||||
artifacts: [],
|
||||
scope: "role",
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
describe("validateFrontmatter", () => {
|
||||
it("returns no errors for a fully valid frontmatter", () => {
|
||||
const errors = validateFrontmatter(validFm());
|
||||
it("returns no errors for a valid status", () => {
|
||||
const errors = validateFrontmatter({ status: "done" });
|
||||
expect(errors).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("returns no errors when all nullable fields are null", () => {
|
||||
const fm: AgentFrontmatter = {
|
||||
status: null,
|
||||
next: null,
|
||||
confidence: null,
|
||||
artifacts: [],
|
||||
scope: "role",
|
||||
};
|
||||
it("returns no errors when status is null", () => {
|
||||
const errors = validateFrontmatter({ status: null });
|
||||
expect(errors).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("returns error for invalid status", () => {
|
||||
const errors = validateFrontmatter({ status: "bogus" as never });
|
||||
expect(errors).toHaveLength(1);
|
||||
expect(errors[0]?.field).toBe("status");
|
||||
});
|
||||
|
||||
it("no validation for next/confidence/artifacts/scope — fields do not exist", () => {
|
||||
// AgentFrontmatter only has status — verify at runtime
|
||||
const fm: AgentFrontmatter = { status: "done" };
|
||||
expect(Object.keys(fm)).toEqual(["status"]);
|
||||
expect(validateFrontmatter(fm)).toHaveLength(0);
|
||||
});
|
||||
|
||||
describe("confidence validation", () => {
|
||||
it("accepts 0.0", () => {
|
||||
expect(validateFrontmatter(validFm({ confidence: 0 }))).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("accepts 1.0", () => {
|
||||
expect(validateFrontmatter(validFm({ confidence: 1 }))).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("rejects value below 0", () => {
|
||||
const errors = validateFrontmatter(validFm({ confidence: -0.1 }));
|
||||
expect(errors).toHaveLength(1);
|
||||
expect(errors[0]?.field).toBe("confidence");
|
||||
});
|
||||
|
||||
it("rejects value above 1", () => {
|
||||
const errors = validateFrontmatter(validFm({ confidence: 1.01 }));
|
||||
expect(errors).toHaveLength(1);
|
||||
expect(errors[0]?.field).toBe("confidence");
|
||||
});
|
||||
});
|
||||
|
||||
describe("next validation", () => {
|
||||
it("accepts a simple role name", () => {
|
||||
expect(validateFrontmatter(validFm({ next: "reviewer" }))).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("accepts kebab-case role name", () => {
|
||||
expect(validateFrontmatter(validFm({ next: "code-reviewer" }))).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("rejects role name with whitespace", () => {
|
||||
const errors = validateFrontmatter(validFm({ next: "role name" }));
|
||||
expect(errors).toHaveLength(1);
|
||||
expect(errors[0]?.field).toBe("next");
|
||||
});
|
||||
});
|
||||
|
||||
describe("artifacts validation", () => {
|
||||
it("accepts non-empty path strings", () => {
|
||||
expect(
|
||||
validateFrontmatter(validFm({ artifacts: ["src/foo.ts", "src/bar.ts"] })),
|
||||
).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("rejects empty string artifact entries", () => {
|
||||
const errors = validateFrontmatter(validFm({ artifacts: [""] }));
|
||||
expect(errors).toHaveLength(1);
|
||||
expect(errors[0]?.field).toBe("artifacts");
|
||||
});
|
||||
|
||||
it("rejects whitespace-only artifact entries", () => {
|
||||
const errors = validateFrontmatter(validFm({ artifacts: [" "] }));
|
||||
expect(errors).toHaveLength(1);
|
||||
expect(errors[0]?.field).toBe("artifacts");
|
||||
});
|
||||
});
|
||||
|
||||
describe("multiple errors", () => {
|
||||
it("reports multiple violations at once", () => {
|
||||
const fm: AgentFrontmatter = {
|
||||
status: "done",
|
||||
next: "bad role",
|
||||
confidence: 2,
|
||||
artifacts: [""],
|
||||
scope: "role",
|
||||
};
|
||||
const errors = validateFrontmatter(fm);
|
||||
const fields = errors.map((e) => e.field);
|
||||
expect(fields).toContain("next");
|
||||
expect(fields).toContain("confidence");
|
||||
expect(fields).toContain("artifacts");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import type {
|
||||
AgentFrontmatter,
|
||||
FrontmatterScope,
|
||||
FrontmatterStatus,
|
||||
FrontmatterValidationError,
|
||||
ParsedFrontmatterMarkdown,
|
||||
@@ -159,40 +158,12 @@ function parseMinimalYaml(yaml: string): Record<string, YamlValue> {
|
||||
|
||||
const VALID_STATUS: readonly FrontmatterStatus[] = ["done", "needs_input", "in_progress", "failed"];
|
||||
|
||||
const VALID_SCOPE: readonly FrontmatterScope[] = ["role", "thread"];
|
||||
|
||||
function coerceStatus(raw: YamlValue): FrontmatterStatus | null {
|
||||
if (raw === null || raw === undefined) return null;
|
||||
const s = String(raw).trim().toLowerCase();
|
||||
return VALID_STATUS.includes(s as FrontmatterStatus) ? (s as FrontmatterStatus) : null;
|
||||
}
|
||||
|
||||
function coerceNext(raw: YamlValue): string | null {
|
||||
if (raw === null || raw === undefined) return null;
|
||||
const s = String(raw).trim();
|
||||
return s === "" ? null : s;
|
||||
}
|
||||
|
||||
function coerceConfidence(raw: YamlValue): number | null {
|
||||
if (raw === null || raw === undefined) return null;
|
||||
const n = typeof raw === "number" ? raw : Number(String(raw).trim());
|
||||
if (Number.isNaN(n)) return null;
|
||||
return n;
|
||||
}
|
||||
|
||||
function coerceArtifacts(raw: YamlValue): readonly string[] {
|
||||
if (raw === null || raw === undefined) return [];
|
||||
if (Array.isArray(raw)) return raw.map(String).filter((s) => s !== "");
|
||||
const s = String(raw).trim();
|
||||
return s === "" ? [] : [s];
|
||||
}
|
||||
|
||||
function coerceScope(raw: YamlValue): FrontmatterScope {
|
||||
if (raw === null || raw === undefined) return "role";
|
||||
const s = String(raw).trim().toLowerCase();
|
||||
return VALID_SCOPE.includes(s as FrontmatterScope) ? (s as FrontmatterScope) : "role";
|
||||
}
|
||||
|
||||
// ── Public API ───────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
@@ -220,10 +191,6 @@ export function parseFrontmatterMarkdown(raw: string): ParsedFrontmatterMarkdown
|
||||
|
||||
const frontmatter: AgentFrontmatter = {
|
||||
status: coerceStatus(fields.status ?? null),
|
||||
next: coerceNext(fields.next ?? null),
|
||||
confidence: coerceConfidence(fields.confidence ?? null),
|
||||
artifacts: coerceArtifacts(fields.artifacts ?? null),
|
||||
scope: coerceScope(fields.scope ?? null),
|
||||
};
|
||||
|
||||
return { frontmatter, body };
|
||||
@@ -235,11 +202,7 @@ export function parseFrontmatterMarkdown(raw: string): ParsedFrontmatterMarkdown
|
||||
* An empty array means the frontmatter is valid.
|
||||
*
|
||||
* Validated constraints:
|
||||
* - `status` — must be one of the FrontmatterStatus literals (if non-null)
|
||||
* - `confidence` — must be in [0.0, 1.0] (if non-null)
|
||||
* - `next` — must be a non-empty string with no whitespace (if non-null)
|
||||
* - `artifacts` — each entry must be a non-empty string
|
||||
* - `scope` — must be one of the FrontmatterScope literals
|
||||
* - `status` — must be one of the FrontmatterStatus literals (if non-null)
|
||||
*/
|
||||
export function validateFrontmatter(
|
||||
frontmatter: AgentFrontmatter,
|
||||
@@ -253,39 +216,5 @@ export function validateFrontmatter(
|
||||
});
|
||||
}
|
||||
|
||||
if (frontmatter.confidence !== null) {
|
||||
if (frontmatter.confidence < 0 || frontmatter.confidence > 1) {
|
||||
errors.push({
|
||||
field: "confidence",
|
||||
message: `confidence ${frontmatter.confidence} is out of range; must be between 0.0 and 1.0 inclusive`,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (frontmatter.next !== null) {
|
||||
if (frontmatter.next.trim() === "") {
|
||||
errors.push({ field: "next", message: "next must be a non-empty string when present" });
|
||||
} else if (/\s/.test(frontmatter.next)) {
|
||||
errors.push({
|
||||
field: "next",
|
||||
message: `next "${frontmatter.next}" must not contain whitespace`,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
for (const artifact of frontmatter.artifacts) {
|
||||
if (artifact.trim() === "") {
|
||||
errors.push({ field: "artifacts", message: "artifact entries must be non-empty strings" });
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!VALID_SCOPE.includes(frontmatter.scope)) {
|
||||
errors.push({
|
||||
field: "scope",
|
||||
message: `invalid scope "${frontmatter.scope}"; must be one of: ${VALID_SCOPE.join(", ")}`,
|
||||
});
|
||||
}
|
||||
|
||||
return errors;
|
||||
}
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
export { parseFrontmatterMarkdown, validateFrontmatter } from "./frontmatter-markdown.js";
|
||||
export type {
|
||||
AgentFrontmatter,
|
||||
FrontmatterScope,
|
||||
FrontmatterStatus,
|
||||
FrontmatterValidationError,
|
||||
ParsedFrontmatterMarkdown,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Frontmatter Markdown — agent output format (RFC #351 Phase 1).
|
||||
* Frontmatter Markdown — agent output format.
|
||||
*
|
||||
* An agent response is a Markdown document with an optional YAML frontmatter
|
||||
* block at the top. The frontmatter carries structured signals that the
|
||||
@@ -9,17 +9,12 @@
|
||||
*
|
||||
* ---
|
||||
* status: done
|
||||
* next: reviewer
|
||||
* confidence: 0.9
|
||||
* artifacts:
|
||||
* - src/foo.ts
|
||||
* scope: role
|
||||
* ---
|
||||
*
|
||||
* ... free-form markdown body ...
|
||||
*
|
||||
* All frontmatter fields are optional at the parse level. `validateFrontmatter`
|
||||
* enforces the constraints documented on each field below.
|
||||
* Only `status` is a standard frontmatter field. All other fields are
|
||||
* role-specific and defined by the output schema.
|
||||
*/
|
||||
|
||||
// ── Vocabulary types ─────────────────────────────────────────────────────────
|
||||
@@ -34,20 +29,12 @@
|
||||
*/
|
||||
export type FrontmatterStatus = "done" | "needs_input" | "in_progress" | "failed";
|
||||
|
||||
/**
|
||||
* Scope of frontmatter signals.
|
||||
*
|
||||
* - `role` — signals apply to the current role execution only (default)
|
||||
* - `thread` — signals are suggestions for the entire thread moderator
|
||||
*/
|
||||
export type FrontmatterScope = "role" | "thread";
|
||||
|
||||
// ── Core frontmatter schema ──────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Parsed and validated frontmatter from an agent response.
|
||||
*
|
||||
* All fields use explicit `T | null` (no optional `?:` per convention).
|
||||
* Only `status` is a standard field. All other fields are role-specific.
|
||||
*/
|
||||
export type AgentFrontmatter = {
|
||||
/**
|
||||
@@ -55,32 +42,6 @@ export type AgentFrontmatter = {
|
||||
* Null when omitted — engine treats it as "done" for backward compatibility.
|
||||
*/
|
||||
status: FrontmatterStatus | null;
|
||||
|
||||
/**
|
||||
* Suggested next role name for the moderator.
|
||||
* The moderator is NOT obligated to follow this — it is advisory only.
|
||||
* Null when the agent has no preference.
|
||||
*/
|
||||
next: string | null;
|
||||
|
||||
/**
|
||||
* Agent's self-assessed confidence in its output (0.0 – 1.0 inclusive).
|
||||
* Null when omitted.
|
||||
*/
|
||||
confidence: number | null;
|
||||
|
||||
/**
|
||||
* Relative file paths or CAS hashes the agent considers its primary outputs.
|
||||
* Used for GC ref-tracing and human-readable summaries.
|
||||
* Empty array when omitted (never null — an absent list is an empty list).
|
||||
*/
|
||||
artifacts: readonly string[];
|
||||
|
||||
/**
|
||||
* Scope of the frontmatter signals.
|
||||
* Defaults to "role" when omitted.
|
||||
*/
|
||||
scope: FrontmatterScope;
|
||||
};
|
||||
|
||||
// ── Parse output ─────────────────────────────────────────────────────────────
|
||||
@@ -103,9 +64,4 @@ export type ParsedFrontmatterMarkdown = {
|
||||
|
||||
// ── Validation error ─────────────────────────────────────────────────────────
|
||||
|
||||
export type FrontmatterValidationError =
|
||||
| { field: "status"; message: string }
|
||||
| { field: "next"; message: string }
|
||||
| { field: "confidence"; message: string }
|
||||
| { field: "artifacts"; message: string }
|
||||
| { field: "scope"; message: string };
|
||||
export type FrontmatterValidationError = { field: "status"; message: string };
|
||||
|
||||
@@ -8,7 +8,6 @@ export { generateDeveloperReference } from "./developer-reference.js";
|
||||
export { env } from "./env.js";
|
||||
export type {
|
||||
AgentFrontmatter,
|
||||
FrontmatterScope,
|
||||
FrontmatterStatus,
|
||||
FrontmatterValidationError,
|
||||
ParsedFrontmatterMarkdown,
|
||||
|
||||
Reference in New Issue
Block a user