feat(engine): add supervisor scene — opt-in LLM-based thread stop (Phase 3)

Supervisor replaces maxRounds as primary stop mechanism. Every N rounds
(configurable via supervisorInterval, default 3), the engine calls a
cheap LLM to evaluate thread progress and decide continue/stop.

- New engine/supervisor.ts: runSupervisor + parseSupervisorDecisionText
- Supervisor is opt-in: no models.supervisor configured = always continue
- WorkflowConfig gains supervisorInterval (default 3, 0 to disable)
- Engine calls supervisor after each supervisorInterval rounds
- 256 tests pass, 14 new tests for supervisor logic

Refs #110
This commit is contained in:
2026-05-08 02:38:54 +00:00
parent 085cdcd3f4
commit 014c442ed2
12 changed files with 655 additions and 17 deletions
+190
View File
@@ -96,6 +96,98 @@ async function writeExtractRegistryConfig(storageRoot: string): Promise<void> {
await writeFile(join(storageRoot, "workflow.yaml"), EXTRACT_REGISTRY_YAML, "utf8");
}
const SUPERVISOR_INTERVAL_REGISTRY_YAML = `config:
maxDepth: 3
supervisorInterval: 2
providers:
stub:
baseUrl: http://127.0.0.1:9
apiKey: test
models:
extract: stub/model
supervisor: stub/supervisor-cheap
workflows: {}
`;
const SUPERVISOR_LONG_INTERVAL_REGISTRY_YAML = `config:
maxDepth: 3
supervisorInterval: 10
providers:
stub:
baseUrl: http://127.0.0.1:9
apiKey: test
models:
extract: stub/model
supervisor: stub/supervisor-cheap
workflows: {}
`;
async function writeRegistryYaml(storageRoot: string, yaml: string): Promise<void> {
await writeFile(join(storageRoot, "workflow.yaml"), yaml, "utf8");
}
/** Extract rounds use tool_calls; supervisor uses plain `content` (no tools). */
function installMockExtractThenSupervisor(params: {
extractArgs: ReadonlyArray<Record<string, unknown>>;
supervisorContent: string;
onSupervisorCall?: () => void;
}): () => void {
const origFetch = globalThis.fetch;
let extractI = 0;
const mockFetch = async (
_input: Parameters<typeof fetch>[0],
init?: RequestInit,
): Promise<Response> => {
const body = init?.body ? (JSON.parse(String(init.body)) as Record<string, unknown>) : {};
const tools = body.tools;
const hasTools = Array.isArray(tools) && tools.length > 0;
if (hasTools) {
const args =
params.extractArgs[extractI] ?? params.extractArgs[params.extractArgs.length - 1];
if (args === undefined) {
throw new Error("installMockExtractThenSupervisor: empty extractArgs");
}
extractI += 1;
const firstTool = tools[0] as Record<string, unknown>;
const fn = firstTool.function as Record<string, unknown> | undefined;
const toolName = typeof fn?.name === "string" ? fn.name : "extract";
return new Response(
JSON.stringify({
choices: [
{
message: {
tool_calls: [
{
type: "function",
function: {
name: toolName,
arguments: JSON.stringify(args),
},
},
],
},
},
],
}),
{ status: 200, headers: { "Content-Type": "application/json" } },
);
}
params.onSupervisorCall?.();
return new Response(
JSON.stringify({
choices: [{ message: { content: params.supervisorContent } }],
}),
{ status: 200, headers: { "Content-Type": "application/json" } },
);
};
globalThis.fetch = Object.assign(mockFetch, {
preconnect: origFetch.preconnect.bind(origFetch),
}) as typeof fetch;
return () => {
globalThis.fetch = origFetch;
};
}
const demoWorkflow = createWorkflow<DemoMeta>(
{
roles: {
@@ -623,4 +715,102 @@ describe("executeThread", () => {
await rm(root, { recursive: true, force: true });
}
});
test("supervisor stops thread when interval elapses and model returns stop", async () => {
restoreFetch = installMockExtractThenSupervisor({
extractArgs: [{ plan: "do-it", files: ["a.ts"] }, { diff: "+ok" }],
supervisorContent: "stop",
});
const root = await mkdtemp(join(tmpdir(), "wf-engine-sup-stop-"));
try {
const threadId = "01KQXKW18CT8G75T53R8F4G7YG";
const hash = "C9NMV6V2TQT81";
const dataPath = join(root, "logs", hash, `${threadId}.data.jsonl`);
const infoPath = join(root, "logs", hash, `${threadId}.info.jsonl`);
await mkdir(join(root, "logs", hash), { recursive: true });
await writeRegistryYaml(root, SUPERVISOR_INTERVAL_REGISTRY_YAML);
const cas = createCasStore(join(root, "cas"));
const logger = createLogger({ sink: { kind: "file", path: infoPath } });
const ac = new AbortController();
const result = await executeThread(
demoWorkflow,
"demo-flow",
{ prompt: "supervisor-stop-case", steps: [] },
{
maxRounds: 20,
depth: 0,
signal: ac.signal,
awaitAfterEachYield: async () => {},
forkSourceThreadId: null,
prefilledDiskSteps: null,
storageRoot: root,
},
{ threadId, hash, dataJsonlPath: dataPath, infoJsonlPath: infoPath, cas },
logger,
);
expect(result.returnCode).toBe(0);
expect(result.summary).toBe("completed: supervisor stopped thread");
const dataText = await readFile(dataPath, "utf8");
const lines = dataText
.trim()
.split("\n")
.filter((l) => l !== "");
expect(lines.length).toBe(3);
} finally {
await rm(root, { recursive: true, force: true });
}
});
test("supervisor is not invoked before supervisorInterval rounds", async () => {
let supervisorCalls = 0;
restoreFetch = installMockExtractThenSupervisor({
extractArgs: [{ plan: "do-it", files: ["a.ts"] }, { diff: "+ok" }],
supervisorContent: "stop",
onSupervisorCall: () => {
supervisorCalls += 1;
},
});
const root = await mkdtemp(join(tmpdir(), "wf-engine-sup-skip-"));
try {
const threadId = "01KQXKW18CT8G75T53R8F4G7YG";
const hash = "C9NMV6V2TQT81";
const dataPath = join(root, "logs", hash, `${threadId}.data.jsonl`);
const infoPath = join(root, "logs", hash, `${threadId}.info.jsonl`);
await mkdir(join(root, "logs", hash), { recursive: true });
await writeRegistryYaml(root, SUPERVISOR_LONG_INTERVAL_REGISTRY_YAML);
const cas = createCasStore(join(root, "cas"));
const logger = createLogger({ sink: { kind: "file", path: infoPath } });
const ac = new AbortController();
const result = await executeThread(
demoWorkflow,
"demo-flow",
{ prompt: "no-supervisor-yet", steps: [] },
{
maxRounds: 20,
depth: 0,
signal: ac.signal,
awaitAfterEachYield: async () => {},
forkSourceThreadId: null,
prefilledDiskSteps: null,
storageRoot: root,
},
{ threadId, hash, dataJsonlPath: dataPath, infoJsonlPath: infoPath, cas },
logger,
);
expect(supervisorCalls).toBe(0);
expect(result.returnCode).toBe(0);
expect(result.summary).toBe("completed: moderator returned END");
} finally {
await rm(root, { recursive: true, force: true });
}
});
});