feat(workflow-utils): RFC-003 Phase 3 — Extract Layer

- llmExtractWithRetry: retry-once on parse failure with error context
- mergeExtractConfig: three-level merge (global → agent → role)
- extractMetaOrThrow + createLlmExtractFn: ExtractFn factory
- ZodMetaSchema bridges core Schema<T> with runtime Zod validation
- Tests: 8 new cases (success/retry/throw/merge/factory)
- core tsconfig: add DOM lib for AbortSignal declaration emit

Closes #237
Ref: #234
This commit is contained in:
2026-04-29 04:59:47 +00:00
parent 88bd30a1e4
commit 136aafa209
6 changed files with 352 additions and 5 deletions
+2 -1
View File
@@ -3,7 +3,8 @@
"compilerOptions": {
"outDir": "dist",
"rootDir": "src",
"composite": false
"composite": false,
"lib": ["ES2022", "DOM"]
},
"include": ["src"]
}
@@ -0,0 +1,188 @@
import { ExtractError } from "@uncaged/nerve-core";
import { afterEach, describe, expect, it, vi } from "vitest";
import { z } from "zod";
import {
type ZodMetaSchema,
createLlmExtractFn,
extractMetaOrThrow,
} from "../shared/extract-fn.js";
import { llmExtractWithRetry } from "../shared/llm-extract.js";
import { type ExtractConfigLayer, mergeExtractConfig } from "../shared/merge-extract-config.js";
// Placeholder provider settings handed to the extract helpers in the tests below.
// Every test that uses it stubs the global `fetch`, so the values are dummies.
const provider = {
baseUrl: "https://example.com/v1",
apiKey: "k",
model: "m",
};
/**
 * Builds a minimal stand-in for a `fetch` response whose body is a chat-completion
 * style JSON document containing exactly one `extract` tool call.
 *
 * @param argsJson - Raw string placed verbatim in the tool call's `arguments` field.
 * @returns An object with `ok: true`, `status: 200`, and a `text()` method that
 *   resolves to the serialized JSON body.
 */
function toolCallResponse(argsJson: string): {
  ok: boolean;
  status: number;
  text: () => Promise<string>;
} {
  const toolCall = { function: { name: "extract", arguments: argsJson } };
  const payload = { choices: [{ message: { tool_calls: [toolCall] } }] };
  // Serialize lazily, on each `text()` call, like a real response body read.
  const text = async (): Promise<string> => JSON.stringify(payload);
  return { ok: true, status: 200, text };
}
// Covers the global → agent → role precedence of mergeExtractConfig and its error path.
describe("mergeExtractConfig", () => {
  // A layer that overrides nothing: both fields are inherited.
  const emptyLayer: ExtractConfigLayer = { provider: null, model: null };
  // Global defaults shared by the precedence tests.
  const globalDefaults = { provider: "dashscope", model: "qwen-plus" };

  it("resolves global-only extract settings", () => {
    const merged = mergeExtractConfig(globalDefaults, emptyLayer, emptyLayer);
    expect(merged.ok).toBe(true);
    if (merged.ok) {
      expect(merged.value).toEqual({ provider: "dashscope", model: "qwen-plus" });
    }
  });

  it("lets agent override global provider and model", () => {
    // Only the provider is overridden here; the model is inherited from global.
    const agentLayer: ExtractConfigLayer = { provider: "openai", model: null };
    const merged = mergeExtractConfig(globalDefaults, agentLayer, emptyLayer);
    expect(merged.ok).toBe(true);
    if (merged.ok) {
      expect(merged.value).toEqual({ provider: "openai", model: "qwen-plus" });
    }
  });

  it("lets role override agent and global", () => {
    const agentLayer: ExtractConfigLayer = { provider: "openai", model: "gpt-4o" };
    const roleLayer: ExtractConfigLayer = { provider: null, model: "small" };
    const merged = mergeExtractConfig(globalDefaults, agentLayer, roleLayer);
    expect(merged.ok).toBe(true);
    if (merged.ok) {
      expect(merged.value).toEqual({ provider: "openai", model: "small" });
    }
  });

  it("returns error when provider cannot be resolved", () => {
    const modelOnlyAgent: ExtractConfigLayer = { provider: null, model: "m" };
    const merged = mergeExtractConfig(null, modelOnlyAgent, emptyLayer);
    expect(merged.ok).toBe(false);
    if (!merged.ok) {
      expect(merged.error.message).toMatch(/provider/);
    }
  });
});
// Exercises the success path, the single retry, and the final ExtractError,
// using a stubbed global `fetch` in place of a real LLM endpoint.
describe("extractMetaOrThrow + llmExtractWithRetry", () => {
  // Remove the `fetch` stub and restore mocks so tests do not leak into each other.
  afterEach(() => {
    vi.unstubAllGlobals();
    vi.restoreAllMocks();
  });

  it("extracts structured meta on success (mock LLM)", async () => {
    vi.stubGlobal("fetch", vi.fn().mockResolvedValue(toolCallResponse(JSON.stringify({ n: 42 }))));
    const schema = z.object({ n: z.number() });
    const value = await extractMetaOrThrow("raw agent output", schema, {
      provider,
      dryRun: false,
    });
    expect(value).toEqual({ n: 42 });
    expect(fetch).toHaveBeenCalledTimes(1);
  });

  it("retries once after schema validation failure then succeeds", async () => {
    // First response violates the schema (string instead of number); second is valid.
    const fetchMock = vi
      .fn()
      .mockResolvedValueOnce(toolCallResponse(JSON.stringify({ n: "bad" })))
      .mockResolvedValueOnce(toolCallResponse(JSON.stringify({ n: 99 })));
    vi.stubGlobal("fetch", fetchMock);
    const schema = z.object({ n: z.number() });
    const value = await llmExtractWithRetry({
      text: "raw",
      schema,
      provider,
      dryRun: false,
    });
    expect(value.ok).toBe(true);
    if (!value.ok) {
      return;
    }
    expect(value.value).toEqual({ n: 99 });
    expect(fetchMock).toHaveBeenCalledTimes(2);
    // The retry request must carry the correction prompt in the user message.
    const secondBody = JSON.parse(
      (fetchMock.mock.calls[1] as [string, RequestInit])[1].body as string,
    ) as {
      messages: Array<{ role: string; content: string }>;
    };
    expect(secondBody.messages[1].content).toContain("The previous extraction attempt failed");
  });

  it("throws ExtractError with raw and causeError after two failures", async () => {
    const fetchMock = vi
      .fn()
      .mockResolvedValue(toolCallResponse(JSON.stringify({ n: "still-bad" })));
    vi.stubGlobal("fetch", fetchMock);
    const schema = z.object({ n: z.number() });
    // Capture the rejection and assert outside the `try`: an assertion thrown inside
    // the `try` would be caught by this test's own `catch` and produce a misleading
    // failure. If nothing is thrown, `caught` stays undefined and the instance check fails.
    let caught: unknown;
    try {
      await extractMetaOrThrow("original-raw-text", schema, { provider, dryRun: false });
    } catch (e) {
      caught = e;
    }
    expect(caught).toBeInstanceOf(ExtractError);
    const ex = caught as ExtractError;
    expect(ex.raw).toBe("original-raw-text");
    expect(ex.causeError).toBeInstanceOf(Error);
    expect(fetchMock).toHaveBeenCalledTimes(2);
  });
});
// Checks that the factory returns a function that extracts through a ZodMetaSchema.
describe("createLlmExtractFn", () => {
  // Remove the `fetch` stub and restore mocks after each test.
  afterEach(() => {
    vi.unstubAllGlobals();
    vi.restoreAllMocks();
  });

  it("implements ExtractFn using ZodMetaSchema", async () => {
    const fetchStub = vi.fn().mockResolvedValue(toolCallResponse(JSON.stringify({ k: "v" })));
    vi.stubGlobal("fetch", fetchStub);

    const parser = z.object({ k: z.string() });
    const metaSchema: ZodMetaSchema<{ k: string }> = { witness: null, zod: parser };
    const extract = createLlmExtractFn<{ k: string }>({ provider, dryRun: false });

    const extracted = await extract("input", metaSchema);
    expect(extracted).toEqual({ k: "v" });
  });
});
+7 -1
View File
@@ -4,7 +4,13 @@ export { createHermesRole } from "./role-hermes.js";
export { createLlmRole } from "./role-llm.js";
export { createReActRole } from "./role-react.js";
export { cursorAgent } from "./shared/cursor-agent.js";
export { llmExtract } from "./shared/llm-extract.js";
export { llmExtract, llmExtractWithRetry } from "./shared/llm-extract.js";
export { mergeExtractConfig, type ExtractConfigLayer } from "./shared/merge-extract-config.js";
export {
createLlmExtractFn,
extractMetaOrThrow,
type ZodMetaSchema,
} from "./shared/extract-fn.js";
export {
nerveAgentContext,
readNerveYaml,
@@ -0,0 +1,44 @@
import type { ExtractFn, Schema } from "@uncaged/nerve-core";
import { ExtractError } from "@uncaged/nerve-core";
import type { z } from "zod";
import type { LlmProvider } from "./llm-extract.js";
import { llmErrorToCause, llmExtractWithRetry } from "./llm-extract.js";
/**
 * Runtime schema for extract: the core `Schema<T>` intersected with a read-only
 * `zod` property holding the Zod parser used by the LLM extract path.
 *
 * `createLlmExtractFn` rejects schemas that do not carry the `zod` property.
 */
export type ZodMetaSchema<T> = Schema<T> & { readonly zod: z.ZodType<T> };
/**
 * Runs LLM extraction through `llmExtractWithRetry` and unwraps the result,
 * converting a failed result into a thrown `ExtractError`.
 *
 * @param raw - Text to extract from; also attached to the thrown error as `raw`.
 * @param zodSchema - Zod schema passed to the extractor as `schema`.
 * @param options - Provider settings and dry-run flag forwarded to the extractor.
 * @returns The extracted value when extraction succeeds.
 * @throws ExtractError carrying `raw` and a `causeError` derived from the final LLM error.
 */
export async function extractMetaOrThrow<T>(
  raw: string,
  zodSchema: z.ZodType<T>,
  options: { provider: LlmProvider; dryRun: boolean },
): Promise<T> {
  const { provider, dryRun } = options;
  const result = await llmExtractWithRetry({
    text: raw,
    schema: zodSchema,
    provider,
    dryRun,
  });
  if (result.ok) {
    return result.value;
  }
  // `llmExtractWithRetry` retries only for some error kinds, so the message must not
  // claim a retry happened. Report the kind of the final error instead.
  throw new ExtractError(`Structured extraction failed (${result.error.kind})`, {
    raw,
    causeError: llmErrorToCause(result.error),
  });
}
/**
 * Creates an `ExtractFn` backed by `extractMetaOrThrow`.
 *
 * @param deps - Provider settings and dry-run flag forwarded to every extraction.
 * @returns An async function taking the raw text and a schema. The returned function
 *   rejects with an `Error` when the schema has no `zod` property; otherwise it
 *   resolves or rejects exactly as `extractMetaOrThrow` does.
 */
export function createLlmExtractFn<T>(deps: {
  provider: LlmProvider;
  dryRun: boolean;
}): ExtractFn<T> {
  const extract: ExtractFn<T> = async (raw, schema) => {
    // The core schema type does not expose the parser, so view it as the extended form.
    const withParser = schema as ZodMetaSchema<T>;
    const hasParser = "zod" in withParser;
    if (hasParser) {
      return extractMetaOrThrow(raw, withParser.zod, deps);
    }
    throw new Error("extract: schema must be a ZodMetaSchema (include zod parser)");
  };
  return extract;
}
@@ -95,12 +95,45 @@ function readToolArgumentsJson(parsed: unknown, previewSource: string): Result<s
return ok(argsRaw);
}
/**
 * Tells whether an extract failure is one of the two kinds that trigger a retry:
 * a schema validation failure or tool arguments that were not valid JSON.
 */
function isRetryableExtractError(error: LlmError): boolean {
  switch (error.kind) {
    case "schema_validation_failed":
    case "tool_arguments_invalid_json":
      return true;
    default:
      return false;
  }
}
/**
 * Produces the one-line explanation of a failed attempt that is embedded in the
 * retry prompt. Kinds without a dedicated sentence fall back to the error's JSON form.
 */
function describeRetryHint(error: LlmError): string {
  switch (error.kind) {
    case "schema_validation_failed":
      return `Schema validation failed: ${error.message}`;
    case "tool_arguments_invalid_json":
      return `Tool arguments were not valid JSON: ${error.message}`;
    default:
      return JSON.stringify(error);
  }
}
/**
 * Converts an `LlmError` value into a plain `Error` suitable for use as a cause.
 *
 * - `http_error`: message contains the status and at most the first 500 characters of the body.
 * - `no_tool_call`: message contains the response preview.
 * - every other kind: message is the error's own `message`.
 */
export function llmErrorToCause(error: LlmError): Error {
  switch (error.kind) {
    case "http_error": {
      // Truncate the body so a large error page does not bloat the message.
      const bodyPreview = error.body.slice(0, 500);
      return new Error(`HTTP ${error.status}: ${bodyPreview}`);
    }
    case "no_tool_call":
      return new Error(`No tool call in response: ${error.preview}`);
    case "invalid_response_json":
    case "tool_arguments_invalid_json":
    case "schema_validation_failed":
    case "network_error":
      return new Error(error.message);
  }
}
/**
* Calls an OpenAI-compatible chat completions API with `tool_choice` forced to a single function
* derived from a Zod v4 schema (`toJSONSchema`). Uses `fetch()` only (no shell).
*
* `userContent` is the full user message body (used for retry prompts).
*/
export async function llmExtract<T>(
options: LlmExtractOptionsInput<T>,
async function performLlmExtract<T>(
options: LlmExtractOptionsInput<T> & { userContent: string },
): Promise<Result<T, LlmError>> {
const dryRun = resolveLlmExtractDryRun(options);
if (dryRun) {
@@ -122,7 +155,7 @@ export async function llmExtract<T>(
role: "system" as const,
content: "Extract the requested information from the provided text. Be precise.",
},
{ role: "user" as const, content: options.text },
{ role: "user" as const, content: options.userContent },
],
tools: [
{
@@ -188,3 +221,49 @@ export async function llmExtract<T>(
return ok(validated.data);
}
/**
 * Performs exactly one extract attempt, sending `options.text` unchanged as the
 * user message. Kept so existing callers that pass `text` continue to work.
 */
export async function llmExtract<T>(
  options: LlmExtractOptionsInput<T>,
): Promise<Result<T, LlmError>> {
  const { text } = options;
  return performLlmExtract({ ...options, userContent: text });
}
/**
 * Attempts extraction at most twice (RFC-003). The first attempt sends `options.text`
 * as-is. If it fails with a retryable error (schema validation or invalid tool-argument
 * JSON), a second attempt sends the same text followed by a description of the failure
 * and an instruction to answer with a schema-conforming tool call.
 *
 * @returns The first result when it succeeded or is not retryable; otherwise the
 *   result of the second attempt.
 */
export async function llmExtractWithRetry<T>(
  options: LlmExtractOptionsInput<T>,
): Promise<Result<T, LlmError>> {
  const { text } = options;
  const initial = await performLlmExtract({ ...options, userContent: text });
  if (initial.ok || !isRetryableExtractError(initial.error)) {
    return initial;
  }

  // The continuation lines below are part of the prompt sent to the model, so they
  // must stay at column 0: any indentation would end up in the request body.
  const retryContent = `${text}
---
The previous extraction attempt failed.
${describeRetryHint(initial.error)}
Respond again with a single tool call whose \`arguments\` JSON strictly matches the schema.`;

  return performLlmExtract({ ...options, userContent: retryContent });
}
@@ -0,0 +1,29 @@
import type { ExtractConfig, Result } from "@uncaged/nerve-core";
import { err, ok } from "@uncaged/nerve-core";
/**
 * One level in the global → agent → role merge (RFC-003). A `null` field means
 * "not set at this level": the value is inherited from the lower-precedence layer.
 */
export type ExtractConfigLayer = {
// Provider name for this layer, or `null` to inherit.
provider: string | null;
// Model name for this layer, or `null` to inherit.
model: string | null;
};
/**
 * Resolves the effective extract configuration from three layers, where `role`
 * wins over `agent`, and `agent` wins over `global`. A `null` field in a layer
 * defers to the next layer.
 *
 * @param global - Base configuration, or `null` when there is none.
 * @param agent - Agent-level overrides.
 * @param role - Role-level overrides (highest precedence).
 * @returns `ok` with the resolved provider and model, or `err` when the resolved
 *   provider (checked first) or model is missing or blank after trimming.
 */
export function mergeExtractConfig(
  global: ExtractConfig | null,
  agent: ExtractConfigLayer,
  role: ExtractConfigLayer,
): Result<ExtractConfig> {
  // `?.trim()` yields undefined for null and "" for blank strings; both are falsy.
  const provider = role.provider ?? agent.provider ?? global?.provider ?? null;
  if (!provider?.trim()) {
    return err(new Error("extract: unresolved provider after merge"));
  }

  const model = role.model ?? agent.model ?? global?.model ?? null;
  if (!model?.trim()) {
    return err(new Error("extract: unresolved model after merge"));
  }

  return ok({ provider, model });
}