diff --git a/packages/core/tsconfig.json b/packages/core/tsconfig.json index 9036088..ac92166 100644 --- a/packages/core/tsconfig.json +++ b/packages/core/tsconfig.json @@ -3,7 +3,8 @@ "compilerOptions": { "outDir": "dist", "rootDir": "src", - "composite": false + "composite": false, + "lib": ["ES2022", "DOM"] }, "include": ["src"] } diff --git a/packages/workflow-utils/src/__tests__/extract-layer.test.ts b/packages/workflow-utils/src/__tests__/extract-layer.test.ts new file mode 100644 index 0000000..944c24e --- /dev/null +++ b/packages/workflow-utils/src/__tests__/extract-layer.test.ts @@ -0,0 +1,188 @@ +import { ExtractError } from "@uncaged/nerve-core"; +import { afterEach, describe, expect, it, vi } from "vitest"; +import { z } from "zod"; + +import { + type ZodMetaSchema, + createLlmExtractFn, + extractMetaOrThrow, +} from "../shared/extract-fn.js"; +import { llmExtractWithRetry } from "../shared/llm-extract.js"; +import { type ExtractConfigLayer, mergeExtractConfig } from "../shared/merge-extract-config.js"; + +const provider = { + baseUrl: "https://example.com/v1", + apiKey: "k", + model: "m", +}; + +function toolCallResponse(argsJson: string): { + ok: boolean; + status: number; + text: () => Promise; +} { + return { + ok: true, + status: 200, + text: async () => + JSON.stringify({ + choices: [ + { + message: { + tool_calls: [ + { + function: { + name: "extract", + arguments: argsJson, + }, + }, + ], + }, + }, + ], + }), + }; +} + +describe("mergeExtractConfig", () => { + const emptyLayer: ExtractConfigLayer = { provider: null, model: null }; + + it("resolves global-only extract settings", () => { + const result = mergeExtractConfig( + { provider: "dashscope", model: "qwen-plus" }, + emptyLayer, + emptyLayer, + ); + expect(result.ok).toBe(true); + if (!result.ok) { + return; + } + expect(result.value).toEqual({ provider: "dashscope", model: "qwen-plus" }); + }); + + it("lets agent override global provider and model", () => { + const agent: ExtractConfigLayer = { provider: "openai", model: null }; + const result = mergeExtractConfig( + { provider: "dashscope", model: "qwen-plus" }, + agent, + emptyLayer, + ); + expect(result.ok).toBe(true); + if (!result.ok) { + return; + } + expect(result.value).toEqual({ provider: "openai", model: "qwen-plus" }); + }); + + it("lets role override agent and global", () => { + const agent: ExtractConfigLayer = { provider: "openai", model: "gpt-4o" }; + const role: ExtractConfigLayer = { provider: null, model: "small" }; + const result = mergeExtractConfig({ provider: "dashscope", model: "qwen-plus" }, agent, role); + expect(result.ok).toBe(true); + if (!result.ok) { + return; + } + expect(result.value).toEqual({ provider: "openai", model: "small" }); + }); + + it("returns error when provider cannot be resolved", () => { + const result = mergeExtractConfig(null, { provider: null, model: "m" }, emptyLayer); + expect(result.ok).toBe(false); + if (result.ok) { + return; + } + expect(result.error.message).toMatch(/provider/); + }); +}); + +describe("extractMetaOrThrow + llmExtractWithRetry", () => { + afterEach(() => { + vi.unstubAllGlobals(); + vi.restoreAllMocks(); + }); + + it("extracts structured meta on success (mock LLM)", async () => { + vi.stubGlobal("fetch", vi.fn().mockResolvedValue(toolCallResponse(JSON.stringify({ n: 42 })))); + + const schema = z.object({ n: z.number() }); + const value = await extractMetaOrThrow("raw agent output", schema, { + provider, + dryRun: false, + }); + + expect(value).toEqual({ n: 42 }); + expect(fetch).toHaveBeenCalledTimes(1); + }); + + it("retries once after schema validation failure then succeeds", async () => { + const fetchMock = vi + .fn() + .mockResolvedValueOnce(toolCallResponse(JSON.stringify({ n: "bad" }))) + .mockResolvedValueOnce(toolCallResponse(JSON.stringify({ n: 99 }))); + + vi.stubGlobal("fetch", fetchMock); + + const schema = z.object({ n: z.number() }); + const value = await llmExtractWithRetry({ + text: "raw", + schema, + provider, + dryRun: false, + }); + + expect(value.ok).toBe(true); + if (!value.ok) { + return; + } + expect(value.value).toEqual({ n: 99 }); + expect(fetchMock).toHaveBeenCalledTimes(2); + + const secondBody = JSON.parse( + (fetchMock.mock.calls[1] as [string, RequestInit])[1].body as string, + ) as { + messages: Array<{ role: string; content: string }>; + }; + expect(secondBody.messages[1].content).toContain("The previous extraction attempt failed"); + }); + + it("throws ExtractError with raw and causeError after two failures", async () => { + const fetchMock = vi + .fn() + .mockResolvedValue(toolCallResponse(JSON.stringify({ n: "still-bad" }))); + + vi.stubGlobal("fetch", fetchMock); + + const schema = z.object({ n: z.number() }); + + try { + await extractMetaOrThrow("original-raw-text", schema, { provider, dryRun: false }); + expect.fail("expected ExtractError"); + } catch (e) { + expect(e).toBeInstanceOf(ExtractError); + const ex = e as ExtractError; + expect(ex.raw).toBe("original-raw-text"); + expect(ex.causeError).toBeInstanceOf(Error); + } + + expect(fetchMock).toHaveBeenCalledTimes(2); + }); +}); + +describe("createLlmExtractFn", () => { + afterEach(() => { + vi.unstubAllGlobals(); + vi.restoreAllMocks(); + }); + + it("implements ExtractFn using ZodMetaSchema", async () => { + vi.stubGlobal("fetch", vi.fn().mockResolvedValue(toolCallResponse(JSON.stringify({ k: "v" })))); + + const zod = z.object({ k: z.string() }); + const schema: ZodMetaSchema<{ k: string }> = { witness: null, zod }; + + const fn = createLlmExtractFn<{ k: string }>({ provider, dryRun: false }); + const out = await fn("input", schema); + + expect(out).toEqual({ k: "v" }); + }); +}); diff --git a/packages/workflow-utils/src/index.ts b/packages/workflow-utils/src/index.ts index a0fd111..3b80e72 100644 --- a/packages/workflow-utils/src/index.ts +++ b/packages/workflow-utils/src/index.ts @@ -4,7 +4,13 @@ export { createHermesRole } from "./role-hermes.js"; export { createLlmRole } from "./role-llm.js"; export { createReActRole } from "./role-react.js"; export { cursorAgent } from "./shared/cursor-agent.js"; -export { llmExtract } from "./shared/llm-extract.js"; +export { llmExtract, llmExtractWithRetry } from "./shared/llm-extract.js"; +export { mergeExtractConfig, type ExtractConfigLayer } from "./shared/merge-extract-config.js"; +export { + createLlmExtractFn, + extractMetaOrThrow, + type ZodMetaSchema, +} from "./shared/extract-fn.js"; export { nerveAgentContext, readNerveYaml, diff --git a/packages/workflow-utils/src/shared/extract-fn.ts b/packages/workflow-utils/src/shared/extract-fn.ts new file mode 100644 index 0000000..60fc52a --- /dev/null +++ b/packages/workflow-utils/src/shared/extract-fn.ts @@ -0,0 +1,44 @@ +import type { ExtractFn, Schema } from "@uncaged/nerve-core"; +import { ExtractError } from "@uncaged/nerve-core"; +import type { z } from "zod"; + +import type { LlmProvider } from "./llm-extract.js"; +import { llmErrorToCause, llmExtractWithRetry } from "./llm-extract.js"; + +/** + * Runtime schema for extract: core `Schema` witness plus the Zod parser used by the LLM extract path. + */ +export type ZodMetaSchema = Schema & { readonly zod: z.ZodType }; + +export async function extractMetaOrThrow( + raw: string, + zodSchema: z.ZodType, + options: { provider: LlmProvider; dryRun: boolean }, +): Promise { + const result = await llmExtractWithRetry({ + text: raw, + schema: zodSchema, + provider: options.provider, + dryRun: options.dryRun, + }); + if (result.ok) { + return result.value; + } + throw new ExtractError("Structured extraction failed after one retry", { + raw, + causeError: llmErrorToCause(result.error), + }); +} + +export function createLlmExtractFn(deps: { + provider: LlmProvider; + dryRun: boolean; +}): ExtractFn { + return async (raw, schema) => { + const extended = schema as ZodMetaSchema; + if (!("zod" in extended)) { + throw new Error("extract: schema must be a ZodMetaSchema (include zod parser)"); + } + return extractMetaOrThrow(raw, extended.zod, deps); + }; +} diff --git a/packages/workflow-utils/src/shared/llm-extract.ts b/packages/workflow-utils/src/shared/llm-extract.ts index 9be735f..e4a4e69 100644 --- a/packages/workflow-utils/src/shared/llm-extract.ts +++ b/packages/workflow-utils/src/shared/llm-extract.ts @@ -95,12 +95,45 @@ function readToolArgumentsJson(parsed: unknown, previewSource: string): Result( - options: LlmExtractOptionsInput, +async function performLlmExtract( + options: LlmExtractOptionsInput & { userContent: string }, ): Promise> { const dryRun = resolveLlmExtractDryRun(options); if (dryRun) { @@ -122,7 +155,7 @@ export async function llmExtract( role: "system" as const, content: "Extract the requested information from the provided text. Be precise.", }, - { role: "user" as const, content: options.text }, + { role: "user" as const, content: options.userContent }, ], tools: [ { @@ -188,3 +221,49 @@ export async function llmExtract( return ok(validated.data); } + +/** + * Single LLM extract attempt (backward-compatible with callers that pass `text`). + */ +export async function llmExtract( + options: LlmExtractOptionsInput, +): Promise> { + return performLlmExtract({ ...options, userContent: options.text }); +} + +/** + * Runs extract up to two times: on the first schema/tool-args parse failure, resends the agent + * output plus the error so the model can correct the tool call (RFC-003). + */ +export async function llmExtractWithRetry( + options: LlmExtractOptionsInput, +): Promise> { + const first = await performLlmExtract({ + ...options, + userContent: options.text, + }); + if (first.ok) { + return first; + } + if (!isRetryableExtractError(first.error)) { + return first; + } + + const hint = describeRetryHint(first.error); + const correction = `The previous extraction attempt failed. + +${hint} + +Respond again with a single tool call whose \`arguments\` JSON strictly matches the schema.`; + + const secondContent = `${options.text} + +--- + +${correction}`; + + return performLlmExtract({ + ...options, + userContent: secondContent, + }); +} diff --git a/packages/workflow-utils/src/shared/merge-extract-config.ts b/packages/workflow-utils/src/shared/merge-extract-config.ts new file mode 100644 index 0000000..1530922 --- /dev/null +++ b/packages/workflow-utils/src/shared/merge-extract-config.ts @@ -0,0 +1,29 @@ +import type { ExtractConfig, Result } from "@uncaged/nerve-core"; +import { err, ok } from "@uncaged/nerve-core"; + +/** + * One level in global → agent → role merge. Use `null` for a field to inherit + * from the lower-precedence layer (RFC-003). + */ +export type ExtractConfigLayer = { + provider: string | null; + model: string | null; +}; + +export function mergeExtractConfig( + global: ExtractConfig | null, + agent: ExtractConfigLayer, + role: ExtractConfigLayer, +): Result { + const provider = role.provider ?? agent.provider ?? global?.provider ?? null; + const model = role.model ?? agent.model ?? global?.model ?? null; + + if (provider === null || provider.trim() === "") { + return err(new Error("extract: unresolved provider after merge")); + } + if (model === null || model.trim() === "") { + return err(new Error("extract: unresolved model after merge")); + } + + return ok({ provider, model }); +}