feat: @uncaged/workflow-role-llm — role factory + zod@4 schema

Migrated from nerve/workflow-utils:
- createRole with zod@4 schema → typed meta + JSON Schema
- createLlmAdapter — LLM provider abstraction
- llmExtract/llmExtractWithRetry — structured output extraction
- decorateRole/withDryRun/onFail — role decorators
- buildDescriptorFromRoles — auto-generate descriptor from zod schemas
- Zero nerve-core dependencies
- 83 tests pass, biome clean

Closes #9
小橘 <xiaoju@shazhou.work>
This commit is contained in:
2026-05-06 06:50:19 +00:00
parent 3467b772e6
commit c2a8f2d81b
16 changed files with 1311 additions and 1 deletions
@@ -0,0 +1,74 @@
import { describe, expect, test } from "bun:test";
import { validateWorkflowDescriptor } from "@uncaged/workflow";
import * as z from "zod/v4";
import { buildDescriptorFromRoles } from "../src/build-descriptor.js";
describe("buildDescriptorFromRoles", () => {
test("produces a descriptor that validates and includes JSON schemas per role", () => {
const schema = z.object({
title: z.string(),
count: z.number(),
});
const descriptor = buildDescriptorFromRoles({
description: "Demo workflow",
roles: {
analyst: {
name: "analyst",
schema,
description: "Analyzes input",
},
},
});
const validated = validateWorkflowDescriptor(descriptor);
expect(validated.ok).toBe(true);
if (!validated.ok) {
return;
}
expect(validated.value.description).toBe("Demo workflow");
const analyst = validated.value.roles.analyst;
expect(analyst.description).toBe("Analyzes input");
expect(analyst.schema.type).toBe("object");
const props = analyst.schema.properties as Record<string, unknown>;
expect(props.title).toMatchObject({ type: "string" });
expect(props.count).toMatchObject({ type: "number" });
});
test("uses empty description when spec.description is null", () => {
const descriptor = buildDescriptorFromRoles({
description: "W",
roles: {
x: {
name: "x",
schema: z.object({ n: z.number() }),
description: null,
},
},
});
const validated = validateWorkflowDescriptor(descriptor);
expect(validated.ok).toBe(true);
if (!validated.ok) {
return;
}
expect(validated.value.roles.x.description).toBe("");
});
test("throws when role key and spec.name diverge", () => {
expect(() =>
buildDescriptorFromRoles({
description: "W",
roles: {
a: {
name: "b",
schema: z.object({ n: z.number() }),
description: null,
},
},
}),
).toThrow(/must match spec.name/);
});
});
@@ -0,0 +1,64 @@
import { describe, expect, test } from "bun:test";
import { START, type ThreadContext } from "@uncaged/workflow";
import { createLlmAdapter } from "../src/create-llm-adapter.js";
function makeCtx(userContent: string): ThreadContext {
return {
start: {
role: START,
content: userContent,
meta: { maxRounds: 10 },
timestamp: 1,
},
steps: [],
};
}
describe("createLlmAdapter", () => {
const originalFetch = globalThis.fetch;
test("posts system + user (start.content) and returns assistant text", async () => {
globalThis.fetch = () =>
Promise.resolve(
new Response(JSON.stringify({ choices: [{ message: { content: "model reply" } }] }), {
status: 200,
headers: { "Content-Type": "application/json" },
}),
);
const provider = { baseUrl: "https://api.example/v1", apiKey: "k", model: "m" };
const adapter = createLlmAdapter(provider);
const out = await adapter(makeCtx("trigger text"), "system instructions");
globalThis.fetch = originalFetch;
expect(out).toBe("model reply");
});
test("throws on non-ok fetch response", async () => {
globalThis.fetch = () =>
Promise.resolve(
new Response("Internal Server Error", {
status: 500,
headers: { "Content-Type": "text/plain" },
}),
);
const provider = { baseUrl: "https://api.example/v1", apiKey: "k", model: "m" };
const adapter = createLlmAdapter(provider);
await expect(adapter(makeCtx("hi"), "sys")).rejects.toThrow("llm:");
globalThis.fetch = originalFetch;
});
test("throws on fetch network failure", async () => {
globalThis.fetch = () => Promise.reject(new Error("ECONNREFUSED"));
const provider = { baseUrl: "https://api.example/v1", apiKey: "k", model: "m" };
const adapter = createLlmAdapter(provider);
await expect(adapter(makeCtx("hi"), "sys")).rejects.toThrow();
globalThis.fetch = originalFetch;
});
});
@@ -0,0 +1,156 @@
import { afterEach, describe, expect, mock, spyOn, test } from "bun:test";
import type { AgentFn, ThreadContext } from "@uncaged/workflow";
import { START } from "@uncaged/workflow";
import * as z from "zod/v4";
import { createRole } from "../src/create-role.js";
import * as llmExtract from "../src/llm-extract.js";
const provider = {
baseUrl: "https://example.com/v1",
apiKey: "k",
model: "m",
};
function toolCallResponse(argsJson: string): Response {
return new Response(
JSON.stringify({
choices: [
{
message: {
tool_calls: [
{
function: {
name: "extract",
arguments: argsJson,
},
},
],
},
},
],
}),
{ status: 200, headers: { "Content-Type": "application/json" } },
);
}
function makeCtx(): ThreadContext {
return {
start: {
role: START,
content: "",
meta: { maxRounds: 10 },
timestamp: Date.now(),
},
steps: [],
};
}
describe("createRole", () => {
const originalFetch = globalThis.fetch;
afterEach(() => {
globalThis.fetch = originalFetch;
mock.restore();
});
test("runs AgentFn then structured extract", async () => {
globalThis.fetch = () => Promise.resolve(toolCallResponse(JSON.stringify({ n: 3 })));
const schema = z.object({ n: z.number() });
const agent: AgentFn = async (_ctx, prompt) => prompt;
const role = createRole({
name: "test",
schema,
systemPrompt: "hello",
agent,
extract: { provider, dryRun: null },
});
const out = await role(makeCtx());
expect(out.content).toBe("hello");
expect(out.meta).toEqual({ n: 3 });
});
test("passes ThreadContext to AgentFn", async () => {
globalThis.fetch = () => Promise.resolve(toolCallResponse(JSON.stringify({ n: 0 })));
const seen: ThreadContext[] = [];
const agent: AgentFn = async (ctx, _prompt) => {
seen.push(ctx);
return "x";
};
const role = createRole({
name: "test",
schema: z.object({ n: z.number() }),
systemPrompt: "p",
agent,
extract: { provider, dryRun: null },
});
await role(makeCtx());
expect(seen).toHaveLength(1);
expect(seen[0].steps).toEqual([]);
});
test("resolves dynamic systemPrompt functions before AgentFn", async () => {
globalThis.fetch = () => Promise.resolve(toolCallResponse(JSON.stringify({ n: 99 })));
const schema = z.object({ n: z.number() });
const agent: AgentFn = async (_ctx, prompt) => prompt;
const role = createRole({
name: "test",
schema,
systemPrompt: async (ctx) => `rounds=${ctx.steps.length}`,
agent,
extract: { provider, dryRun: null },
});
const ctx = makeCtx();
const out = await role(ctx);
expect(out.content).toBe("rounds=0");
expect(out.meta).toEqual({ n: 99 });
});
test("extract dryRun null runs live extract path", async () => {
const spy = spyOn(llmExtract, "extractMetaOrThrow").mockResolvedValue({ n: 0 });
const agent: AgentFn = async () => "raw";
const role = createRole({
name: "r1",
schema: z.object({ n: z.number() }),
systemPrompt: "p",
agent,
extract: { provider, dryRun: null },
});
await role(makeCtx());
expect(spy).toHaveBeenCalledWith(
"r1",
"raw",
expect.anything(),
expect.objectContaining({ provider, dryRun: false }),
);
});
test("extract.dryRun true uses structured extract dry-run", async () => {
const spy = spyOn(llmExtract, "extractMetaOrThrow").mockResolvedValue({ n: 0 });
const agent: AgentFn = async () => "raw";
const role = createRole({
name: "r2",
schema: z.object({ n: z.number() }),
systemPrompt: "p",
agent,
extract: { provider, dryRun: true },
});
await role(makeCtx());
expect(spy).toHaveBeenCalledWith(
"r2",
"raw",
expect.anything(),
expect.objectContaining({ dryRun: true }),
);
});
});
@@ -0,0 +1,100 @@
import { describe, expect, test } from "bun:test";
import type { Role, ThreadContext } from "@uncaged/workflow";
import { START } from "@uncaged/workflow";
import { decorateRole, onFail, withDryRun } from "../src/decorators.js";
type TestMeta = Record<string, unknown> & { ok: boolean };
function fakeCtx(): ThreadContext {
return {
start: {
role: START,
content: "",
meta: {
maxRounds: 10,
},
timestamp: Date.now(),
},
steps: [],
};
}
const successRole: Role<TestMeta> = async () => ({
content: "done",
meta: { ok: true },
});
const failRole: Role<TestMeta> = async () => {
throw new Error("boom");
};
const failNonErrorRole: Role<TestMeta> = async () => {
throw "string error";
};
describe("withDryRun", () => {
test("short-circuits on dry-run", async () => {
const dec = withDryRun<TestMeta>({ label: "test", meta: { ok: true }, dryRun: true });
const role = dec(successRole);
const result = await role(fakeCtx());
expect(result.content).toBe("[dry-run] test skipped");
expect(result.meta).toEqual({ ok: true });
});
test("delegates when not dry-run", async () => {
const innerDec = withDryRun<TestMeta>({ label: "test", meta: { ok: true }, dryRun: false });
const role = innerDec(successRole);
const result = await role(fakeCtx());
expect(result.content).toBe("done");
expect(result.meta).toEqual({ ok: true });
});
});
describe("onFail", () => {
test("passes through on success", async () => {
const dec = onFail<TestMeta>({ label: "test", meta: { ok: false } });
const role = dec(successRole);
const result = await role(fakeCtx());
expect(result.content).toBe("done");
expect(result.meta).toEqual({ ok: true });
});
test("catches Error and returns structured failure", async () => {
const dec = onFail<TestMeta>({ label: "test", meta: { ok: false } });
const role = dec(failRole);
const result = await role(fakeCtx());
expect(result.content).toBe("test failed: boom");
expect(result.meta).toEqual({ ok: false });
});
test("catches non-Error throws", async () => {
const dec = onFail<TestMeta>({ label: "test", meta: { ok: false } });
const role = dec(failNonErrorRole);
const result = await role(fakeCtx());
expect(result.content).toBe("test failed: string error");
expect(result.meta).toEqual({ ok: false });
});
});
describe("decorateRole", () => {
test("applies decorators left-to-right", async () => {
const role = decorateRole(failRole, [
withDryRun<TestMeta>({ label: "x", meta: { ok: true }, dryRun: false }),
onFail<TestMeta>({ label: "x", meta: { ok: false } }),
]);
const result = await role(fakeCtx());
expect(result.content).toBe("x failed: boom");
expect(result.meta).toEqual({ ok: false });
});
test("dry-run short-circuits before onFail", async () => {
const role = decorateRole(failRole, [
withDryRun<TestMeta>({ label: "x", meta: { ok: true }, dryRun: true }),
onFail<TestMeta>({ label: "x", meta: { ok: false } }),
]);
const result = await role(fakeCtx());
expect(result.content).toBe("[dry-run] x skipped");
expect(result.meta).toEqual({ ok: true });
});
});
@@ -0,0 +1,143 @@
import { describe, expect, test } from "bun:test";
import * as z from "zod/v4";
import { llmExtract } from "../src/llm-extract.js";
describe("llmExtract", () => {
const originalFetch = globalThis.fetch;
test("parses tool call arguments and validates with the zod schema", async () => {
const schema = z
.object({
name: z.string(),
description: z.string(),
})
.describe("Extract sense metadata from plan");
let capturedUrl: string | null = null;
let capturedInit: RequestInit | null = null;
globalThis.fetch = (input: RequestInfo | URL, init?: RequestInit) => {
capturedUrl = typeof input === "string" ? input : input.toString();
capturedInit = init ?? null;
return Promise.resolve(
new Response(
JSON.stringify({
choices: [
{
message: {
tool_calls: [
{
function: {
name: "extract",
arguments: JSON.stringify({
name: "cpu-usage",
description: "CPU load",
}),
},
},
],
},
},
],
}),
{ status: 200, headers: { "Content-Type": "application/json" } },
),
);
};
const result = await llmExtract({
text: "some plan",
schema,
provider: {
baseUrl: "https://example.com/v1",
apiKey: "k",
model: "m",
},
dryRun: false,
});
globalThis.fetch = originalFetch;
expect(result.ok).toBe(true);
if (!result.ok) {
return;
}
expect(result.value).toEqual({ name: "cpu-usage", description: "CPU load" });
expect(capturedUrl).toBe("https://example.com/v1/chat/completions");
expect(capturedInit?.method).toBe("POST");
expect(capturedInit?.headers).toMatchObject({
Authorization: "Bearer k",
"Content-Type": "application/json",
});
const body = JSON.parse(capturedInit?.body as string) as {
model: string;
tool_choice: { function: { name: string } };
};
expect(body.model).toBe("m");
expect(body.tool_choice.function.name).toBeDefined();
});
test("returns schema_validation_failed when arguments do not match the schema", async () => {
const schema = z.object({ n: z.number() });
globalThis.fetch = () =>
Promise.resolve(
new Response(
JSON.stringify({
choices: [
{
message: {
tool_calls: [
{ function: { name: "extract", arguments: JSON.stringify({ n: "oops" }) } },
],
},
},
],
}),
{ status: 200, headers: { "Content-Type": "application/json" } },
),
);
const result = await llmExtract({
text: "x",
schema,
provider: { baseUrl: "https://example.com", apiKey: "k", model: "m" },
dryRun: false,
});
globalThis.fetch = originalFetch;
expect(result.ok).toBe(false);
if (result.ok) {
return;
}
expect(result.error.kind).toBe("schema_validation_failed");
});
test("dryRun skips fetch and returns schema-shaped stub values", async () => {
let calls = 0;
globalThis.fetch = () => {
calls += 1;
return Promise.resolve(new Response("{}", { status: 200 }));
};
const schema = z.object({ n: z.number() });
const result = await llmExtract({
text: "ignored",
schema,
provider: { baseUrl: "https://example.com", apiKey: "k", model: "m" },
dryRun: true,
});
globalThis.fetch = originalFetch;
expect(calls).toBe(0);
expect(result.ok).toBe(true);
if (!result.ok) {
return;
}
expect(result.value).toEqual({ n: 0 });
});
});
+15
View File
@@ -0,0 +1,15 @@
{
"name": "@uncaged/workflow-role-llm",
"version": "0.1.0",
"type": "module",
"main": "src/index.ts",
"types": "src/index.ts",
"scripts": {
"build": "echo 'TODO'",
"test": "bun test"
},
"dependencies": {
"@uncaged/workflow": "workspace:*",
"zod": "^4.0.0"
}
}
@@ -0,0 +1,38 @@
import type { WorkflowDescriptor, WorkflowRoleSchema } from "@uncaged/workflow";
import * as z from "zod/v4";
export type RoleDescriptorInput<M extends Record<string, unknown> = Record<string, unknown>> = {
name: string;
schema: z.ZodType<M>;
/** Human-readable role description; use empty string when unknown. */
description: string | null;
};
function stripJsonSchemaMeta(json: Record<string, unknown>): WorkflowRoleSchema {
const { $schema: _drop, ...rest } = json;
return rest as WorkflowRoleSchema;
}
/**
* Builds a {@link WorkflowDescriptor} from role specs, emitting JSON Schema per role via
* `z.toJSONSchema`.
*/
export function buildDescriptorFromRoles(args: {
description: string;
roles: Record<string, RoleDescriptorInput>;
}): WorkflowDescriptor {
const roles: WorkflowDescriptor["roles"] = {};
for (const [key, spec] of Object.entries(args.roles)) {
if (spec.name !== key) {
throw new Error(
`buildDescriptorFromRoles: role key "${key}" must match spec.name "${spec.name}"`,
);
}
const rawJsonSchema = z.toJSONSchema(spec.schema) as Record<string, unknown>;
roles[key] = {
description: spec.description === null ? "" : spec.description,
schema: stripJsonSchemaMeta(rawJsonSchema),
};
}
return { description: args.description, roles };
}
@@ -0,0 +1,107 @@
import { type AgentFn, err, ok, type Result, type ThreadContext } from "@uncaged/workflow";
import type { LlmMessage, LlmProvider } from "./types.js";
export type LlmChatError =
| { kind: "http_error"; status: number; body: string }
| { kind: "invalid_response_json"; message: string }
| { kind: "network_error"; message: string }
| { kind: "empty_choices" }
| { kind: "no_assistant_text" };
function chatUrl(baseUrl: string): string {
const trimmed = baseUrl.replace(/\/+$/, "");
return `${trimmed}/chat/completions`;
}
function isRecord(value: unknown): value is Record<string, unknown> {
return typeof value === "object" && value !== null && !Array.isArray(value);
}
function formatLlmChatError(e: LlmChatError): string {
return JSON.stringify(e);
}
async function fetchChatJson(
provider: LlmProvider,
body: Record<string, unknown>,
): Promise<Result<unknown, LlmChatError>> {
let response: Response;
try {
response = await fetch(chatUrl(provider.baseUrl), {
method: "POST",
headers: {
Authorization: `Bearer ${provider.apiKey}`,
"Content-Type": "application/json",
},
body: JSON.stringify(body),
});
} catch (cause) {
const message = cause instanceof Error ? cause.message : String(cause);
return err({ kind: "network_error", message });
}
const responseText = await response.text();
if (!response.ok) {
return err({ kind: "http_error", status: response.status, body: responseText.slice(0, 4000) });
}
let parsed: unknown;
try {
parsed = JSON.parse(responseText) as unknown;
} catch (cause) {
const message = cause instanceof Error ? cause.message : String(cause);
return err({ kind: "invalid_response_json", message });
}
return ok(parsed);
}
function parseAssistantText(parsed: unknown): Result<string, LlmChatError> {
if (!isRecord(parsed)) {
return err({ kind: "invalid_response_json", message: "Not an object" });
}
const choices = parsed.choices;
if (!Array.isArray(choices) || choices.length === 0) {
return err({ kind: "empty_choices" });
}
const c0 = choices[0];
if (!isRecord(c0)) {
return err({ kind: "empty_choices" });
}
const messageObj = c0.message;
if (!isRecord(messageObj)) {
return err({ kind: "no_assistant_text" });
}
const content = messageObj.content;
if (typeof content === "string") {
return ok(content);
}
return err({ kind: "no_assistant_text" });
}
export async function chatCompletionText(options: {
provider: LlmProvider;
messages: LlmMessage[];
}): Promise<Result<string, LlmChatError>> {
const body = { model: options.provider.model, messages: options.messages };
const res = await fetchChatJson(options.provider, body);
if (!res.ok) {
return res;
}
return parseAssistantText(res.value);
}
/** Single-turn chat adapter: system comes from `createRole` prompt; user is the thread start frame. */
export function createLlmAdapter(provider: LlmProvider): AgentFn {
return async (ctx: ThreadContext, systemPrompt: string) => {
const result = await chatCompletionText({
provider,
messages: [
{ role: "system", content: systemPrompt },
{ role: "user", content: ctx.start.content },
],
});
if (!result.ok) {
throw new Error(`llm: ${formatLlmChatError(result.error)}`);
}
return result.value;
};
}
@@ -0,0 +1,35 @@
import type { AgentFn, Role, ThreadContext } from "@uncaged/workflow";
import type * as z from "zod/v4";
import { extractMetaOrThrow } from "./llm-extract.js";
import type { LlmProvider } from "./types.js";
export type CreateRoleArgs<M extends Record<string, unknown>> = {
name: string;
schema: z.ZodType<M>;
systemPrompt: string | ((ctx: ThreadContext) => Promise<string>);
agent: AgentFn;
extract: {
provider: LlmProvider;
/** When `true`, structured extract returns schema-shaped defaults. When `null`, live API extract. */
dryRun: boolean | null;
};
};
function resolveExtractDryRun(extractDryRun: boolean | null): boolean {
return extractDryRun === true;
}
/** Builds a {@link Role} from an {@link AgentFn}, system prompt, Zod meta schema, and extract wiring. */
export function createRole<M extends Record<string, unknown>>(args: CreateRoleArgs<M>): Role<M> {
return async (ctx: ThreadContext) => {
const promptText =
typeof args.systemPrompt === "string" ? args.systemPrompt : await args.systemPrompt(ctx);
const raw = await args.agent(ctx, promptText);
const meta = await extractMetaOrThrow(args.name, raw, args.schema, {
provider: args.extract.provider,
dryRun: resolveExtractDryRun(args.extract.dryRun),
});
return { content: raw, meta };
};
}
@@ -0,0 +1,63 @@
import type { Role, ThreadContext } from "@uncaged/workflow";
/** A role decorator: takes a role, returns an enhanced role. */
export type RoleDecorator<M extends Record<string, unknown>> = (role: Role<M>) => Role<M>;
/**
* Apply an ordered list of decorators to a role.
* Decorators are applied left-to-right (first in list wraps innermost).
*/
export function decorateRole<M extends Record<string, unknown>>(
role: Role<M>,
decorators: RoleDecorator<M>[],
): Role<M> {
return decorators.reduce((r, dec) => dec(r), role);
}
export type WithDryRunOptions<M extends Record<string, unknown>> = {
/** Used in skip message (e.g. "committer", "publish"). */
label: string;
/** Meta returned when dry-run skips execution. */
meta: M;
/** Adapter-level dry-run flag (e.g. from extract / wiring config). */
dryRun: boolean;
};
/** Short-circuits with a stable result when `dryRun` is true. */
export function withDryRun<M extends Record<string, unknown>>(
opts: WithDryRunOptions<M>,
): RoleDecorator<M> {
return (role) => async (ctx: ThreadContext) => {
if (opts.dryRun) {
return {
content: `[dry-run] ${opts.label} skipped`,
meta: opts.meta,
};
}
return role(ctx);
};
}
export type OnFailOptions<M extends Record<string, unknown>> = {
/** Used in failure message (e.g. "committer", "publish"). */
label: string;
/** Meta returned when the inner role throws. */
meta: M;
};
/** Catches thrown errors and converts them into a structured {@link Role} result instead of propagating. */
export function onFail<M extends Record<string, unknown>>(
opts: OnFailOptions<M>,
): RoleDecorator<M> {
return (role) => async (ctx: ThreadContext) => {
try {
return await role(ctx);
} catch (e) {
const msg = e instanceof Error ? e.message : String(e);
return {
content: `${opts.label} failed: ${msg}`,
meta: opts.meta,
};
}
};
}
+22
View File
@@ -0,0 +1,22 @@
export { buildDescriptorFromRoles, type RoleDescriptorInput } from "./build-descriptor.js";
export { chatCompletionText, createLlmAdapter, type LlmChatError } from "./create-llm-adapter.js";
export { type CreateRoleArgs, createRole } from "./create-role.js";
export {
decorateRole,
type OnFailOptions,
onFail,
type RoleDecorator,
type WithDryRunOptions,
withDryRun,
} from "./decorators.js";
export {
extractMetaOrThrow,
type LlmError,
type LlmExtractArgs,
type LlmProvider,
llmErrorToCause,
llmExtract,
llmExtractWithRetry,
} from "./llm-extract.js";
export { schemaDefaults } from "./schema-defaults.js";
export type { LlmMessage, MetaExtractConfig } from "./types.js";
@@ -0,0 +1,274 @@
import { err, ok, type Result } from "@uncaged/workflow";
import * as z from "zod/v4";
import { schemaDefaults } from "./schema-defaults.js";
import type { LlmProvider } from "./types.js";
export type { LlmProvider } from "./types.js";
export type LlmExtractArgs<T> = {
text: string;
schema: z.ZodType<T>;
provider: LlmProvider;
dryRun: boolean;
};
export type LlmError =
| { kind: "http_error"; status: number; body: string }
| { kind: "invalid_response_json"; message: string }
| { kind: "no_tool_call"; preview: string }
| { kind: "tool_arguments_invalid_json"; message: string }
| { kind: "schema_validation_failed"; message: string }
| { kind: "network_error"; message: string };
function chatCompletionsUrl(baseUrl: string): string {
const trimmed = baseUrl.replace(/\/+$/, "");
return `${trimmed}/chat/completions`;
}
function isRecord(value: unknown): value is Record<string, unknown> {
return typeof value === "object" && value !== null && !Array.isArray(value);
}
function stripJsonSchemaMeta(json: Record<string, unknown>): Record<string, unknown> {
const { $schema: _drop, ...rest } = json;
return rest;
}
function readToolName(parametersSchema: Record<string, unknown>): string {
const title = parametersSchema.title;
if (typeof title === "string" && title.trim().length > 0) {
return title.trim();
}
return "extract";
}
function readToolDescription(parametersSchema: Record<string, unknown>): string {
const d = parametersSchema.description;
if (typeof d === "string" && d.trim().length > 0) {
return d.trim();
}
return "Extract structured data from the input text.";
}
function readToolArgumentsJson(parsed: unknown, previewSource: string): Result<string, LlmError> {
if (!isRecord(parsed)) {
return err({ kind: "invalid_response_json", message: "Top-level JSON is not an object" });
}
const choices = parsed.choices;
if (!Array.isArray(choices) || choices.length === 0) {
return err({ kind: "no_tool_call", preview: previewSource.slice(0, 500) });
}
const first = choices[0];
if (!isRecord(first)) {
return err({ kind: "no_tool_call", preview: previewSource.slice(0, 500) });
}
const messageObj = first.message;
if (!isRecord(messageObj)) {
return err({ kind: "no_tool_call", preview: previewSource.slice(0, 500) });
}
const toolCalls = messageObj.tool_calls;
if (!Array.isArray(toolCalls) || toolCalls.length === 0) {
return err({ kind: "no_tool_call", preview: previewSource.slice(0, 500) });
}
const call0 = toolCalls[0];
if (!isRecord(call0)) {
return err({ kind: "no_tool_call", preview: previewSource.slice(0, 500) });
}
const fn = call0.function;
if (!isRecord(fn)) {
return err({ kind: "no_tool_call", preview: previewSource.slice(0, 500) });
}
const argsRaw = fn.arguments;
if (typeof argsRaw !== "string") {
return err({ kind: "no_tool_call", preview: previewSource.slice(0, 500) });
}
return ok(argsRaw);
}
function isRetryableExtractError(error: LlmError): boolean {
return error.kind === "schema_validation_failed" || error.kind === "tool_arguments_invalid_json";
}
function describeRetryHint(error: LlmError): string {
if (error.kind === "schema_validation_failed") {
return `Schema validation failed: ${error.message}`;
}
if (error.kind === "tool_arguments_invalid_json") {
return `Tool arguments were not valid JSON: ${error.message}`;
}
return JSON.stringify(error);
}
export function llmErrorToCause(error: LlmError): Error {
switch (error.kind) {
case "http_error":
return new Error(`HTTP ${error.status}: ${error.body.slice(0, 500)}`);
case "invalid_response_json":
return new Error(error.message);
case "no_tool_call":
return new Error(`No tool call in response: ${error.preview}`);
case "tool_arguments_invalid_json":
return new Error(error.message);
case "schema_validation_failed":
return new Error(error.message);
case "network_error":
return new Error(error.message);
}
}
async function performLlmExtract<T>(
options: LlmExtractArgs<T> & { userContent: string },
): Promise<Result<T, LlmError>> {
if (options.dryRun) {
return ok(schemaDefaults(options.schema) as T);
}
const rawJsonSchema = z.toJSONSchema(options.schema) as Record<string, unknown>;
const parameters = stripJsonSchemaMeta(rawJsonSchema);
const toolName = readToolName(parameters);
const toolDescription = readToolDescription(parameters);
const body = {
model: options.provider.model,
messages: [
{
role: "system" as const,
content: "Extract the requested information from the provided text. Be precise.",
},
{ role: "user" as const, content: options.userContent },
],
tools: [
{
type: "function" as const,
function: {
name: toolName,
description: toolDescription,
parameters,
},
},
],
tool_choice: { type: "function" as const, function: { name: toolName } },
};
let response: Response;
try {
response = await fetch(chatCompletionsUrl(options.provider.baseUrl), {
method: "POST",
headers: {
Authorization: `Bearer ${options.provider.apiKey}`,
"Content-Type": "application/json",
},
body: JSON.stringify(body),
});
} catch (cause) {
const message = cause instanceof Error ? cause.message : String(cause);
return err({ kind: "network_error", message });
}
const responseText = await response.text();
if (!response.ok) {
return err({ kind: "http_error", status: response.status, body: responseText.slice(0, 4000) });
}
let parsed: unknown;
try {
parsed = JSON.parse(responseText) as unknown;
} catch (cause) {
const message = cause instanceof Error ? cause.message : String(cause);
return err({ kind: "invalid_response_json", message });
}
const argsJson = readToolArgumentsJson(parsed, responseText);
if (!argsJson.ok) {
return argsJson;
}
let argsParsed: unknown;
try {
argsParsed = JSON.parse(argsJson.value) as unknown;
} catch (cause) {
const message = cause instanceof Error ? cause.message : String(cause);
return err({ kind: "tool_arguments_invalid_json", message });
}
const validated = options.schema.safeParse(argsParsed);
if (!validated.success) {
return err({
kind: "schema_validation_failed",
message: validated.error.message,
});
}
return ok(validated.data);
}
/** Single LLM extract attempt over OpenAI-compatible chat completions with forced tool call. */
export async function llmExtract<T>(options: LlmExtractArgs<T>): Promise<Result<T, LlmError>> {
return performLlmExtract({ ...options, userContent: options.text });
}
/**
* Runs extract up to two times: on the first schema/tool-args parse failure, resends the agent
* output plus the error so the model can correct the tool call.
*/
export async function llmExtractWithRetry<T>(
options: LlmExtractArgs<T>,
): Promise<Result<T, LlmError>> {
const first = await performLlmExtract({
...options,
userContent: options.text,
});
if (first.ok) {
return first;
}
if (!isRetryableExtractError(first.error)) {
return first;
}
const hint = describeRetryHint(first.error);
const correction = `The previous extraction attempt failed.
${hint}
Respond again with a single tool call whose \`arguments\` JSON strictly matches the schema.`;
const secondContent = `${options.text}
---
${correction}`;
return performLlmExtract({
...options,
userContent: secondContent,
});
}
export async function extractMetaOrThrow<T extends Record<string, unknown>>(
roleName: string,
raw: string,
schema: z.ZodType<T>,
options: { provider: LlmProvider; dryRun: boolean },
): Promise<T> {
const result = await llmExtractWithRetry({
text: raw,
schema,
provider: options.provider,
dryRun: options.dryRun,
});
if (!result.ok) {
throw new Error(
`Role "${roleName}": structured extraction failed after retry: ${JSON.stringify(result.error)}`,
);
}
return result.value;
}
@@ -0,0 +1,190 @@
import type * as z from "zod/v4";
type ZodTypeAny = z.ZodType;
type Def = Record<string, unknown> & { type: string };
type TypeHandler = (schema: ZodTypeAny, def: Def) => unknown;
function isPlainObject(value: unknown): value is Record<string, unknown> {
return typeof value === "object" && value !== null && !Array.isArray(value);
}
function isZodExactOptional(s: ZodTypeAny): boolean {
return s.constructor.name === "ZodExactOptional";
}
function resolveDefaultValue(defaultValue: unknown | (() => unknown)): unknown {
if (typeof defaultValue === "function") {
return (defaultValue as () => unknown)();
}
return defaultValue;
}
function mergeIntersection(left: unknown, right: unknown): unknown {
if (isPlainObject(left) && isPlainObject(right)) {
return { ...left, ...right };
}
return right;
}
function defaultsForObject(_schema: ZodTypeAny, def: Def): unknown {
const shape = def.shape as Record<string, ZodTypeAny> | undefined;
if (shape === undefined) {
return {};
}
const out: Record<string, unknown> = {};
for (const key of Object.keys(shape)) {
const child = shape[key];
const cdef = child.def as { type: string };
if (cdef.type === "optional") {
if (isZodExactOptional(child)) {
continue;
}
out[key] = undefined;
} else {
out[key] = schemaDefaultsInner(child);
}
}
return out;
}
function firstUnionOption(_schema: ZodTypeAny, def: Def): unknown {
const options = def.options as readonly ZodTypeAny[] | undefined;
if (options === undefined || options.length === 0) {
return null;
}
return schemaDefaultsInner(options[0]);
}
function defaultsFromNullable(_schema: ZodTypeAny, _def: Def): unknown {
return null;
}
function defaultsFromInner(_schema: ZodTypeAny, def: Def): unknown {
const inner = def.innerType as ZodTypeAny | undefined;
if (inner === undefined) {
return null;
}
return schemaDefaultsInner(inner);
}
function defaultsForPipe(_schema: ZodTypeAny, def: Def): unknown {
const out = def.out as ZodTypeAny | undefined;
if (out === undefined) {
return null;
}
return schemaDefaultsInner(out);
}
function defaultsForIntersection(_schema: ZodTypeAny, def: Def): unknown {
const left = def.left as ZodTypeAny | undefined;
const right = def.right as ZodTypeAny | undefined;
if (left === undefined || right === undefined) {
return null;
}
return mergeIntersection(schemaDefaultsInner(left), schemaDefaultsInner(right));
}
function defaultsForTuple(_schema: ZodTypeAny, def: Def): unknown {
const items = def.items as readonly ZodTypeAny[] | undefined;
if (items === undefined) {
return [];
}
return items.map((item) => schemaDefaultsInner(item));
}
function defaultsForLazy(schema: ZodTypeAny, def: Def): unknown {
const inner =
(schema as { _zod?: { innerType?: ZodTypeAny } })._zod?.innerType ??
(def.getter as (() => ZodTypeAny) | undefined)?.();
if (inner === undefined) {
return null;
}
return schemaDefaultsInner(inner);
}
function defaultsForPromise(_schema: ZodTypeAny, def: Def): unknown {
const inner = def.innerType as ZodTypeAny | undefined;
if (inner === undefined) {
return Promise.resolve(null);
}
return Promise.resolve(schemaDefaultsInner(inner));
}
function firstEnumValue(_schema: ZodTypeAny, def: Def): unknown {
const entries = def.entries as Record<string, string | number> | undefined;
if (entries === undefined) {
return null;
}
const values = Object.values(entries);
return values[0] ?? null;
}
function firstLiteralValue(_schema: ZodTypeAny, def: Def): unknown {
const values = def.values as unknown[] | undefined;
if (values === undefined || values.length === 0) {
return null;
}
return values[0];
}
const TYPE_HANDLERS: Record<string, TypeHandler> = {
string: () => "",
number: () => 0,
boolean: () => false,
bigint: () => 0n,
date: () => new Date(0),
symbol: () => Symbol(),
undefined: () => undefined,
null: () => null,
void: () => undefined,
any: () => null,
unknown: () => null,
never: () => undefined,
nan: () => Number.NaN,
array: () => [],
object: defaultsForObject,
record: () => ({}),
map: () => new Map(),
set: () => new Set(),
enum: firstEnumValue,
literal: firstLiteralValue,
optional: () => undefined,
nullable: defaultsFromNullable,
default: (_s, def) => resolveDefaultValue(def.defaultValue as unknown | (() => unknown)),
prefault: (_s, def) => resolveDefaultValue(def.defaultValue as unknown | (() => unknown)),
nonoptional: defaultsFromInner,
catch: defaultsFromInner,
success: () => false,
readonly: defaultsFromInner,
union: firstUnionOption,
xor: firstUnionOption,
intersection: defaultsForIntersection,
pipe: defaultsForPipe,
transform: () => null,
tuple: defaultsForTuple,
lazy: defaultsForLazy,
promise: defaultsForPromise,
file: () => new File([], ""),
function: () => null,
custom: () => null,
template_literal: () => "",
};
/**
* Produces a structurally valid placeholder that mirrors primitive/array/object
* shape for a Zod schema. Used for `llmExtract` dry runs so downstream code
* does not throw on `undefined` fields.
*/
export function schemaDefaults(schema: z.ZodType): unknown {
return schemaDefaultsInner(schema as ZodTypeAny);
}
function schemaDefaultsInner(schema: ZodTypeAny): unknown {
const def = schema.def as Def;
const run = TYPE_HANDLERS[def.type];
if (run === undefined) {
return null;
}
return run(schema, def);
}
+15
View File
@@ -0,0 +1,15 @@
import type * as z from "zod/v4";
export type LlmProvider = {
baseUrl: string;
apiKey: string;
model: string;
};
export type LlmMessage = { role: "system" | "user" | "assistant"; content: string };
/** Pairs an OpenAI-compatible provider with the Zod meta schema used for structured extraction. */
export type MetaExtractConfig<T> = {
provider: LlmProvider;
schema: z.ZodType<T>;
};
+10
View File
@@ -0,0 +1,10 @@
{
"extends": "../../tsconfig.json",
"compilerOptions": {
"rootDir": "src",
"outDir": "dist",
"composite": true
},
"include": ["src/**/*.ts"],
"references": [{ "path": "../workflow" }]
}