From e5ae9a134cb9c9e8f7821a22f937b1bcd3ccf33f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= Date: Fri, 22 May 2026 06:29:56 +0000 Subject: [PATCH 01/13] feat: register $first/$last JSONata functions in moderator Register custom $first(role) and $last(role) functions in the JSONata evaluator. These search the steps array and return the matching role's frontmatter (output) directly, replacing verbose steps[-1].output.x expressions with semantic $last('role').field syntax. - workflow-moderator: register functions via expr.registerFunction() - Updated all condition expressions in .workflows/ and examples/ - Added tests for $last, $first, and unmatched role (undefined) Fixes #376 --- .workflows/solve-issue.yaml | 12 +- examples/solve-issue.yaml | 2 +- .../__tests__/evaluate.test.ts | 122 +++++++++++++++++- packages/workflow-moderator/src/evaluate.ts | 34 ++++- 4 files changed, 157 insertions(+), 13 deletions(-) diff --git a/.workflows/solve-issue.yaml b/.workflows/solve-issue.yaml index fddb1e1..cb78960 100644 --- a/.workflows/solve-issue.yaml +++ b/.workflows/solve-issue.yaml @@ -124,22 +124,22 @@ roles: conditions: insufficientInfo: description: "Planner determined there's not enough info to proceed" - expression: "steps[-1].output.status = 'insufficient_info'" + expression: "$last('planner').status = 'insufficient_info'" devFailed: description: "Developer failed to implement" - expression: "steps[-1].output.status = 'failed'" + expression: "$last('developer').status = 'failed'" rejected: description: "Reviewer rejected the implementation" - expression: "steps[-1].output.approved = false" + expression: "$last('reviewer').approved = false" fixCode: description: "Tester found code issues" - expression: "steps[-1].output.status = 'fix_code'" + expression: "$last('tester').status = 'fix_code'" fixSpec: description: "Tester found spec issues" - expression: "steps[-1].output.status = 'fix_spec'" + expression: "$last('tester').status = 'fix_spec'" hookFailed: description: "Push hook failed" - expression: "steps[-1].output.success = false" + expression: "$last('committer').success = false" graph: $START: - role: "planner" diff --git a/examples/solve-issue.yaml b/examples/solve-issue.yaml index 299b572..4e5d65b 100644 --- a/examples/solve-issue.yaml +++ b/examples/solve-issue.yaml @@ -57,7 +57,7 @@ roles: conditions: notApproved: description: "Reviewer rejected the implementation" - expression: "steps[-1].output.approved = false" + expression: "$last('reviewer').approved = false" graph: $START: - role: "planner" diff --git a/packages/workflow-moderator/__tests__/evaluate.test.ts b/packages/workflow-moderator/__tests__/evaluate.test.ts index 54b6604..01fdd5d 100644 --- a/packages/workflow-moderator/__tests__/evaluate.test.ts +++ b/packages/workflow-moderator/__tests__/evaluate.test.ts @@ -35,11 +35,11 @@ const solveIssueWorkflow: WorkflowPayload = { conditions: { needsClarification: { description: "Planner requests clarification from user", - expression: "$exists(steps[-1].output.needsClarification)", + expression: "$exists($last('planner').needsClarification)", }, - notApproved: { + rejected: { description: "Reviewer rejected the implementation", - expression: "steps[-1].output.approved = false", + expression: "$last('reviewer').approved = false", }, }, graph: { @@ -50,7 +50,7 @@ const solveIssueWorkflow: WorkflowPayload = { ], developer: [{ role: "reviewer", condition: null }], reviewer: [ - { role: "developer", condition: "notApproved" }, + { role: "developer", condition: "rejected" }, { role: "$END", condition: null }, ], }, @@ -72,7 +72,7 @@ describe("evaluate", () => { expect(result).toEqual({ ok: true, value: "planner" }); }); - test("condition match (notApproved → developer)", async () => { + test("condition match (rejected → developer)", async () => { const context = makeContext([ { role: "reviewer", @@ -126,4 +126,116 @@ describe("evaluate", () => { const result = await evaluate(solveIssueWorkflow, context); expect(result).toEqual({ ok: true, value: "developer" }); }); + + test("$last returns most recent matching role's frontmatter", async () => { + const workflow: WorkflowPayload = { + ...solveIssueWorkflow, + conditions: { + devFailed: { + description: "Developer failed", + expression: "$last('developer').status = 'failed'", + }, + }, + graph: { + $START: [{ role: "developer", condition: null }], + developer: [ + { role: "$END", condition: "devFailed" }, + { role: "reviewer", condition: null }, + ], + }, + }; + const context = makeContext([ + { + role: "developer", + output: { status: "done" }, + detail: "1VPBG9SM5E7WK", + agent: "uwf-hermes", + }, + { + role: "reviewer", + output: { approved: false }, + detail: "2MXBG6PN4A8JR", + agent: "uwf-hermes", + }, + { + role: "developer", + output: { status: "failed" }, + detail: "3QNTH7WK8D2PA", + agent: "uwf-hermes", + }, + ]); + const result = await evaluate(workflow, context); + expect(result).toEqual({ ok: true, value: "$END" }); + }); + + test("$first returns earliest matching role's frontmatter", async () => { + const workflow: WorkflowPayload = { + ...solveIssueWorkflow, + conditions: { + firstPlanReady: { + description: "First planner run was ready", + expression: "$first('planner').status = 'ready'", + }, + }, + graph: { + $START: [{ role: "planner", condition: null }], + planner: [ + { role: "$END", condition: "firstPlanReady" }, + { role: "developer", condition: null }, + ], + }, + }; + const context = makeContext([ + { + role: "planner", + output: { status: "ready", plan: "ABC123" }, + detail: "7BQST3VW9F2MA", + agent: "uwf-hermes", + }, + { + role: "developer", + output: { status: "done" }, + detail: "1VPBG9SM5E7WK", + agent: "uwf-hermes", + }, + { + role: "planner", + output: { status: "revised", plan: "DEF456" }, + detail: "4RNMK6PX8B3WQ", + agent: "uwf-hermes", + }, + ]); + const result = await evaluate(workflow, context); + expect(result).toEqual({ ok: true, value: "$END" }); + }); + + test("$last returns undefined for unmatched role", async () => { + const workflow: WorkflowPayload = { + ...solveIssueWorkflow, + conditions: { + hasReviewer: { + description: "Reviewer has run", + expression: "$exists($last('reviewer'))", + }, + }, + graph: { + $START: [{ role: "planner", condition: null }], + planner: [ + { role: "$END", condition: "hasReviewer" }, + { role: "developer", condition: null }, + ], + }, + }; + const context = makeContext([ + { + role: "planner", + output: { status: "ready" }, + detail: "7BQST3VW9F2MA", + agent: "uwf-hermes", + }, + ]); + const result = await evaluate(workflow, context); + // no reviewer step → $exists returns false → fallback to developer + expect(result).toEqual({ ok: true, value: "developer" }); + }); }); diff --git a/packages/workflow-moderator/src/evaluate.ts b/packages/workflow-moderator/src/evaluate.ts index 48cb7c7..a37c73e 100644 --- a/packages/workflow-moderator/src/evaluate.ts +++ b/packages/workflow-moderator/src/evaluate.ts @@ -21,12 +21,44 @@ function isTruthy(value: unknown): boolean { return true; } +function findByRole( + steps: ModeratorContext["steps"], + role: string, + direction: "first" | "last", +): unknown { + if (direction === "last") { + for (let i = steps.length - 1; i >= 0; i--) { + if (steps[i].role === role) { + return steps[i].output; + } + } + } else { + for (const step of steps) { + if (step.role === role) { + return step.output; + } + } + } + return undefined; +} + async function evaluateJsonata( expression: string, context: ModeratorContext, ): Promise> { try { - const result = await jsonata(expression).evaluate(context); + const expr = jsonata(expression); + expr.registerFunction( + "first", + (role: string) => findByRole(context.steps, role, "first"), + "", + ); + expr.registerFunction( + "last", + (role: string) => findByRole(context.steps, role, "last"), + "", + ); + const result = await expr.evaluate(context); return { ok: true, value: result }; } catch (error) { return { From e67932c83c3235feda220f74ef98ce5468d61e2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= Date: Fri, 22 May 2026 07:38:14 +0000 Subject: [PATCH 02/13] fix: accept omitted condition in fallback transitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fallback transitions (last entry in graph node) omit the condition field in YAML, resulting in undefined instead of null. The validator and materializer now handle this: - validate.ts: accept undefined as valid condition value - workflow.ts: normalizeGraph() coerces undefined → null before CAS put This was broken by the graph fallback pattern introduced in #370. --- .../cli-workflow/src/commands/workflow.ts | 21 +++++++++++++++++-- packages/cli-workflow/src/validate.ts | 5 ++++- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/packages/cli-workflow/src/commands/workflow.ts b/packages/cli-workflow/src/commands/workflow.ts index 8bc1f84..782f2b5 100644 --- a/packages/cli-workflow/src/commands/workflow.ts +++ b/packages/cli-workflow/src/commands/workflow.ts @@ -2,7 +2,12 @@ import { readFile } from "node:fs/promises"; import type { JSONSchema } from "@uncaged/json-cas"; import { putSchema, validate } from "@uncaged/json-cas"; -import type { CasRef, RoleDefinition, WorkflowPayload } from "@uncaged/workflow-protocol"; +import type { + CasRef, + RoleDefinition, + Transition, + WorkflowPayload, +} from "@uncaged/workflow-protocol"; import { parse } from "yaml"; import { @@ -46,6 +51,18 @@ function isJsonSchema(value: unknown): value is JSONSchema { return typeof value === "object" && value !== null && !Array.isArray(value); } +/** Normalize graph transitions: ensure condition is null (not undefined) for fallback entries. */ +function normalizeGraph(graph: Record): Record { + const result: Record = {}; + for (const [node, transitions] of Object.entries(graph)) { + result[node] = transitions.map((t) => ({ + role: t.role, + condition: t.condition ?? null, + })); + } + return result; +} + async function resolveFrontmatterRef( uwf: UwfStore, roleName: string, @@ -84,7 +101,7 @@ export async function materializeWorkflowPayload( description: raw.description, roles, conditions: raw.conditions, - graph: raw.graph, + graph: normalizeGraph(raw.graph), }; } diff --git a/packages/cli-workflow/src/validate.ts b/packages/cli-workflow/src/validate.ts index 5a3a5cc..6cffdef 100644 --- a/packages/cli-workflow/src/validate.ts +++ b/packages/cli-workflow/src/validate.ts @@ -42,7 +42,10 @@ function isTransition(value: unknown): boolean { return false; } const condition = value.condition; - return typeof value.role === "string" && (condition === null || typeof condition === "string"); + return ( + typeof value.role === "string" && + (condition === null || condition === undefined || typeof condition === "string") + ); } function isStringRecord(value: unknown, itemCheck: (item: unknown) => boolean): boolean { From 45dacf540b00293c97e8eb7eb96bc2b1b3af3023 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= Date: Fri, 22 May 2026 08:06:26 +0000 Subject: [PATCH 03/13] feat: thread step --count/-c to run multiple steps Add --count/-c flag to 'uwf thread step' for running N steps in one invocation, stopping early if $END is reached. - cmdThreadStep now loops up to count times, delegates to cmdThreadStepOnce - CLI parses -c/--count, defaults to 1 (backward compatible single output) - Validation rejects 0, negative, and non-integer counts - 7 new tests covering CLI parsing and count validation Fixes #373 Co-authored-by: uwf-hermes (solve-issue workflow) --- .../src/__tests__/thread-step-count.test.ts | 71 +++++++++++++++++++ packages/cli-workflow/src/cli.ts | 14 ++-- packages/cli-workflow/src/commands/thread.ts | 21 ++++++ 3 files changed, 102 insertions(+), 4 deletions(-) create mode 100644 packages/cli-workflow/src/__tests__/thread-step-count.test.ts diff --git a/packages/cli-workflow/src/__tests__/thread-step-count.test.ts b/packages/cli-workflow/src/__tests__/thread-step-count.test.ts new file mode 100644 index 0000000..2340f0c --- /dev/null +++ b/packages/cli-workflow/src/__tests__/thread-step-count.test.ts @@ -0,0 +1,71 @@ +import { execFileSync } from "node:child_process"; +import { join } from "node:path"; +import { describe, expect, test } from "vitest"; + +const CLI_PATH = join(import.meta.dirname, "..", "cli.js"); + +function runCli(args: string[]): { stdout: string; stderr: string; exitCode: number } { + try { + const stdout = execFileSync("bun", ["run", CLI_PATH, ...args], { + encoding: "utf8", + env: { ...process.env, WORKFLOW_STORAGE_ROOT: "/tmp/uwf-test-nonexistent" }, + stdio: ["ignore", "pipe", "pipe"], + }); + return { stdout, stderr: "", exitCode: 0 }; + } catch (e: unknown) { + const err = e as NodeJS.ErrnoException & { stdout?: string; stderr?: string; status?: number }; + return { + stdout: err.stdout ?? "", + stderr: err.stderr ?? "", + exitCode: err.status ?? 1, + }; + } +} + +describe("thread step --count CLI parsing", () => { + test("--help shows -c/--count option", () => { + const result = runCli(["thread", "step", "--help"]); + expect(result.stdout).toContain("--count"); + expect(result.stdout).toContain("-c"); + }); + + test("description says 'one or more steps'", () => { + const result = runCli(["thread", "step", "--help"]); + expect(result.stdout).toContain("one or more steps"); + }); +}); + +describe("cmdThreadStep count logic", () => { + test("count=0 fails with validation error", () => { + const result = runCli(["thread", "step", "FAKE_THREAD_ID", "-c", "0"]); + expect(result.exitCode).not.toBe(0); + expect(result.stderr).toContain("positive integer"); + }); + + test("negative count fails with validation error", () => { + const result = runCli(["thread", "step", "FAKE_THREAD_ID", "-c", "-1"]); + expect(result.exitCode).not.toBe(0); + expect(result.stderr).toContain("positive integer"); + }); + + test("non-integer count fails with validation error", () => { + const result = runCli(["thread", "step", "FAKE_THREAD_ID", "-c", "1.5"]); + expect(result.exitCode).not.toBe(0); + expect(result.stderr).toContain("positive integer"); + }); + + test("count=1 is the default (no -c flag)", () => { + // Without -c, it should attempt to run 1 step (failing on missing thread, not on count validation) + const result = runCli(["thread", "step", "FAKE_THREAD_ID"]); + expect(result.exitCode).not.toBe(0); + // Should NOT contain "positive integer" error — should fail on thread lookup instead + expect(result.stderr).not.toContain("positive integer"); + }); + + test("count=3 passes validation (fails on thread lookup)", () => { + const result = runCli(["thread", "step", "FAKE_THREAD_ID", "-c", "3"]); + expect(result.exitCode).not.toBe(0); + // Should NOT contain "positive integer" error — should fail on thread/storage lookup + expect(result.stderr).not.toContain("positive integer"); + }); +}); diff --git a/packages/cli-workflow/src/cli.ts b/packages/cli-workflow/src/cli.ts index 378c093..dd8cc72 100755 --- a/packages/cli-workflow/src/cli.ts +++ b/packages/cli-workflow/src/cli.ts @@ -108,15 +108,21 @@ thread thread .command("step") - .description("Execute one step") + .description("Execute one or more steps") .argument("", "Thread ULID") .option("--agent ", "Override agent command") - .action((threadId: string, opts: { agent: string | undefined }) => { + .option("-c, --count ", "Number of steps to run (default: 1)") + .action((threadId: string, opts: { agent: string | undefined; count: string | undefined }) => { const storageRoot = resolveStorageRoot(); runAction(async () => { const agentOverride = opts.agent ?? null; - const result = await cmdThreadStep(storageRoot, threadId, agentOverride); - writeOutput(result); + const count = opts.count !== undefined ? Number(opts.count) : 1; + const results = await cmdThreadStep(storageRoot, threadId, agentOverride, count); + if (results.length === 1) { + writeOutput(results[0]); + } else { + writeOutput(results); + } }); }); diff --git a/packages/cli-workflow/src/commands/thread.ts b/packages/cli-workflow/src/commands/thread.ts index e7146be..e7614bb 100644 --- a/packages/cli-workflow/src/commands/thread.ts +++ b/packages/cli-workflow/src/commands/thread.ts @@ -673,6 +673,27 @@ export async function cmdThreadStep( storageRoot: string, threadId: ThreadId, agentOverride: string | null, + count: number, +): Promise { + if (count < 1 || !Number.isInteger(count)) { + fail(`--count must be a positive integer, got: ${count}`); + } + + const results: StepOutput[] = []; + for (let i = 0; i < count; i++) { + const result = await cmdThreadStepOnce(storageRoot, threadId, agentOverride); + results.push(result); + if (result.done) { + break; + } + } + return results; +} + +async function cmdThreadStepOnce( + storageRoot: string, + threadId: ThreadId, + agentOverride: string | null, ): Promise { const index = await loadThreadsIndex(storageRoot); const headHash = index[threadId]; From 3b7d0564bb026ff0f1c4be64bb2170ac52feb06a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= Date: Fri, 22 May 2026 08:53:27 +0000 Subject: [PATCH 04/13] feat: uwf cas put-text for storing plain text in CAS - Register built-in text schema ({type: 'string'}) alongside workflow schemas - Add cmdCasPutText command: uwf cas put-text - Update CLI reference in workflow-util - Update solve-issue.yaml procedure to use put-text Refs #380 --- .workflows/solve-issue.yaml | 2 +- packages/cli-workflow/src/cli.ts | 12 ++++++++++++ packages/cli-workflow/src/commands/cas.ts | 11 ++++++++++- packages/cli-workflow/src/schemas.ts | 8 ++++++-- packages/workflow-util/src/cli-reference.ts | 2 ++ 5 files changed, 31 insertions(+), 4 deletions(-) diff --git a/.workflows/solve-issue.yaml b/.workflows/solve-issue.yaml index cb78960..cfc41ea 100644 --- a/.workflows/solve-issue.yaml +++ b/.workflows/solve-issue.yaml @@ -20,7 +20,7 @@ roles: 2. Revise the test spec accordingly After producing the test spec: - 1. Store it via `uwf cas put ""` and capture the returned hash + 1. Store it via `uwf cas put-text ""` and capture the returned hash 2. Put the hash in meta.plan (required when status=ready) output: "Output a brief summary of the test spec. Frontmatter must include: status (ready or insufficient_info) and plan (CAS hash of the test spec, required when status=ready)." frontmatter: diff --git a/packages/cli-workflow/src/cli.ts b/packages/cli-workflow/src/cli.ts index 378c093..06ce750 100755 --- a/packages/cli-workflow/src/cli.ts +++ b/packages/cli-workflow/src/cli.ts @@ -7,6 +7,7 @@ import { cmdCasGet, cmdCasHas, cmdCasPut, + cmdCasPutText, cmdCasRefs, cmdCasReindex, cmdCasSchemaGet, @@ -295,6 +296,17 @@ cas }); }); +cas + .command("put-text") + .description("Store a plain text string, print its hash") + .argument("", "Text content to store") + .action((text: string) => { + const storageRoot = resolveStorageRoot(); + runAction(async () => { + writeOutput(await cmdCasPutText(storageRoot, text)); + }); + }); + cas .command("has") .description("Check if a hash exists") diff --git a/packages/cli-workflow/src/commands/cas.ts b/packages/cli-workflow/src/commands/cas.ts index 2bc7b58..8f9dbeb 100644 --- a/packages/cli-workflow/src/commands/cas.ts +++ b/packages/cli-workflow/src/commands/cas.ts @@ -2,9 +2,11 @@ import { readFileSync } from "node:fs"; import { join } from "node:path"; import type { JSONSchema, Store } from "@uncaged/json-cas"; -import { bootstrap, getSchema, refs, walk } from "@uncaged/json-cas"; +import { bootstrap, getSchema, putSchema, refs, walk } from "@uncaged/json-cas"; import { createFsStore } from "@uncaged/json-cas-fs"; +import { TEXT_SCHEMA } from "../schemas.js"; + // ---- Helpers ---- function openStore(storageRoot: string): Store { @@ -121,3 +123,10 @@ export async function cmdCasSchemaGet(storageRoot: string, hash: string): Promis } return schema; } + +export async function cmdCasPutText(storageRoot: string, text: string): Promise<{ hash: string }> { + const store = openStore(storageRoot); + const typeHash = await putSchema(store, TEXT_SCHEMA); + const hash = await store.put(typeHash, text); + return { hash }; +} diff --git a/packages/cli-workflow/src/schemas.ts b/packages/cli-workflow/src/schemas.ts index 06cc087..530c2c0 100644 --- a/packages/cli-workflow/src/schemas.ts +++ b/packages/cli-workflow/src/schemas.ts @@ -2,10 +2,13 @@ import type { Hash, Store } from "@uncaged/json-cas"; import { putSchema } from "@uncaged/json-cas"; import { START_NODE_SCHEMA, STEP_NODE_SCHEMA, WORKFLOW_SCHEMA } from "@uncaged/workflow-protocol"; +export const TEXT_SCHEMA = { type: "string" as const }; + export type UwfSchemaHashes = { workflow: Hash; startNode: Hash; stepNode: Hash; + text: Hash; }; /** @@ -13,10 +16,11 @@ export type UwfSchemaHashes = { * Idempotent: safe to call on every CLI invocation. */ export async function registerUwfSchemas(store: Store): Promise { - const [workflow, startNode, stepNode] = await Promise.all([ + const [workflow, startNode, stepNode, text] = await Promise.all([ putSchema(store, WORKFLOW_SCHEMA), putSchema(store, START_NODE_SCHEMA), putSchema(store, STEP_NODE_SCHEMA), + putSchema(store, TEXT_SCHEMA), ]); - return { workflow, startNode, stepNode }; + return { workflow, startNode, stepNode, text }; } diff --git a/packages/workflow-util/src/cli-reference.ts b/packages/workflow-util/src/cli-reference.ts index f2d3881..d4a90cf 100644 --- a/packages/workflow-util/src/cli-reference.ts +++ b/packages/workflow-util/src/cli-reference.ts @@ -46,6 +46,8 @@ uwf cas get # read a CAS node (type + payload) [--timestamp] # include timestamp in output uwf cas put # store a node, print its hash # : JSON file path or inline JSON string +uwf cas put-text # store a plain text string, print its hash + # shortcut for put with the built-in text schema uwf cas has # check if a hash exists uwf cas refs # list direct CAS references from a node uwf cas walk # recursive traversal from a node From 99a2890be2ece4694142a80e0168c588c18daf2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= Date: Fri, 22 May 2026 08:58:01 +0000 Subject: [PATCH 05/13] feat: remove LLM extract fallback, require YAML frontmatter Agent output must contain valid YAML frontmatter matching the role schema. If frontmatter parsing fails, the step fails immediately with a clear error instead of falling back to an LLM extraction that can fabricate values. The extract module remains as a public API export but is no longer used in the agent run loop. Breaking change: agents that relied on LLM extraction to produce valid output will now fail. They must output proper frontmatter. --- packages/workflow-agent-kit/src/run.ts | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/packages/workflow-agent-kit/src/run.ts b/packages/workflow-agent-kit/src/run.ts index f6ba1ab..f011fe1 100644 --- a/packages/workflow-agent-kit/src/run.ts +++ b/packages/workflow-agent-kit/src/run.ts @@ -3,10 +3,9 @@ import type { CasRef, StepNodePayload, ThreadId } from "@uncaged/workflow-protoc import { config as loadDotenv } from "dotenv"; import { buildOutputFormatInstruction } from "./build-output-format-instruction.js"; import { buildContextWithMeta } from "./context.js"; -import { extract } from "./extract.js"; import { tryFrontmatterFastPath } from "./frontmatter.js"; import type { AgentStore } from "./storage.js"; -import { getEnvPath, loadWorkflowConfig, resolveStorageRoot } from "./storage.js"; +import { getEnvPath, resolveStorageRoot } from "./storage.js"; import type { AgentContext, AgentOptions, AgentRunResult } from "./types.js"; function fail(message: string): never { @@ -73,24 +72,19 @@ async function runAgent(options: AgentOptions, ctx: AgentContext): Promise>, ): Promise { - const fastPath = await runWithMessage("frontmatter fast path", () => - tryFrontmatterFastPath(rawOutput, outputSchema, ctx.meta.store), - ).catch(() => null); + const fastPath = await tryFrontmatterFastPath(rawOutput, outputSchema, ctx.meta.store); if (fastPath !== null) { return fastPath.outputHash; } - const config = await runWithMessage("failed to load config", () => - loadWorkflowConfig(storageRoot), + fail( + "Agent output does not contain valid YAML frontmatter matching the role schema.\n" + + "The agent must output a YAML frontmatter block (--- delimited) as the first thing in its response.\n" + + `Raw output (first 500 chars): ${rawOutput.slice(0, 500)}`, ); - const extracted = await runWithMessage("extract failed", () => - extract(rawOutput, outputSchema, config), - ); - return extracted.hash; } async function persistStep(options: { @@ -136,12 +130,7 @@ export function createAgent(options: AgentOptions): () => Promise { } const agentResult = await runAgent(options, ctx); - const outputHash = await extractOutput( - agentResult.output, - roleDef.frontmatter, - storageRoot, - ctx, - ); + const outputHash = await extractOutput(agentResult.output, roleDef.frontmatter, ctx); const stepHash = await persistStep({ ctx, outputHash, From a803fcb4fc06224b06f1b8e7182d747abfb358d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= Date: Fri, 22 May 2026 09:04:34 +0000 Subject: [PATCH 06/13] =?UTF-8?q?fix:=20solve-issue.yaml=20meta.plan=20?= =?UTF-8?q?=E2=86=92=20frontmatter.plan?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follows #375 rename. --- .workflows/solve-issue.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.workflows/solve-issue.yaml b/.workflows/solve-issue.yaml index cfc41ea..8fab9f4 100644 --- a/.workflows/solve-issue.yaml +++ b/.workflows/solve-issue.yaml @@ -21,7 +21,7 @@ roles: After producing the test spec: 1. Store it via `uwf cas put-text ""` and capture the returned hash - 2. Put the hash in meta.plan (required when status=ready) + 2. Put the hash in frontmatter.plan (required when status=ready) output: "Output a brief summary of the test spec. Frontmatter must include: status (ready or insufficient_info) and plan (CAS hash of the test spec, required when status=ready)." frontmatter: type: object From 7ff90cef4fa73acefba0ac8d591fead7b64ab52d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= Date: Fri, 22 May 2026 09:13:05 +0000 Subject: [PATCH 07/13] =?UTF-8?q?feat:=20agent=20session=20protocol=20?= =?UTF-8?q?=E2=80=94=20sessionId=20in=20result,=20continue=20support,=20fr?= =?UTF-8?q?ontmatter=20retry?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Breaking changes: - AgentRunResult now requires sessionId field - AgentOptions now requires continue function - Agent CLI outputs JSON {stepHash, sessionId} instead of plain CAS hash - Engine parses JSON output (with legacy CAS hash fallback) New features: - Frontmatter validation retry: if agent output lacks valid frontmatter, engine calls agent.continue() up to 2 times with correction message - Session tracking: sessionId flows from agent → engine → StepOutput - Hermes agent: session parse failure is now a hard error (no raw text fallback) - Hermes agent: supports --resume for continue sessions Closes #384 --- packages/cli-workflow/src/commands/thread.ts | 30 +++++-- packages/workflow-agent-hermes/src/hermes.ts | 90 ++++++++++++++------ packages/workflow-agent-kit/src/index.ts | 8 +- packages/workflow-agent-kit/src/run.ts | 60 +++++++++---- packages/workflow-agent-kit/src/types.ts | 8 ++ packages/workflow-protocol/src/types.ts | 1 + 6 files changed, 149 insertions(+), 48 deletions(-) diff --git a/packages/cli-workflow/src/commands/thread.ts b/packages/cli-workflow/src/commands/thread.ts index e7146be..b1a6c51 100644 --- a/packages/cli-workflow/src/commands/thread.ts +++ b/packages/cli-workflow/src/commands/thread.ts @@ -624,7 +624,12 @@ function resolveAgentConfig( return agentConfig; } -function spawnAgent(agent: AgentConfig, threadId: ThreadId, role: string): CasRef { +type SpawnAgentResult = { + stepHash: CasRef; + sessionId: string; +}; + +function spawnAgent(agent: AgentConfig, threadId: ThreadId, role: string): SpawnAgentResult { const argv = [...agent.args, threadId, role]; let stdout: string; try { @@ -646,10 +651,24 @@ function spawnAgent(agent: AgentConfig, threadId: ThreadId, role: string): CasRe } const line = stdout.trim().split("\n").pop()?.trim() ?? ""; - if (!isCasRef(line)) { - fail(`agent stdout is not a valid CAS hash: ${line || "(empty)"}`); + + // Try JSON output first (new protocol) + try { + const parsed = JSON.parse(line) as Record; + const stepHash = parsed.stepHash; + const sessionId = parsed.sessionId; + if (typeof stepHash === "string" && isCasRef(stepHash) && typeof sessionId === "string") { + return { stepHash, sessionId }; + } + } catch { + // Not JSON — fall through to legacy CAS hash parsing } - return line; + + // Legacy: plain CAS hash on stdout + if (!isCasRef(line)) { + fail(`agent stdout is not a valid CAS hash or JSON: ${line || "(empty)"}`); + } + return { stepHash: line, sessionId: "" }; } async function archiveThread( @@ -706,7 +725,7 @@ export async function cmdThreadStep( const agent = resolveAgentConfig(config, workflow, role, agentOverride); loadDotenv({ path: getEnvPath(storageRoot) }); - const newHead = spawnAgent(agent, threadId, role); + const { stepHash: newHead, sessionId } = spawnAgent(agent, threadId, role); // Re-create store to pick up nodes written by the agent subprocess const uwfAfter = await createUwfStore(storageRoot); @@ -737,6 +756,7 @@ export async function cmdThreadStep( thread: threadId, head: newHead, done, + sessionId, }; } diff --git a/packages/workflow-agent-hermes/src/hermes.ts b/packages/workflow-agent-hermes/src/hermes.ts index 3311da8..5264934 100644 --- a/packages/workflow-agent-hermes/src/hermes.ts +++ b/packages/workflow-agent-hermes/src/hermes.ts @@ -1,4 +1,5 @@ import { spawn } from "node:child_process"; +import type { Store } from "@uncaged/json-cas"; import { type AgentContext, @@ -10,7 +11,6 @@ import { import { loadHermesSession, parseSessionIdFromStdout, - storeHermesRawOutput, storeHermesSessionDetail, } from "./session-detail.js"; @@ -52,17 +52,8 @@ export function buildHermesPrompt(ctx: AgentContext): string { return parts.join("\n"); } -function spawnHermesChat(prompt: string): Promise<{ stdout: string; stderr: string }> { +function spawnHermes(args: string[]): Promise<{ stdout: string; stderr: string }> { return new Promise((resolve, reject) => { - const args = [ - "chat", - "-q", - prompt, - "--yolo", - "--max-turns", - String(HERMES_MAX_TURNS), - "--quiet", - ]; const child = spawn(HERMES_COMMAND, args, { env: process.env, shell: false, @@ -94,23 +85,73 @@ function spawnHermesChat(prompt: string): Promise<{ stdout: string; stderr: stri }); } +function spawnHermesChat(prompt: string): Promise<{ stdout: string; stderr: string }> { + return spawnHermes([ + "chat", + "-q", + prompt, + "--yolo", + "--max-turns", + String(HERMES_MAX_TURNS), + "--quiet", + ]); +} + +function spawnHermesResume( + sessionId: string, + message: string, +): Promise<{ stdout: string; stderr: string }> { + return spawnHermes([ + "chat", + "--resume", + sessionId, + "-q", + message, + "--yolo", + "--max-turns", + String(HERMES_MAX_TURNS), + "--quiet", + ]); +} + +function parseSessionId(stdout: string, stderr: string): string { + const sessionId = parseSessionIdFromStdout(stderr) ?? parseSessionIdFromStdout(stdout); + if (sessionId === null) { + throw new Error( + "Failed to parse session_id from hermes output.\n" + + `stderr (first 200 chars): ${stderr.slice(0, 200)}\n` + + `stdout (first 200 chars): ${stdout.slice(0, 200)}`, + ); + } + return sessionId; +} + +async function buildResultFromSession(sessionId: string, store: Store): Promise { + const session = await loadHermesSession(sessionId); + if (session === null) { + throw new Error(`Failed to load hermes session file for session_id: ${sessionId}`); + } + const { detailHash, output } = await storeHermesSessionDetail(store, session); + return { output, detailHash, sessionId }; +} + async function runHermes(ctx: AgentContext): Promise { const fullPrompt = buildHermesPrompt(ctx); const { stdout, stderr } = await spawnHermesChat(fullPrompt); - const { store } = ctx; + const sessionId = parseSessionId(stdout, stderr); + return buildResultFromSession(sessionId, ctx.store); +} - // --quiet mode: session_id may be on stdout or stderr - const sessionId = parseSessionIdFromStdout(stderr) ?? parseSessionIdFromStdout(stdout); - if (sessionId !== null) { - const session = await loadHermesSession(sessionId); - if (session !== null) { - const { detailHash, output } = await storeHermesSessionDetail(store, session); - return { output, detailHash }; - } - } - - const detailHash = await storeHermesRawOutput(store, stdout); - return { output: stdout, detailHash }; +async function continueHermes( + sessionId: string, + message: string, + store: Store, +): Promise { + const { stdout, stderr } = await spawnHermesResume(sessionId, message); + // Resume may return a new session_id + const newSessionId = parseSessionIdFromStdout(stderr) ?? parseSessionIdFromStdout(stdout); + const resolvedId = newSessionId ?? sessionId; + return buildResultFromSession(resolvedId, store); } /** Agent CLI factory: parses argv, runs Hermes, extracts output, writes StepNode. */ @@ -118,5 +159,6 @@ export function createHermesAgent(): () => Promise { return createAgent({ name: "hermes", run: runHermes, + continue: continueHermes, }); } diff --git a/packages/workflow-agent-kit/src/index.ts b/packages/workflow-agent-kit/src/index.ts index 778d3f7..97fa132 100644 --- a/packages/workflow-agent-kit/src/index.ts +++ b/packages/workflow-agent-kit/src/index.ts @@ -12,4 +12,10 @@ export type { FrontmatterFastPathResult } from "./frontmatter.js"; export { tryFrontmatterFastPath } from "./frontmatter.js"; export { createAgent } from "./run.js"; export { getConfigPath, getEnvPath, loadWorkflowConfig } from "./storage.js"; -export type { AgentContext, AgentOptions, AgentRunFn, AgentRunResult } from "./types.js"; +export type { + AgentContext, + AgentContinueFn, + AgentOptions, + AgentRunFn, + AgentRunResult, +} from "./types.js"; diff --git a/packages/workflow-agent-kit/src/run.ts b/packages/workflow-agent-kit/src/run.ts index f011fe1..0083d21 100644 --- a/packages/workflow-agent-kit/src/run.ts +++ b/packages/workflow-agent-kit/src/run.ts @@ -6,7 +6,9 @@ import { buildContextWithMeta } from "./context.js"; import { tryFrontmatterFastPath } from "./frontmatter.js"; import type { AgentStore } from "./storage.js"; import { getEnvPath, resolveStorageRoot } from "./storage.js"; -import type { AgentContext, AgentOptions, AgentRunResult } from "./types.js"; +import type { AgentOptions } from "./types.js"; + +const MAX_FRONTMATTER_RETRIES = 2; function fail(message: string): never { process.stderr.write(`${message}\n`); @@ -65,26 +67,16 @@ async function writeStepNode(options: { return hash; } -async function runAgent(options: AgentOptions, ctx: AgentContext): Promise { - return runWithMessage("agent run failed", () => options.run(ctx)); -} - -async function extractOutput( +async function tryExtractOutput( rawOutput: string, outputSchema: CasRef, ctx: Awaited>, -): Promise { +): Promise { const fastPath = await tryFrontmatterFastPath(rawOutput, outputSchema, ctx.meta.store); - if (fastPath !== null) { return fastPath.outputHash; } - - fail( - "Agent output does not contain valid YAML frontmatter matching the role schema.\n" + - "The agent must output a YAML frontmatter block (--- delimited) as the first thing in its response.\n" + - `Raw output (first 500 chars): ${rawOutput.slice(0, 500)}`, - ); + return null; } async function persistStep(options: { @@ -106,10 +98,18 @@ async function persistStep(options: { }); } +export type AgentCliOutput = { + stepHash: CasRef; + sessionId: string; +}; + /** * Create an agent CLI entrypoint. * Parses argv (` `), runs the agent, extracts structured output, - * writes StepNode to CAS, and prints the new node hash to stdout. + * writes StepNode to CAS, and prints JSON result to stdout. + * + * If frontmatter extraction fails, retries up to MAX_FRONTMATTER_RETRIES times + * by calling agent.continue() with a correction message. */ export function createAgent(options: AgentOptions): () => Promise { return async function main(): Promise { @@ -129,8 +129,31 @@ export function createAgent(options: AgentOptions): () => Promise { ctx.outputFormatInstruction = buildOutputFormatInstruction(frontmatterSchema); } - const agentResult = await runAgent(options, ctx); - const outputHash = await extractOutput(agentResult.output, roleDef.frontmatter, ctx); + let agentResult = await runWithMessage("agent run failed", () => options.run(ctx)); + + // Try to extract frontmatter; retry via continue if it fails + let outputHash = await tryExtractOutput(agentResult.output, roleDef.frontmatter, ctx); + + for (let retry = 0; retry < MAX_FRONTMATTER_RETRIES && outputHash === null; retry++) { + const correctionMessage = + "Your previous response did not contain valid YAML frontmatter matching the role schema.\n" + + "You MUST begin your response with a YAML frontmatter block (--- delimited).\n" + + "Please output ONLY the corrected frontmatter block followed by your work."; + + agentResult = await runWithMessage("agent continue failed", () => + options.continue(agentResult.sessionId, correctionMessage, ctx.meta.store), + ); + outputHash = await tryExtractOutput(agentResult.output, roleDef.frontmatter, ctx); + } + + if (outputHash === null) { + fail( + "Agent output does not contain valid YAML frontmatter matching the role schema " + + `after ${MAX_FRONTMATTER_RETRIES} retries.\n` + + `Raw output (first 500 chars): ${agentResult.output.slice(0, 500)}`, + ); + } + const stepHash = await persistStep({ ctx, outputHash, @@ -138,6 +161,7 @@ export function createAgent(options: AgentOptions): () => Promise { agentName: agentLabel(options.name), }); - process.stdout.write(`${stepHash}\n`); + const result: AgentCliOutput = { stepHash, sessionId: agentResult.sessionId }; + process.stdout.write(`${JSON.stringify(result)}\n`); }; } diff --git a/packages/workflow-agent-kit/src/types.ts b/packages/workflow-agent-kit/src/types.ts index 8c57d0b..c959524 100644 --- a/packages/workflow-agent-kit/src/types.ts +++ b/packages/workflow-agent-kit/src/types.ts @@ -17,11 +17,19 @@ export type AgentContext = ModeratorContext & { export type AgentRunResult = { output: string; detailHash: string; + sessionId: string; }; +export type AgentContinueFn = ( + sessionId: string, + message: string, + store: AgentContext["store"], +) => Promise; + export type AgentRunFn = (ctx: AgentContext) => Promise; export type AgentOptions = { name: string; run: AgentRunFn; + continue: AgentContinueFn; }; diff --git a/packages/workflow-protocol/src/types.ts b/packages/workflow-protocol/src/types.ts index 8960f0c..01e3ec8 100644 --- a/packages/workflow-protocol/src/types.ts +++ b/packages/workflow-protocol/src/types.ts @@ -81,6 +81,7 @@ export type StepOutput = { thread: ThreadId; head: CasRef; done: boolean; + sessionId?: string; }; /** uwf thread steps — single step entry */ From f0f840e6e02f65a4b56b12a2b590e8bf642a13be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= Date: Fri, 22 May 2026 09:16:13 +0000 Subject: [PATCH 08/13] =?UTF-8?q?fix:=20StepOutput.sessionId=20=E2=86=92?= =?UTF-8?q?=20string=20|=20null,=20legacy=20fallback=20=E2=86=92=20null?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/cli-workflow/src/commands/thread.ts | 7 +++++-- packages/workflow-protocol/src/types.ts | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/packages/cli-workflow/src/commands/thread.ts b/packages/cli-workflow/src/commands/thread.ts index b1a6c51..f695e30 100644 --- a/packages/cli-workflow/src/commands/thread.ts +++ b/packages/cli-workflow/src/commands/thread.ts @@ -200,6 +200,7 @@ export async function cmdThreadShow(storageRoot: string, threadId: ThreadId): Pr thread: threadId, head: activeHead, done: false, + sessionId: null, }; } @@ -210,6 +211,7 @@ export async function cmdThreadShow(storageRoot: string, threadId: ThreadId): Pr thread: threadId, head: hist.head, done: true, + sessionId: null, }; } @@ -626,7 +628,7 @@ function resolveAgentConfig( type SpawnAgentResult = { stepHash: CasRef; - sessionId: string; + sessionId: string | null; }; function spawnAgent(agent: AgentConfig, threadId: ThreadId, role: string): SpawnAgentResult { @@ -668,7 +670,7 @@ function spawnAgent(agent: AgentConfig, threadId: ThreadId, role: string): Spawn if (!isCasRef(line)) { fail(`agent stdout is not a valid CAS hash or JSON: ${line || "(empty)"}`); } - return { stepHash: line, sessionId: "" }; + return { stepHash: line, sessionId: null }; } async function archiveThread( @@ -717,6 +719,7 @@ export async function cmdThreadStep( thread: threadId, head: headHash, done: true, + sessionId: null, }; } diff --git a/packages/workflow-protocol/src/types.ts b/packages/workflow-protocol/src/types.ts index 01e3ec8..ec43431 100644 --- a/packages/workflow-protocol/src/types.ts +++ b/packages/workflow-protocol/src/types.ts @@ -81,7 +81,7 @@ export type StepOutput = { thread: ThreadId; head: CasRef; done: boolean; - sessionId?: string; + sessionId: string | null; }; /** uwf thread steps — single step entry */ From 487c48effaddafecd6c1e6671d57317c6761534b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= Date: Fri, 22 May 2026 09:39:36 +0000 Subject: [PATCH 09/13] fix: revert output protocol changes from #385 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Agent CLI outputs plain CAS hash (not JSON), engine parses plain hash. StepOutput no longer carries sessionId — session info is already in CAS detail. Keeps the valuable parts of #385: sessionId in AgentRunResult (process-internal), continue support, and frontmatter retry loop. --- packages/cli-workflow/src/commands/thread.ts | 31 +++----------------- packages/workflow-agent-kit/src/run.ts | 16 +--------- packages/workflow-protocol/src/types.ts | 1 - 3 files changed, 5 insertions(+), 43 deletions(-) diff --git a/packages/cli-workflow/src/commands/thread.ts b/packages/cli-workflow/src/commands/thread.ts index f695e30..e7146be 100644 --- a/packages/cli-workflow/src/commands/thread.ts +++ b/packages/cli-workflow/src/commands/thread.ts @@ -200,7 +200,6 @@ export async function cmdThreadShow(storageRoot: string, threadId: ThreadId): Pr thread: threadId, head: activeHead, done: false, - sessionId: null, }; } @@ -211,7 +210,6 @@ export async function cmdThreadShow(storageRoot: string, threadId: ThreadId): Pr thread: threadId, head: hist.head, done: true, - sessionId: null, }; } @@ -626,12 +624,7 @@ function resolveAgentConfig( return agentConfig; } -type SpawnAgentResult = { - stepHash: CasRef; - sessionId: string | null; -}; - -function spawnAgent(agent: AgentConfig, threadId: ThreadId, role: string): SpawnAgentResult { +function spawnAgent(agent: AgentConfig, threadId: ThreadId, role: string): CasRef { const argv = [...agent.args, threadId, role]; let stdout: string; try { @@ -653,24 +646,10 @@ function spawnAgent(agent: AgentConfig, threadId: ThreadId, role: string): Spawn } const line = stdout.trim().split("\n").pop()?.trim() ?? ""; - - // Try JSON output first (new protocol) - try { - const parsed = JSON.parse(line) as Record; - const stepHash = parsed.stepHash; - const sessionId = parsed.sessionId; - if (typeof stepHash === "string" && isCasRef(stepHash) && typeof sessionId === "string") { - return { stepHash, sessionId }; - } - } catch { - // Not JSON — fall through to legacy CAS hash parsing - } - - // Legacy: plain CAS hash on stdout if (!isCasRef(line)) { - fail(`agent stdout is not a valid CAS hash or JSON: ${line || "(empty)"}`); + fail(`agent stdout is not a valid CAS hash: ${line || "(empty)"}`); } - return { stepHash: line, sessionId: null }; + return line; } async function archiveThread( @@ -719,7 +698,6 @@ export async function cmdThreadStep( thread: threadId, head: headHash, done: true, - sessionId: null, }; } @@ -728,7 +706,7 @@ export async function cmdThreadStep( const agent = resolveAgentConfig(config, workflow, role, agentOverride); loadDotenv({ path: getEnvPath(storageRoot) }); - const { stepHash: newHead, sessionId } = spawnAgent(agent, threadId, role); + const newHead = spawnAgent(agent, threadId, role); // Re-create store to pick up nodes written by the agent subprocess const uwfAfter = await createUwfStore(storageRoot); @@ -759,7 +737,6 @@ export async function cmdThreadStep( thread: threadId, head: newHead, done, - sessionId, }; } diff --git a/packages/workflow-agent-kit/src/run.ts b/packages/workflow-agent-kit/src/run.ts index 0083d21..73f7e58 100644 --- a/packages/workflow-agent-kit/src/run.ts +++ b/packages/workflow-agent-kit/src/run.ts @@ -98,19 +98,6 @@ async function persistStep(options: { }); } -export type AgentCliOutput = { - stepHash: CasRef; - sessionId: string; -}; - -/** - * Create an agent CLI entrypoint. - * Parses argv (` `), runs the agent, extracts structured output, - * writes StepNode to CAS, and prints JSON result to stdout. - * - * If frontmatter extraction fails, retries up to MAX_FRONTMATTER_RETRIES times - * by calling agent.continue() with a correction message. - */ export function createAgent(options: AgentOptions): () => Promise { return async function main(): Promise { const { threadId, role } = parseArgv(process.argv); @@ -161,7 +148,6 @@ export function createAgent(options: AgentOptions): () => Promise { agentName: agentLabel(options.name), }); - const result: AgentCliOutput = { stepHash, sessionId: agentResult.sessionId }; - process.stdout.write(`${JSON.stringify(result)}\n`); + process.stdout.write(`${stepHash}\n`); }; } diff --git a/packages/workflow-protocol/src/types.ts b/packages/workflow-protocol/src/types.ts index ec43431..8960f0c 100644 --- a/packages/workflow-protocol/src/types.ts +++ b/packages/workflow-protocol/src/types.ts @@ -81,7 +81,6 @@ export type StepOutput = { thread: ThreadId; head: CasRef; done: boolean; - sessionId: string | null; }; /** uwf thread steps — single step entry */ From 67870392abd8e0d4352bee3d0b64490354ac826d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= Date: Fri, 22 May 2026 09:57:30 +0000 Subject: [PATCH 10/13] fix: dynamic frontmatter field extraction from role schema MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace hardcoded 5-field candidate with schema-driven extraction. Now reads outputSchema properties and picks matching fields from parsed frontmatter, supporting role-specific fields like plan, approved, success. Falls back to standard 5 fields when schema has no properties. Fixes #388 小橘 --- .../__tests__/frontmatter-fast-path.test.ts | 66 ++++++++ .../src/build-output-format-instruction.ts | 2 +- .../workflow-agent-kit/src/frontmatter.ts | 152 ++++++++++++++++-- 3 files changed, 210 insertions(+), 10 deletions(-) diff --git a/packages/workflow-agent-kit/__tests__/frontmatter-fast-path.test.ts b/packages/workflow-agent-kit/__tests__/frontmatter-fast-path.test.ts index c41920b..69df302 100644 --- a/packages/workflow-agent-kit/__tests__/frontmatter-fast-path.test.ts +++ b/packages/workflow-agent-kit/__tests__/frontmatter-fast-path.test.ts @@ -29,6 +29,27 @@ const STRICT_SCHEMA = { additionalProperties: false, }; +/** Role-specific schema (reviewer) — only approved, no standard agent fields. */ +const REVIEWER_SCHEMA = { + type: "object", + properties: { + approved: { type: "boolean" }, + }, + required: ["approved"], + additionalProperties: false, +}; + +/** Role-specific schema (planner) — custom status enum + plan hash. */ +const PLANNER_SCHEMA = { + type: "object", + properties: { + status: { type: "string", enum: ["ready", "insufficient_info"] }, + plan: { type: "string" }, + }, + required: ["status"], + additionalProperties: false, +}; + async function makeStoreWithSchema(schema: Record) { const store = createMemoryStore(); const schemaHash = await putSchema(store, schema); @@ -134,3 +155,48 @@ describe("tryFrontmatterFastPath — fallback: schema mismatch", () => { expect(result).toBeNull(); }); }); + +// ── Role-specific schema fields ─────────────────────────────────────────────── + +describe("tryFrontmatterFastPath — role-specific fields", () => { + test("extracts approved only for reviewer schema (no extra standard fields)", async () => { + const { store, schemaHash } = await makeStoreWithSchema(REVIEWER_SCHEMA); + + const raw = "---\napproved: true\n---\n\nReview passed."; + + const result = await tryFrontmatterFastPath(raw, schemaHash, store); + expect(result).not.toBeNull(); + + const node = store.get(result!.outputHash); + expect(node).not.toBeNull(); + const payload = node!.payload as Record; + expect(payload).toEqual({ approved: true }); + expect(payload.status).toBeUndefined(); + expect(payload.scope).toBeUndefined(); + }); + + test("extracts plan and role-specific status for planner schema", async () => { + const { store, schemaHash } = await makeStoreWithSchema(PLANNER_SCHEMA); + + const raw = "---\nstatus: ready\nplan: 01HASHPLANNER0001\n---\n\nSpec summary."; + + const result = await tryFrontmatterFastPath(raw, schemaHash, store); + expect(result).not.toBeNull(); + + const node = store.get(result!.outputHash); + expect(node).not.toBeNull(); + const payload = node!.payload as Record; + expect(payload.status).toBe("ready"); + expect(payload.plan).toBe("01HASHPLANNER0001"); + expect(payload.scope).toBeUndefined(); + }); + + test("returns null when required role-specific field is missing", async () => { + const { store, schemaHash } = await makeStoreWithSchema(REVIEWER_SCHEMA); + + const raw = "---\nstatus: done\nscope: role\n---\n\nBody."; + + const result = await tryFrontmatterFastPath(raw, schemaHash, store); + expect(result).toBeNull(); + }); +}); diff --git a/packages/workflow-agent-kit/src/build-output-format-instruction.ts b/packages/workflow-agent-kit/src/build-output-format-instruction.ts index 5ecb381..2f8e7cd 100644 --- a/packages/workflow-agent-kit/src/build-output-format-instruction.ts +++ b/packages/workflow-agent-kit/src/build-output-format-instruction.ts @@ -9,7 +9,7 @@ import type { JSONSchema } from "@uncaged/json-cas"; * * Returns an empty array for schemas with no inspectable property definitions. */ -function extractSchemaFields(schema: JSONSchema): string[] { +export function extractSchemaFields(schema: JSONSchema): string[] { if (typeof schema.properties === "object" && schema.properties !== null) { return Object.keys(schema.properties as Record); } diff --git a/packages/workflow-agent-kit/src/frontmatter.ts b/packages/workflow-agent-kit/src/frontmatter.ts index 49075fe..3e49666 100644 --- a/packages/workflow-agent-kit/src/frontmatter.ts +++ b/packages/workflow-agent-kit/src/frontmatter.ts @@ -1,13 +1,139 @@ import type { Store } from "@uncaged/json-cas"; -import { validate } from "@uncaged/json-cas"; +import { getSchema, validate } from "@uncaged/json-cas"; import type { CasRef } from "@uncaged/workflow-protocol"; -import { parseFrontmatterMarkdown, validateFrontmatter } from "@uncaged/workflow-util"; +import { + type AgentFrontmatter, + createLogger, + parseFrontmatterMarkdown, + validateFrontmatter, +} from "@uncaged/workflow-util"; +import { parse as parseYaml } from "yaml"; + +import { extractSchemaFields } from "./build-output-format-instruction.js"; + +const log = createLogger({ sink: { kind: "stderr" } }); + +const STANDARD_KEYS = ["status", "next", "confidence", "artifacts", "scope"] as const; + +type StandardKey = (typeof STANDARD_KEYS)[number]; export type FrontmatterFastPathResult = { body: string; outputHash: CasRef; }; +function extractYamlBlock(raw: string): string | null { + const fence = "---"; + if (!raw.startsWith(fence)) { + return null; + } + + const rest = raw.slice(fence.length); + if (rest.length > 0 && rest[0] !== "\n" && rest[0] !== "\r") { + return null; + } + + const afterOpen = rest.startsWith("\n") ? rest.slice(1) : rest; + const closeIndex = afterOpen.indexOf(`\n${fence}`); + if (closeIndex === -1) { + return null; + } + + return afterOpen.slice(0, closeIndex); +} + +function parseRawFrontmatterFields(raw: string): Record { + const yamlText = extractYamlBlock(raw); + if (yamlText === null) { + return {}; + } + + try { + const parsed = parseYaml(yamlText); + if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) { + return {}; + } + return parsed as Record; + } catch { + return {}; + } +} + +function defaultCandidate(frontmatter: AgentFrontmatter): Record { + return { + status: frontmatter.status, + next: frontmatter.next, + confidence: frontmatter.confidence, + artifacts: [...frontmatter.artifacts], + scope: frontmatter.scope, + }; +} + +function pickStandardField(frontmatter: AgentFrontmatter, key: StandardKey): unknown { + switch (key) { + case "status": + return frontmatter.status; + case "next": + return frontmatter.next; + case "confidence": + return frontmatter.confidence; + case "artifacts": + return [...frontmatter.artifacts]; + case "scope": + return frontmatter.scope; + } +} + +function isStandardKey(key: string): key is StandardKey { + return (STANDARD_KEYS as readonly string[]).includes(key); +} + +function pickFieldValue( + field: string, + frontmatter: AgentFrontmatter, + rawFields: Record, +): unknown | undefined { + if (!isStandardKey(field)) { + return Object.hasOwn(rawFields, field) ? rawFields[field] : undefined; + } + + const coerced = pickStandardField(frontmatter, field); + if (field === "artifacts" || field === "scope") { + return coerced; + } + if (coerced !== null) { + return coerced; + } + return Object.hasOwn(rawFields, field) ? rawFields[field] : coerced; +} + +/** + * Build a CAS candidate object from schema property keys and parsed frontmatter. + * + * When the schema has no inspectable properties, falls back to the five standard + * agent frontmatter fields for backward compatibility. + */ +function buildCandidate( + frontmatter: AgentFrontmatter, + rawFields: Record, + schemaFields: string[], +): Record { + if (schemaFields.length === 0) { + return defaultCandidate(frontmatter); + } + + const candidate: Record = {}; + + for (const field of schemaFields) { + const value = pickFieldValue(field, frontmatter, rawFields); + if (value !== undefined) { + candidate[field] = value; + } + } + + return candidate; +} + /** * Try to satisfy `outputSchema` from frontmatter fields alone. * @@ -32,16 +158,22 @@ export async function tryFrontmatterFastPath( const validationErrors = validateFrontmatter(frontmatter); if (validationErrors.length > 0) { + log( + "9GNPS4WY", + `frontmatter validation errors: ${validationErrors.map((e) => e.message).join("; ")}`, + ); return null; } - const candidate: Record = { - status: frontmatter.status, - next: frontmatter.next, - confidence: frontmatter.confidence, - artifacts: [...frontmatter.artifacts], - scope: frontmatter.scope, - }; + const schema = getSchema(store, outputSchema); + if (schema === null) { + log("8FHMR2QX", `output schema not found in CAS: ${outputSchema}`); + return null; + } + + const schemaFields = extractSchemaFields(schema); + const rawFields = parseRawFrontmatterFields(raw); + const candidate = buildCandidate(frontmatter, rawFields, schemaFields); let outputHash: CasRef; let node: ReturnType; @@ -50,10 +182,12 @@ export async function tryFrontmatterFastPath( outputHash = await store.put(outputSchema, candidate); node = store.get(outputHash); } catch { + log("2KMQT7NR", "failed to store frontmatter candidate in CAS"); return null; } if (node === null || !validate(store, node)) { + log("2KMQT7NR", "stored frontmatter candidate failed schema validation"); return null; } From 4d47effd39e206f56f8ceba7cd93f6904beed706 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= Date: Fri, 22 May 2026 10:03:31 +0000 Subject: [PATCH 11/13] fix: generate frontmatter instruction dynamically from role schema MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace hardcoded 5-field example with schema-driven generation. Now shows actual enum values, types, and required markers for each role's frontmatter schema. Fixes #389 小橘 --- .../build-output-format-instruction.test.ts | 69 ++++++- .../src/build-output-format-instruction.ts | 174 +++++++++++++++--- 2 files changed, 211 insertions(+), 32 deletions(-) diff --git a/packages/workflow-agent-kit/__tests__/build-output-format-instruction.test.ts b/packages/workflow-agent-kit/__tests__/build-output-format-instruction.test.ts index 31e4680..fad5030 100644 --- a/packages/workflow-agent-kit/__tests__/build-output-format-instruction.test.ts +++ b/packages/workflow-agent-kit/__tests__/build-output-format-instruction.test.ts @@ -2,13 +2,32 @@ import { describe, expect, test } from "vitest"; import { buildOutputFormatInstruction } from "../src/build-output-format-instruction.js"; +const PLANNER_SCHEMA = { + type: "object", + properties: { + status: { type: "string", enum: ["ready", "insufficient_info"] }, + plan: { type: "string" }, + }, + required: ["status"], + additionalProperties: false, +}; + +const REVIEWER_SCHEMA = { + type: "object", + properties: { + approved: { type: "boolean" }, + }, + required: ["approved"], + additionalProperties: false, +}; + describe("buildOutputFormatInstruction", () => { test("always includes the frontmatter example block", () => { const result = buildOutputFormatInstruction({}); expect(result).toContain("---"); - expect(result).toContain("status: done"); - expect(result).toContain("confidence:"); - expect(result).toContain("scope: role"); + expect(result).not.toContain("status: done"); + expect(result).not.toContain("confidence:"); + expect(result).not.toContain("scope: role"); }); test("always marks frontmatter as the primary deliverable", () => { @@ -16,17 +35,36 @@ describe("buildOutputFormatInstruction", () => { expect(result).toContain("primary deliverable"); }); - test("lists fields from a flat object schema", () => { + test("generates planner-specific YAML example from schema", () => { + const result = buildOutputFormatInstruction(PLANNER_SCHEMA); + expect(result).toContain("status: ready # required | ready | insufficient_info"); + expect(result).toContain("plan: "); + expect(result).not.toContain("status: done"); + expect(result).not.toContain("confidence:"); + expect(result).not.toContain("artifacts:"); + }); + + test("generates reviewer-specific YAML example from schema", () => { + const result = buildOutputFormatInstruction(REVIEWER_SCHEMA); + expect(result).toContain("approved: true # required | true | false"); + expect(result).not.toContain("status:"); + }); + + test("lists fields from a flat object schema with required marker", () => { const schema = { type: "object", properties: { status: { type: "string" }, confidence: { type: "number" }, }, + required: ["status"], }; const result = buildOutputFormatInstruction(schema); - expect(result).toContain("`status`"); + expect(result).toContain("`status` (required)"); expect(result).toContain("`confidence`"); + expect(result).not.toContain("`confidence` (required)"); + expect(result).toContain("status: # required"); + expect(result).toContain("confidence: "); }); test("lists union of fields from an anyOf schema", () => { @@ -45,6 +83,8 @@ describe("buildOutputFormatInstruction", () => { const result = buildOutputFormatInstruction(schema); expect(result).toContain("`alpha`"); expect(result).toContain("`beta`"); + expect(result).toContain("alpha: "); + expect(result).toContain("beta: "); }); test("lists union of fields from a oneOf schema", () => { @@ -63,6 +103,8 @@ describe("buildOutputFormatInstruction", () => { const result = buildOutputFormatInstruction(schema); expect(result).toContain("`foo`"); expect(result).toContain("`bar`"); + expect(result).toContain("foo: "); + expect(result).toContain("bar: true # true | false"); }); test("falls back gracefully for a non-object schema with no properties", () => { @@ -80,6 +122,23 @@ describe("buildOutputFormatInstruction", () => { const result = buildOutputFormatInstruction(schema); const matches = [...result.matchAll(/`shared`/g)]; expect(matches.length).toBe(1); + expect(result).toContain("shared: "); + }); + + test("marks required when any union variant requires the field", () => { + const schema = { + anyOf: [ + { + type: "object", + properties: { shared: { type: "string" } }, + required: ["shared"], + }, + { type: "object", properties: { shared: { type: "number" } } }, + ], + }; + const result = buildOutputFormatInstruction(schema); + expect(result).toContain("`shared` (required)"); + expect(result).toContain("shared: # required"); }); test("includes focus reminder about role scope", () => { diff --git a/packages/workflow-agent-kit/src/build-output-format-instruction.ts b/packages/workflow-agent-kit/src/build-output-format-instruction.ts index 2f8e7cd..52f4356 100644 --- a/packages/workflow-agent-kit/src/build-output-format-instruction.ts +++ b/packages/workflow-agent-kit/src/build-output-format-instruction.ts @@ -1,5 +1,11 @@ import type { JSONSchema } from "@uncaged/json-cas"; +type SchemaProperty = { + name: string; + schema: JSONSchema; + required: boolean; +}; + /** * Extract top-level property names from a JSON Schema object. * @@ -10,8 +16,43 @@ import type { JSONSchema } from "@uncaged/json-cas"; * Returns an empty array for schemas with no inspectable property definitions. */ export function extractSchemaFields(schema: JSONSchema): string[] { + return extractSchemaProperties(schema).map((p) => p.name); +} + +function extractSchemaProperties(schema: JSONSchema): SchemaProperty[] { + const objectSchemas = collectObjectSchemas(schema); + if (objectSchemas.length === 0) { + return []; + } + + const byName = new Map(); + + for (const objectSchema of objectSchemas) { + const requiredSet = new Set( + Array.isArray(objectSchema.required) ? (objectSchema.required as string[]) : [], + ); + const properties = objectSchema.properties as Record | null | undefined; + if (typeof properties !== "object" || properties === null) { + continue; + } + + for (const [name, propSchema] of Object.entries(properties)) { + const required = requiredSet.has(name); + const existing = byName.get(name); + if (existing === undefined) { + byName.set(name, { name, schema: propSchema, required }); + } else if (required) { + byName.set(name, { ...existing, required: true }); + } + } + } + + return [...byName.values()]; +} + +function collectObjectSchemas(schema: JSONSchema): JSONSchema[] { if (typeof schema.properties === "object" && schema.properties !== null) { - return Object.keys(schema.properties as Record); + return [schema]; } const unionKey = Array.isArray(schema.anyOf) @@ -20,18 +61,109 @@ export function extractSchemaFields(schema: JSONSchema): string[] { ? "oneOf" : null; - if (unionKey !== null) { - const variants = schema[unionKey] as JSONSchema[]; - const fieldSet = new Set(); - for (const variant of variants) { - for (const field of extractSchemaFields(variant)) { - fieldSet.add(field); - } - } - return [...fieldSet]; + if (unionKey === null) { + return []; } - return []; + const variants = schema[unionKey] as JSONSchema[]; + const result: JSONSchema[] = []; + for (const variant of variants) { + result.push(...collectObjectSchemas(variant)); + } + return result; +} + +function resolvePropertySchema(prop: JSONSchema): JSONSchema { + if (Array.isArray(prop.enum) && prop.enum.length > 0) { + return prop; + } + + const unionKey = Array.isArray(prop.anyOf) ? "anyOf" : Array.isArray(prop.oneOf) ? "oneOf" : null; + + if (unionKey !== null) { + const variants = prop[unionKey] as JSONSchema[]; + const nonNull = variants.filter((v) => v.type !== "null"); + if (nonNull.length === 1) { + return nonNull[0]; + } + } + + return prop; +} + +function formatYamlScalar(value: unknown): string { + if (typeof value === "boolean") { + return String(value); + } + if (typeof value === "number") { + return String(value); + } + return String(value); +} + +function buildPropertyComment(parts: string[]): string { + const filtered = parts.filter((p) => p.length > 0); + return filtered.length > 0 ? ` # ${filtered.join(" | ")}` : ""; +} + +function buildPropertyExampleLine(prop: SchemaProperty): string { + const resolved = resolvePropertySchema(prop.schema); + const commentParts: string[] = []; + if (prop.required) { + commentParts.push("required"); + } + + if (Array.isArray(resolved.enum) && resolved.enum.length > 0) { + const enumValues = resolved.enum.map((v) => String(v)); + commentParts.push(...enumValues); + const first = resolved.enum[0]; + return `${prop.name}: ${formatYamlScalar(first)}${buildPropertyComment(commentParts)}`; + } + + if (resolved.type === "boolean") { + commentParts.push("true", "false"); + return `${prop.name}: true${buildPropertyComment(commentParts)}`; + } + + if (resolved.type === "string") { + return `${prop.name}: ${buildPropertyComment(commentParts)}`; + } + + if (resolved.type === "number" || resolved.type === "integer") { + return `${prop.name}: ${buildPropertyComment(commentParts)}`; + } + + if (resolved.type === "array") { + return `${prop.name}:\n - ${buildPropertyComment(commentParts)}`; + } + + if (resolved.type === "object") { + return `${prop.name}: ${buildPropertyComment(commentParts)}`; + } + + return `${prop.name}: ${buildPropertyComment(commentParts)}`; +} + +function buildYamlExampleBlock(properties: SchemaProperty[]): string { + if (properties.length === 0) { + return "---\n\n... your markdown work here ..."; + } + + const lines = properties.map((p) => buildPropertyExampleLine(p)); + return `---\n${lines.join("\n")}\n---\n\n... your markdown work here ...`; +} + +function buildFieldList(properties: SchemaProperty[]): string { + if (properties.length === 0) { + return " (schema fields will be extracted automatically)"; + } + + return properties + .map((p) => { + const suffix = p.required ? " (required)" : ""; + return ` - \`${p.name}\`${suffix}`; + }) + .join("\n"); } /** @@ -42,28 +174,16 @@ export function extractSchemaFields(schema: JSONSchema): string[] { * system prompt so the deliverable format is the first thing the agent sees. */ export function buildOutputFormatInstruction(schema: JSONSchema): string { - const fields = extractSchemaFields(schema); - - const fieldList = - fields.length > 0 - ? fields.map((f) => ` - \`${f}\``).join("\n") - : " (schema fields will be extracted automatically)"; + const properties = extractSchemaProperties(schema); + const yamlExample = buildYamlExampleBlock(properties); + const fieldList = buildFieldList(properties); return `## Deliverable Format Your response MUST begin with a YAML frontmatter block followed by your markdown work: \`\`\` ---- -status: done # done | needs_input | in_progress | failed -next: # suggested next role, or omit -confidence: 0.9 # 0.0–1.0, your self-assessed confidence -artifacts: # list of file paths or CAS hashes you produced - - path/to/file.ts -scope: role # role | thread ---- - -... your markdown work here ... +${yamlExample} \`\`\` The frontmatter is the **primary deliverable** — the engine reads it directly. From 6d94be34a9e893c71f892ecbd01e6c8aa7f69a6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= Date: Fri, 22 May 2026 10:29:08 +0000 Subject: [PATCH 12/13] feat: validate model connectivity during uwf setup Send a test completion request after configuration to verify the model is reachable. If validation fails, warn the user and suggest trying a different model or checking their settings. Fixes #335 --- .plan/335-setup-validate.md | 83 ++++++++++ .../src/__tests__/setup-validate.test.ts | 150 ++++++++++++++++++ packages/cli-workflow/src/commands/setup.ts | 57 ++++++- 3 files changed, 288 insertions(+), 2 deletions(-) create mode 100644 .plan/335-setup-validate.md create mode 100644 packages/cli-workflow/src/__tests__/setup-validate.test.ts diff --git a/.plan/335-setup-validate.md b/.plan/335-setup-validate.md new file mode 100644 index 0000000..2a0c22d --- /dev/null +++ b/.plan/335-setup-validate.md @@ -0,0 +1,83 @@ +# Test Spec: uwf setup model connectivity validation (#335) + +## Context + +File: `packages/cli-workflow/src/commands/setup.ts` +Test file: `packages/cli-workflow/src/__tests__/setup-validate.test.ts` + +After `cmdSetup` writes config, it should send a test chat completion request to verify the configured model is reachable. If validation fails, warn the user (don't abort — config is already saved). + +## Implementation Notes + +- Add a `validateModel(baseUrl, apiKey, model)` function that sends a minimal chat completion request (`POST /chat/completions` with `messages: [{role:"user",content:"hi"}]`, `max_tokens: 1`) +- Returns `Result` — ok if 2xx response, error with reason string otherwise +- Use `AbortSignal.timeout(15_000)` for the request +- Both `cmdSetup` and `cmdSetupInteractive` should call it after saving config +- `cmdSetup` returns validation result in its return object: `{ ...existing, validation: { ok: true } | { ok: false, error: string } }` +- `cmdSetupInteractive` prints a warning to console if validation fails, success message if it passes +- Use the project logger (`createLogger`) — no raw `console.log` except in interactive CLI output (per CLAUDE.md) + +## Test Cases (vitest) + +### 1. `validateModel` — success path +- Mock `fetch` to return `{ status: 200, ok: true, json: () => ({}) }` +- Call `validateModel(baseUrl, apiKey, model)` +- Assert returns `{ ok: true, value: undefined }` +- Assert fetch was called with correct URL (`${baseUrl}/chat/completions`), correct headers (`Authorization: Bearer ${apiKey}`), correct body (model, messages, max_tokens: 1) + +### 2. `validateModel` — HTTP error (401 unauthorized) +- Mock `fetch` to return `{ status: 401, ok: false, statusText: "Unauthorized" }` +- Call `validateModel(baseUrl, apiKey, model)` +- Assert returns `{ ok: false, error: }` + +### 3. `validateModel` — HTTP error (404 model not found) +- Mock `fetch` to return `{ status: 404, ok: false, statusText: "Not Found" }` +- Assert returns `{ ok: false, error: }` + +### 4. `validateModel` — network timeout +- Mock `fetch` to throw `DOMException` with name `AbortError` +- Assert returns `{ ok: false, error: }` + +### 5. `validateModel` — network error (DNS failure, connection refused) +- Mock `fetch` to throw `TypeError("fetch failed")` +- Assert returns `{ ok: false, error: }` + +### 6. `cmdSetup` — includes validation result on success +- Mock global `fetch` for `/chat/completions` to succeed +- Call `cmdSetup({ provider, baseUrl, apiKey, model, storageRoot })` +- Assert returned object has `validation: { ok: true, value: undefined }` +- Assert config files are still written (existing behavior preserved) + +### 7. `cmdSetup` — includes validation result on failure (config still saved) +- Mock global `fetch` for `/chat/completions` to return 401 +- Call `cmdSetup({ ... })` +- Assert returned object has `validation: { ok: false, error: ... }` +- Assert `config.yaml` and `.env` are still written (validation failure doesn't prevent saving) + +### 8. `cmdSetupInteractive` — prints success message on validation pass +- Mock `fetch` for both `/models` and `/chat/completions` to succeed +- Mock stdin to provide valid selections +- Capture console output +- Assert output contains a success message like "Model verified" or "✓" + +### 9. `cmdSetupInteractive` — prints warning on validation failure +- Mock `fetch`: `/models` succeeds, `/chat/completions` returns 401 +- Mock stdin for valid selections +- Capture console output +- Assert output contains a warning about model not being reachable and suggests trying a different model + +### 10. `validateModel` — request body correctness +- Mock `fetch` to capture the request body +- Call `validateModel(baseUrl, apiKey, "test-model")` +- Assert body is `{ model: "test-model", messages: [{role: "user", content: "hi"}], max_tokens: 1 }` + +## Export Requirements + +- `validateModel` must be exported (for direct unit testing) +- Signature: `async function validateModel(baseUrl: string, apiKey: string, model: string): Promise>` +- `Result` type: `{ ok: true; value: T } | { ok: false; error: E }` (project convention) + +## Files to Create/Modify + +- **New**: `packages/cli-workflow/src/__tests__/setup-validate.test.ts` — all test cases above +- **Modify**: `packages/cli-workflow/src/commands/setup.ts` — add `validateModel`, integrate into `cmdSetup` and `cmdSetupInteractive` diff --git a/packages/cli-workflow/src/__tests__/setup-validate.test.ts b/packages/cli-workflow/src/__tests__/setup-validate.test.ts new file mode 100644 index 0000000..f96baa3 --- /dev/null +++ b/packages/cli-workflow/src/__tests__/setup-validate.test.ts @@ -0,0 +1,150 @@ +import { mkdtemp, rm } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { afterEach, beforeEach, describe, expect, test, vi } from "vitest"; +import { cmdSetup, validateModel } from "../commands/setup.js"; + +describe("validateModel", () => { + const BASE_URL = "https://api.example.com/v1"; + const API_KEY = "sk-test-key"; + const MODEL = "test-model"; + + afterEach(() => { + vi.restoreAllMocks(); + }); + + test("success path — returns ok on 200", async () => { + const mockFetch = vi + .spyOn(globalThis, "fetch") + .mockResolvedValue(new Response(JSON.stringify({}), { status: 200 })); + + const result = await validateModel(BASE_URL, API_KEY, MODEL); + + expect(result).toEqual({ ok: true, value: undefined }); + expect(mockFetch).toHaveBeenCalledOnce(); + + const [url, opts] = mockFetch.mock.calls[0]!; + expect(url).toBe(`${BASE_URL}/chat/completions`); + expect((opts as RequestInit).headers).toEqual( + expect.objectContaining({ Authorization: `Bearer ${API_KEY}` }), + ); + const body = JSON.parse((opts as RequestInit).body as string); + expect(body).toEqual({ + model: MODEL, + messages: [{ role: "user", content: "hi" }], + max_tokens: 1, + }); + }); + + test("HTTP 401 — returns error containing 401", async () => { + vi.spyOn(globalThis, "fetch").mockResolvedValue( + new Response("Unauthorized", { status: 401, statusText: "Unauthorized" }), + ); + + const result = await validateModel(BASE_URL, API_KEY, MODEL); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error).toContain("401"); + } + }); + + test("HTTP 404 — returns error containing 404", async () => { + vi.spyOn(globalThis, "fetch").mockResolvedValue( + new Response("Not Found", { status: 404, statusText: "Not Found" }), + ); + + const result = await validateModel(BASE_URL, API_KEY, MODEL); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error).toContain("404"); + } + }); + + test("network timeout — returns error mentioning timeout", async () => { + const err = new DOMException("signal timed out", "AbortError"); + vi.spyOn(globalThis, "fetch").mockRejectedValue(err); + + const result = await validateModel(BASE_URL, API_KEY, MODEL); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.toLowerCase()).toMatch(/timeout|timed out/); + } + }); + + test("network error (DNS/connection) — returns error mentioning connectivity", async () => { + vi.spyOn(globalThis, "fetch").mockRejectedValue(new TypeError("fetch failed")); + + const result = await validateModel(BASE_URL, API_KEY, MODEL); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error.toLowerCase()).toMatch(/connect|reach|network/); + } + }); + + test("request body correctness", async () => { + const mockFetch = vi + .spyOn(globalThis, "fetch") + .mockResolvedValue(new Response(JSON.stringify({}), { status: 200 })); + + await validateModel(BASE_URL, API_KEY, "my-special-model"); + + const body = JSON.parse((mockFetch.mock.calls[0]![1] as RequestInit).body as string); + expect(body).toEqual({ + model: "my-special-model", + messages: [{ role: "user", content: "hi" }], + max_tokens: 1, + }); + }); +}); + +describe("cmdSetup with validation", () => { + let storageRoot: string; + + beforeEach(async () => { + storageRoot = await mkdtemp(join(tmpdir(), "uwf-setup-validate-")); + }); + + afterEach(async () => { + vi.restoreAllMocks(); + await rm(storageRoot, { recursive: true, force: true }); + }); + + const setupArgs = () => ({ + provider: "testprovider", + baseUrl: "https://api.test.com/v1", + apiKey: "sk-test", + model: "test-model", + storageRoot, + }); + + test("includes validation result on success", async () => { + vi.spyOn(globalThis, "fetch").mockResolvedValue( + new Response(JSON.stringify({}), { status: 200 }), + ); + + const result = await cmdSetup(setupArgs()); + + expect(result.validation).toEqual({ ok: true, value: undefined }); + // Config files should still be written + expect(result.configPath).toBeTruthy(); + expect(result.envPath).toBeTruthy(); + }); + + test("includes validation failure — config still saved", async () => { + vi.spyOn(globalThis, "fetch").mockResolvedValue( + new Response("Unauthorized", { status: 401, statusText: "Unauthorized" }), + ); + + const result = await cmdSetup(setupArgs()); + + expect(result.validation).toBeDefined(); + expect((result.validation as { ok: boolean }).ok).toBe(false); + // Config files should still be written despite validation failure + expect(result.configPath).toBeTruthy(); + expect(result.envPath).toBeTruthy(); + }); +}); diff --git a/packages/cli-workflow/src/commands/setup.ts b/packages/cli-workflow/src/commands/setup.ts index 0eeab20..aa4ef61 100644 --- a/packages/cli-workflow/src/commands/setup.ts +++ b/packages/cli-workflow/src/commands/setup.ts @@ -2,9 +2,45 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs"; import { join } from "node:path"; import { stdin as input, stdout as output } from "node:process"; import { createInterface } from "node:readline/promises"; - +import type { Result } from "@uncaged/workflow-util"; import { parse, stringify } from "yaml"; +/** + * Send a minimal chat completion request to verify the model is reachable. + * Returns ok on 2xx, error with reason string otherwise. + */ +export async function validateModel( + baseUrl: string, + apiKey: string, + model: string, +): Promise> { + try { + const url = `${baseUrl.replace(/\/+$/, "")}/chat/completions`; + const res = await fetch(url, { + method: "POST", + headers: { + Authorization: `Bearer ${apiKey}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model, + messages: [{ role: "user", content: "hi" }], + max_tokens: 1, + }), + signal: AbortSignal.timeout(15_000), + }); + if (!res.ok) { + return { ok: false, error: `HTTP ${res.status} ${res.statusText}` }; + } + return { ok: true, value: undefined }; + } catch (err: unknown) { + if (err instanceof DOMException && err.name === "AbortError") { + return { ok: false, error: "Request timed out — model endpoint unreachable" }; + } + return { ok: false, error: `Network error — could not reach endpoint (${String(err)})` }; + } +} + /** * Preset provider list — embedded to avoid runtime YAML loading dependency. * Keep in sync with providers.yaml in cli-workflow. @@ -163,12 +199,16 @@ export async function cmdSetup(args: SetupArgs): Promise envData[envName] = args.apiKey; saveEnvFile(envPath, envData); + // Validate model connectivity + const validation = await validateModel(args.baseUrl, args.apiKey, args.model); + return { configPath, envPath, provider: args.provider, model: args.model, defaultAgent: merged.defaultAgent, + validation, }; } @@ -328,7 +368,7 @@ export async function cmdSetupInteractive(storageRoot: string): Promise Register a workflow"); console.log(' uwf thread start -p "..." Start a thread'); From 76fab228278678b3a702ffa3d80daee1d51eb017 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= Date: Fri, 22 May 2026 10:49:04 +0000 Subject: [PATCH 13/13] fix: explicitly forbid extra frontmatter fields in output format instruction buildOutputFormatInstruction now includes explicit language telling agents to output ONLY schema-defined fields and to focus on their role's deliverable. Fixes #394 --- .../build-output-format-instruction.test.ts | 22 +++++++++++++++++++ .../src/build-output-format-instruction.ts | 2 ++ 2 files changed, 24 insertions(+) diff --git a/packages/workflow-agent-kit/__tests__/build-output-format-instruction.test.ts b/packages/workflow-agent-kit/__tests__/build-output-format-instruction.test.ts index fad5030..c6b824c 100644 --- a/packages/workflow-agent-kit/__tests__/build-output-format-instruction.test.ts +++ b/packages/workflow-agent-kit/__tests__/build-output-format-instruction.test.ts @@ -141,6 +141,28 @@ describe("buildOutputFormatInstruction", () => { expect(result).toContain("shared: # required"); }); + test("explicitly forbids extra frontmatter fields", () => { + const result = buildOutputFormatInstruction(PLANNER_SCHEMA); + expect(result).toMatch(/\b(only|exclusively)\b.*fields/i); + expect(result).toMatch(/do not add (extra|additional|other) fields/i); + }); + + test("forbids extra fields even for empty schema", () => { + const result = buildOutputFormatInstruction({}); + expect(result).toMatch(/do not add (extra|additional|other) fields/i); + }); + + test("forbids extra fields for anyOf/oneOf schemas", () => { + const schema = { + anyOf: [ + { type: "object", properties: { alpha: { type: "string" } } }, + { type: "object", properties: { beta: { type: "number" } } }, + ], + }; + const result = buildOutputFormatInstruction(schema); + expect(result).toMatch(/do not add (extra|additional|other) fields/i); + }); + test("includes focus reminder about role scope", () => { const result = buildOutputFormatInstruction({}); expect(result).toContain("Focus exclusively on YOUR role"); diff --git a/packages/workflow-agent-kit/src/build-output-format-instruction.ts b/packages/workflow-agent-kit/src/build-output-format-instruction.ts index 52f4356..e286c6c 100644 --- a/packages/workflow-agent-kit/src/build-output-format-instruction.ts +++ b/packages/workflow-agent-kit/src/build-output-format-instruction.ts @@ -191,5 +191,7 @@ Your meta output must satisfy these fields: ${fieldList} +Output ONLY the fields listed above. Do not add extra fields that are not specified in the schema. + Focus exclusively on YOUR role's deliverable. Do not perform actions outside your role's scope.`; }