Compare commits

...

9 Commits

Author SHA1 Message Date
xiaoju 487c48effa fix: revert output protocol changes from #385
Agent CLI outputs plain CAS hash (not JSON), engine parses plain hash.
StepOutput no longer carries sessionId — session info is already in CAS detail.
Keeps the valuable parts of #385: sessionId in AgentRunResult (process-internal),
continue support, and frontmatter retry loop.
2026-05-22 09:39:36 +00:00
xiaomo 4eca2d533c Merge pull request 'feat: agent session protocol — sessionId, continue, frontmatter retry' (#385) from feat/384-agent-session-protocol into main 2026-05-22 09:20:35 +00:00
xiaoju f0f840e6e0 fix: StepOutput.sessionId → string | null, legacy fallback → null 2026-05-22 09:16:13 +00:00
xiaoju 7ff90cef4f feat: agent session protocol — sessionId in result, continue support, frontmatter retry
Breaking changes:
- AgentRunResult now requires sessionId field
- AgentOptions now requires continue function
- Agent CLI outputs JSON {stepHash, sessionId} instead of plain CAS hash
- Engine parses JSON output (with legacy CAS hash fallback)

New features:
- Frontmatter validation retry: if agent output lacks valid frontmatter,
  engine calls agent.continue() up to 2 times with correction message
- Session tracking: sessionId flows from agent → engine → StepOutput
- Hermes agent: session parse failure is now a hard error (no raw text fallback)
- Hermes agent: supports --resume for continue sessions

Closes #384
2026-05-22 09:13:05 +00:00
xiaoju e62d51d845 Merge remote-tracking branch 'origin/feat/remove-llm-extract' into feat/384-agent-session-protocol 2026-05-22 09:06:24 +00:00
xiaoju a803fcb4fc fix: solve-issue.yaml meta.plan → frontmatter.plan
Follows #375 rename.
2026-05-22 09:04:34 +00:00
xiaomo d00c93fc19 Merge pull request 'feat: uwf cas put-text for storing plain text in CAS' (#382) from feat/cas-put-text into main 2026-05-22 09:02:09 +00:00
xiaoju 99a2890be2 feat: remove LLM extract fallback, require YAML frontmatter
Agent output must contain valid YAML frontmatter matching the role schema.
If frontmatter parsing fails, the step fails immediately with a clear error
instead of falling back to an LLM extraction that can fabricate values.

The extract module remains as a public API export but is no longer used
in the agent run loop.

Breaking change: agents that relied on LLM extraction to produce valid
output will now fail. They must output proper frontmatter.
2026-05-22 08:58:01 +00:00
xiaoju 3b7d0564bb feat: uwf cas put-text for storing plain text in CAS
- Register built-in text schema ({type: 'string'}) alongside workflow schemas
- Add cmdCasPutText command: uwf cas put-text <text>
- Update CLI reference in workflow-util
- Update solve-issue.yaml procedure to use put-text

Refs #380
2026-05-22 08:53:27 +00:00
9 changed files with 146 additions and 64 deletions
+2 -2
View File
@@ -20,8 +20,8 @@ roles:
2. Revise the test spec accordingly
After producing the test spec:
1. Store it via `uwf cas put "<markdown content>"` and capture the returned hash
2. Put the hash in meta.plan (required when status=ready)
1. Store it via `uwf cas put-text "<markdown content>"` and capture the returned hash
2. Put the hash in frontmatter.plan (required when status=ready)
output: "Output a brief summary of the test spec. Frontmatter must include: status (ready or insufficient_info) and plan (CAS hash of the test spec, required when status=ready)."
frontmatter:
type: object
+12
View File
@@ -7,6 +7,7 @@ import {
cmdCasGet,
cmdCasHas,
cmdCasPut,
cmdCasPutText,
cmdCasRefs,
cmdCasReindex,
cmdCasSchemaGet,
@@ -295,6 +296,17 @@ cas
});
});
cas
.command("put-text")
.description("Store a plain text string, print its hash")
.argument("<text>", "Text content to store")
.action((text: string) => {
const storageRoot = resolveStorageRoot();
runAction(async () => {
writeOutput(await cmdCasPutText(storageRoot, text));
});
});
cas
.command("has")
.description("Check if a hash exists")
+10 -1
View File
@@ -2,9 +2,11 @@ import { readFileSync } from "node:fs";
import { join } from "node:path";
import type { JSONSchema, Store } from "@uncaged/json-cas";
import { bootstrap, getSchema, refs, walk } from "@uncaged/json-cas";
import { bootstrap, getSchema, putSchema, refs, walk } from "@uncaged/json-cas";
import { createFsStore } from "@uncaged/json-cas-fs";
import { TEXT_SCHEMA } from "../schemas.js";
// ---- Helpers ----
function openStore(storageRoot: string): Store {
@@ -121,3 +123,10 @@ export async function cmdCasSchemaGet(storageRoot: string, hash: string): Promis
}
return schema;
}
export async function cmdCasPutText(storageRoot: string, text: string): Promise<{ hash: string }> {
const store = openStore(storageRoot);
const typeHash = await putSchema(store, TEXT_SCHEMA);
const hash = await store.put(typeHash, text);
return { hash };
}
+6 -2
View File
@@ -2,10 +2,13 @@ import type { Hash, Store } from "@uncaged/json-cas";
import { putSchema } from "@uncaged/json-cas";
import { START_NODE_SCHEMA, STEP_NODE_SCHEMA, WORKFLOW_SCHEMA } from "@uncaged/workflow-protocol";
export const TEXT_SCHEMA = { type: "string" as const };
export type UwfSchemaHashes = {
workflow: Hash;
startNode: Hash;
stepNode: Hash;
text: Hash;
};
/**
@@ -13,10 +16,11 @@ export type UwfSchemaHashes = {
* Idempotent: safe to call on every CLI invocation.
*/
export async function registerUwfSchemas(store: Store): Promise<UwfSchemaHashes> {
const [workflow, startNode, stepNode] = await Promise.all([
const [workflow, startNode, stepNode, text] = await Promise.all([
putSchema(store, WORKFLOW_SCHEMA),
putSchema(store, START_NODE_SCHEMA),
putSchema(store, STEP_NODE_SCHEMA),
putSchema(store, TEXT_SCHEMA),
]);
return { workflow, startNode, stepNode };
return { workflow, startNode, stepNode, text };
}
+66 -24
View File
@@ -1,4 +1,5 @@
import { spawn } from "node:child_process";
import type { Store } from "@uncaged/json-cas";
import {
type AgentContext,
@@ -10,7 +11,6 @@ import {
import {
loadHermesSession,
parseSessionIdFromStdout,
storeHermesRawOutput,
storeHermesSessionDetail,
} from "./session-detail.js";
@@ -52,17 +52,8 @@ export function buildHermesPrompt(ctx: AgentContext): string {
return parts.join("\n");
}
function spawnHermesChat(prompt: string): Promise<{ stdout: string; stderr: string }> {
function spawnHermes(args: string[]): Promise<{ stdout: string; stderr: string }> {
return new Promise((resolve, reject) => {
const args = [
"chat",
"-q",
prompt,
"--yolo",
"--max-turns",
String(HERMES_MAX_TURNS),
"--quiet",
];
const child = spawn(HERMES_COMMAND, args, {
env: process.env,
shell: false,
@@ -94,23 +85,73 @@ function spawnHermesChat(prompt: string): Promise<{ stdout: string; stderr: stri
});
}
function spawnHermesChat(prompt: string): Promise<{ stdout: string; stderr: string }> {
return spawnHermes([
"chat",
"-q",
prompt,
"--yolo",
"--max-turns",
String(HERMES_MAX_TURNS),
"--quiet",
]);
}
function spawnHermesResume(
sessionId: string,
message: string,
): Promise<{ stdout: string; stderr: string }> {
return spawnHermes([
"chat",
"--resume",
sessionId,
"-q",
message,
"--yolo",
"--max-turns",
String(HERMES_MAX_TURNS),
"--quiet",
]);
}
function parseSessionId(stdout: string, stderr: string): string {
const sessionId = parseSessionIdFromStdout(stderr) ?? parseSessionIdFromStdout(stdout);
if (sessionId === null) {
throw new Error(
"Failed to parse session_id from hermes output.\n" +
`stderr (first 200 chars): ${stderr.slice(0, 200)}\n` +
`stdout (first 200 chars): ${stdout.slice(0, 200)}`,
);
}
return sessionId;
}
async function buildResultFromSession(sessionId: string, store: Store): Promise<AgentRunResult> {
const session = await loadHermesSession(sessionId);
if (session === null) {
throw new Error(`Failed to load hermes session file for session_id: ${sessionId}`);
}
const { detailHash, output } = await storeHermesSessionDetail(store, session);
return { output, detailHash, sessionId };
}
async function runHermes(ctx: AgentContext): Promise<AgentRunResult> {
const fullPrompt = buildHermesPrompt(ctx);
const { stdout, stderr } = await spawnHermesChat(fullPrompt);
const { store } = ctx;
const sessionId = parseSessionId(stdout, stderr);
return buildResultFromSession(sessionId, ctx.store);
}
// --quiet mode: session_id may be on stdout or stderr
const sessionId = parseSessionIdFromStdout(stderr) ?? parseSessionIdFromStdout(stdout);
if (sessionId !== null) {
const session = await loadHermesSession(sessionId);
if (session !== null) {
const { detailHash, output } = await storeHermesSessionDetail(store, session);
return { output, detailHash };
}
}
const detailHash = await storeHermesRawOutput(store, stdout);
return { output: stdout, detailHash };
async function continueHermes(
sessionId: string,
message: string,
store: Store,
): Promise<AgentRunResult> {
const { stdout, stderr } = await spawnHermesResume(sessionId, message);
// Resume may return a new session_id
const newSessionId = parseSessionIdFromStdout(stderr) ?? parseSessionIdFromStdout(stdout);
const resolvedId = newSessionId ?? sessionId;
return buildResultFromSession(resolvedId, store);
}
/** Agent CLI factory: parses argv, runs Hermes, extracts output, writes StepNode. */
@@ -118,5 +159,6 @@ export function createHermesAgent(): () => Promise<void> {
return createAgent({
name: "hermes",
run: runHermes,
continue: continueHermes,
});
}
+7 -1
View File
@@ -12,4 +12,10 @@ export type { FrontmatterFastPathResult } from "./frontmatter.js";
export { tryFrontmatterFastPath } from "./frontmatter.js";
export { createAgent } from "./run.js";
export { getConfigPath, getEnvPath, loadWorkflowConfig } from "./storage.js";
export type { AgentContext, AgentOptions, AgentRunFn, AgentRunResult } from "./types.js";
export type {
AgentContext,
AgentContinueFn,
AgentOptions,
AgentRunFn,
AgentRunResult,
} from "./types.js";
+33 -34
View File
@@ -3,11 +3,12 @@ import type { CasRef, StepNodePayload, ThreadId } from "@uncaged/workflow-protoc
import { config as loadDotenv } from "dotenv";
import { buildOutputFormatInstruction } from "./build-output-format-instruction.js";
import { buildContextWithMeta } from "./context.js";
import { extract } from "./extract.js";
import { tryFrontmatterFastPath } from "./frontmatter.js";
import type { AgentStore } from "./storage.js";
import { getEnvPath, loadWorkflowConfig, resolveStorageRoot } from "./storage.js";
import type { AgentContext, AgentOptions, AgentRunResult } from "./types.js";
import { getEnvPath, resolveStorageRoot } from "./storage.js";
import type { AgentOptions } from "./types.js";
const MAX_FRONTMATTER_RETRIES = 2;
function fail(message: string): never {
process.stderr.write(`${message}\n`);
@@ -66,31 +67,16 @@ async function writeStepNode(options: {
return hash;
}
async function runAgent(options: AgentOptions, ctx: AgentContext): Promise<AgentRunResult> {
return runWithMessage("agent run failed", () => options.run(ctx));
}
async function extractOutput(
async function tryExtractOutput(
rawOutput: string,
outputSchema: CasRef,
storageRoot: string,
ctx: Awaited<ReturnType<typeof buildContextWithMeta>>,
): Promise<CasRef> {
const fastPath = await runWithMessage("frontmatter fast path", () =>
tryFrontmatterFastPath(rawOutput, outputSchema, ctx.meta.store),
).catch(() => null);
): Promise<CasRef | null> {
const fastPath = await tryFrontmatterFastPath(rawOutput, outputSchema, ctx.meta.store);
if (fastPath !== null) {
return fastPath.outputHash;
}
const config = await runWithMessage("failed to load config", () =>
loadWorkflowConfig(storageRoot),
);
const extracted = await runWithMessage("extract failed", () =>
extract(rawOutput, outputSchema, config),
);
return extracted.hash;
return null;
}
async function persistStep(options: {
@@ -112,11 +98,6 @@ async function persistStep(options: {
});
}
/**
* Create an agent CLI entrypoint.
* Parses argv (`<thread-id> <role>`), runs the agent, extracts structured output,
* writes StepNode to CAS, and prints the new node hash to stdout.
*/
export function createAgent(options: AgentOptions): () => Promise<void> {
return async function main(): Promise<void> {
const { threadId, role } = parseArgv(process.argv);
@@ -135,13 +116,31 @@ export function createAgent(options: AgentOptions): () => Promise<void> {
ctx.outputFormatInstruction = buildOutputFormatInstruction(frontmatterSchema);
}
const agentResult = await runAgent(options, ctx);
const outputHash = await extractOutput(
agentResult.output,
roleDef.frontmatter,
storageRoot,
ctx,
);
let agentResult = await runWithMessage("agent run failed", () => options.run(ctx));
// Try to extract frontmatter; retry via continue if it fails
let outputHash = await tryExtractOutput(agentResult.output, roleDef.frontmatter, ctx);
for (let retry = 0; retry < MAX_FRONTMATTER_RETRIES && outputHash === null; retry++) {
const correctionMessage =
"Your previous response did not contain valid YAML frontmatter matching the role schema.\n" +
"You MUST begin your response with a YAML frontmatter block (--- delimited).\n" +
"Please output ONLY the corrected frontmatter block followed by your work.";
agentResult = await runWithMessage("agent continue failed", () =>
options.continue(agentResult.sessionId, correctionMessage, ctx.meta.store),
);
outputHash = await tryExtractOutput(agentResult.output, roleDef.frontmatter, ctx);
}
if (outputHash === null) {
fail(
"Agent output does not contain valid YAML frontmatter matching the role schema " +
`after ${MAX_FRONTMATTER_RETRIES} retries.\n` +
`Raw output (first 500 chars): ${agentResult.output.slice(0, 500)}`,
);
}
const stepHash = await persistStep({
ctx,
outputHash,
+8
View File
@@ -17,11 +17,19 @@ export type AgentContext = ModeratorContext & {
export type AgentRunResult = {
output: string;
detailHash: string;
sessionId: string;
};
export type AgentContinueFn = (
sessionId: string,
message: string,
store: AgentContext["store"],
) => Promise<AgentRunResult>;
export type AgentRunFn = (ctx: AgentContext) => Promise<AgentRunResult>;
export type AgentOptions = {
name: string;
run: AgentRunFn;
continue: AgentContinueFn;
};
@@ -46,6 +46,8 @@ uwf cas get <hash> # read a CAS node (type + payload)
[--timestamp] # include timestamp in output
uwf cas put <type-hash> <data> # store a node, print its hash
# <data>: JSON file path or inline JSON string
uwf cas put-text <text> # store a plain text string, print its hash
# shortcut for put with the built-in text schema
uwf cas has <hash> # check if a hash exists
uwf cas refs <hash> # list direct CAS references from a node
uwf cas walk <hash> # recursive traversal from a node