feat(step-read): store assembled prompt in CAS, add --prompt flag
CI / check (push) Failing after 1m6s

Store the fully assembled prompt sent to each agent in CAS as a text
node, referenced from StepNodePayload.assembledPrompt. This enables
exact reproduction of what the agent received for debugging hallucinations.

Changes:
- workflow-protocol: StepRecord + STEP_NODE_SCHEMA add assembledPrompt field
- workflow-util-agent: AgentRunResult includes assembledPrompt, run.ts stores it
- workflow-util-agent: schemas register TEXT_SCHEMA for prompt storage
- workflow-agent-claude-code: return assembled prompt from buildClaudeCodePrompt
- workflow-agent-hermes: return assembled prompt from buildHermesPrompt
- workflow-agent-builtin: return empty prompt (no prompt assembly)
- cli-workflow: step read --prompt renders the stored prompt
- All test fixtures updated for new field

Legacy steps without assembledPrompt show 'Prompt not recorded' message.

小橘 🍊
This commit is contained in:
2026-05-29 01:42:43 +00:00
parent 7612c97ae7
commit d310d43ab8
19 changed files with 94 additions and 21 deletions
+2 -1
View File
@@ -28,7 +28,8 @@
"@uncaged/workflow-agent-hermes": "workspace:*",
"bun-types": "^1.3.13",
"typescript": "^5.8.3",
"vitest": "^4.1.7"
"vitest": "^4.1.7",
"yaml": "^2.9.0"
},
"repository": {
"type": "git",
@@ -146,10 +146,11 @@ describe("step read", () => {
agent: "uwf-test",
startedAtMs: 1000000000000,
completedAtMs: 1000000005000,
assembledPrompt: null,
});
// Read step with large quota
const markdown = await cmdStepRead(tmpDir, stepHash, 10000);
const markdown = await cmdStepRead(tmpDir, stepHash, 10000, false);
// Assert structure
expect(markdown).toContain(`# Step ${stepHash}`);
@@ -231,10 +232,11 @@ describe("step read", () => {
agent: "uwf-test",
startedAtMs: 1000000000000,
completedAtMs: 1000000005000,
assembledPrompt: null,
});
// Read step with limited quota (700 chars)
const markdown = await cmdStepRead(tmpDir, stepHash, 700);
const markdown = await cmdStepRead(tmpDir, stepHash, 700, false);
// Assert only most recent turns fit
expect(markdown).toContain(`# Step ${stepHash}`);
@@ -310,10 +312,11 @@ describe("step read", () => {
agent: "uwf-test",
startedAtMs: 1000000000000,
completedAtMs: 1000000005000,
assembledPrompt: null,
});
// Read step with minimal quota (1 char)
const markdown = await cmdStepRead(tmpDir, stepHash, 1);
const markdown = await cmdStepRead(tmpDir, stepHash, 1, false);
// Assert at least one turn is always shown
expect(markdown).toContain("LongTurn");
@@ -365,10 +368,11 @@ describe("step read", () => {
agent: "uwf-test",
startedAtMs: 1000000000000,
completedAtMs: 1000000005000,
assembledPrompt: null,
});
// Read step - should return metadata only (no error)
const markdown = await cmdStepRead(tmpDir, stepHash, 4000);
const markdown = await cmdStepRead(tmpDir, stepHash, 4000, false);
// Assert metadata is present
expect(markdown).toContain(`# Step ${stepHash}`);
@@ -441,10 +445,11 @@ describe("step read", () => {
agent: "uwf-test",
startedAtMs: 1000000000000,
completedAtMs: 1000000005000,
assembledPrompt: null,
});
// Read step - should return metadata only (no error)
const markdown = await cmdStepRead(tmpDir, stepHash, 4000);
const markdown = await cmdStepRead(tmpDir, stepHash, 4000, false);
// Assert metadata is present
expect(markdown).toContain(`# Step ${stepHash}`);
@@ -515,9 +520,10 @@ describe("step read", () => {
agent: "uwf-hermes",
startedAtMs: 1000000000000,
completedAtMs: 1000000005000,
assembledPrompt: null,
});
const markdown = await cmdStepRead(tmpDir, stepHash, 4000);
const markdown = await cmdStepRead(tmpDir, stepHash, 4000, false);
expect(markdown).toContain("**Turn role:** assistant");
expect(markdown).toContain("**terminal**");
@@ -588,10 +594,11 @@ describe("step read", () => {
agent: "uwf-test",
startedAtMs: 1000000000000,
completedAtMs: 1000000005000,
assembledPrompt: null,
});
// Read step
const markdown = await cmdStepRead(tmpDir, stepHash, 4000);
const markdown = await cmdStepRead(tmpDir, stepHash, 4000, false);
// Assert content is rendered correctly without corruption
expect(markdown).toContain("`backticks`");
@@ -116,6 +116,7 @@ async function createTestStep(
edgePrompt: "",
startedAtMs: Date.now(),
completedAtMs: Date.now() + 1000,
assembledPrompt: null,
cwd: "/tmp",
};
return store.put(schemas.stepNode, stepPayload);
@@ -85,6 +85,7 @@ describe("protocol types", () => {
edgePrompt: "",
startedAtMs: 1000,
completedAtMs: 2000,
assembledPrompt: null,
cwd: "/test/path",
};
expect(record.startedAtMs).toBe(1000);
@@ -153,6 +154,7 @@ describe("StepNode JSON schema", () => {
edgePrompt: "",
startedAtMs: 1000000000000,
completedAtMs: 1000000005000,
assembledPrompt: null,
});
expect(hash).toBeTruthy();
});
@@ -143,6 +143,7 @@ describe("thread read --quota flag", () => {
agent: "uwf-test",
startedAtMs: 1000000000000,
completedAtMs: 1000000005000,
assembledPrompt: null,
});
steps.push(stepHash);
}
@@ -225,6 +226,7 @@ describe("thread read --quota flag", () => {
agent: "uwf-test",
startedAtMs: 1000000000000,
completedAtMs: 1000000005000,
assembledPrompt: null,
});
const step2Content = generateContent(600, "Second");
@@ -251,6 +253,7 @@ describe("thread read --quota flag", () => {
agent: "uwf-test",
startedAtMs: 1000000000000,
completedAtMs: 1000000005000,
assembledPrompt: null,
});
const threadId = "01HX2Q3R4S5T6V7W8X9YZ1" as ThreadId;
@@ -336,6 +339,7 @@ describe("thread read --quota flag", () => {
agent: "uwf-test",
startedAtMs: 1000000000000,
completedAtMs: 1000000005000,
assembledPrompt: null,
});
steps.push(stepHash);
}
@@ -415,6 +419,7 @@ describe("thread read --quota flag", () => {
agent: "uwf-test",
startedAtMs: 1000000000000,
completedAtMs: 1000000005000,
assembledPrompt: null,
});
const threadId = "01HX2Q3R4S5T6V7W8X9YZ4" as ThreadId;
@@ -492,6 +497,7 @@ describe("thread read --quota flag", () => {
agent: "uwf-test",
startedAtMs: 1000000000000,
completedAtMs: 1000000005000,
assembledPrompt: null,
});
steps.push(stepHash);
}
@@ -573,6 +579,7 @@ describe("thread read --quota flag", () => {
agent: "uwf-test",
startedAtMs: 1000000000000,
completedAtMs: 1000000005000,
assembledPrompt: null,
});
steps.push(stepHash);
}
@@ -141,6 +141,7 @@ describe("thread read XML tag isolation", () => {
agent: "uwf-claude-code",
startedAtMs: 1000000000000,
completedAtMs: 1000000005000,
assembledPrompt: null,
});
const threadId = "01JTEST0000000000000001" as ThreadId;
@@ -218,6 +219,7 @@ describe("thread read XML tag isolation", () => {
agent: "uwf-claude-code",
startedAtMs: 1000000000000,
completedAtMs: 1000000005000,
assembledPrompt: null,
});
const threadId = "01JTEST0000000000000002" as ThreadId;
@@ -280,6 +282,7 @@ describe("thread read XML tag isolation", () => {
agent: "uwf-test",
startedAtMs: 1000000000000,
completedAtMs: 1000000005000,
assembledPrompt: null,
});
const step2 = await uwf.store.put(uwf.schemas.stepNode, {
@@ -291,6 +294,7 @@ describe("thread read XML tag isolation", () => {
agent: "uwf-test",
startedAtMs: 1000000000000,
completedAtMs: 1000000005000,
assembledPrompt: null,
});
const threadId = "01JTEST0000000000000003" as ThreadId;
@@ -345,6 +349,7 @@ describe("thread read XML tag isolation", () => {
agent: "uwf-test",
startedAtMs: 1000000000000,
completedAtMs: 1000000005000,
assembledPrompt: null,
});
const threadId = "01JTEST0000000000000004" as ThreadId;
@@ -399,6 +404,7 @@ describe("thread read XML tag isolation", () => {
agent: "uwf-test",
startedAtMs: 1000000000000,
completedAtMs: 1000000005000,
assembledPrompt: null,
});
const threadId = "01JTEST0000000000000005" as ThreadId;
@@ -453,6 +459,7 @@ describe("thread read XML tag isolation", () => {
agent: "uwf-test",
startedAtMs: 1000000000000,
completedAtMs: 1000000005000,
assembledPrompt: null,
});
const threadId = "01JTEST0000000000000006" as ThreadId;
@@ -527,6 +534,7 @@ describe("thread read XML tag isolation", () => {
agent: "uwf-test",
startedAtMs: 1000000000000,
completedAtMs: 1000000005000,
assembledPrompt: null,
});
const step2 = await uwf.store.put(uwf.schemas.stepNode, {
@@ -538,6 +546,7 @@ describe("thread read XML tag isolation", () => {
agent: "uwf-test",
startedAtMs: 1000000000000,
completedAtMs: 1000000005000,
assembledPrompt: null,
});
const step3 = await uwf.store.put(uwf.schemas.stepNode, {
@@ -549,6 +558,7 @@ describe("thread read XML tag isolation", () => {
agent: "uwf-test",
startedAtMs: 1000000000000,
completedAtMs: 1000000005000,
assembledPrompt: null,
});
const threadId = "01JTEST0000000000000007" as ThreadId;
@@ -629,6 +639,7 @@ describe("thread read XML tag isolation", () => {
agent: "uwf-test",
startedAtMs: 1000000000000,
completedAtMs: 1000000005000,
assembledPrompt: null,
});
const threadId = "01JTEST0000000000000008" as ThreadId;
@@ -685,6 +696,7 @@ describe("thread read XML tag isolation", () => {
agent: "uwf-test",
startedAtMs: 1000000000000,
completedAtMs: 1000000005000,
assembledPrompt: null,
})) as CasRef;
steps.push(step);
prev = step;
+3 -2
View File
@@ -364,7 +364,8 @@ step
.description("Read a step's turns as human-readable markdown")
.argument("<step-hash>", "CAS hash of the StepNode")
.option("--quota <chars>", "Max output characters", "4000")
.action((stepHash: string, opts: { quota: string }) => {
.option("--prompt", "Show the assembled prompt sent to the agent instead of turns")
.action((stepHash: string, opts: { quota: string; prompt: boolean }) => {
const storageRoot = resolveStorageRoot();
runAction(async () => {
const quota = Number.parseInt(opts.quota, 10);
@@ -372,7 +373,7 @@ step
process.stderr.write("invalid --quota: must be a positive integer\n");
process.exit(1);
}
const markdown = await cmdStepRead(storageRoot, stepHash as CasRef, quota);
const markdown = await cmdStepRead(storageRoot, stepHash as CasRef, quota, opts.prompt === true);
process.stdout.write(markdown.endsWith("\n") ? markdown : `${markdown}\n`);
});
});
@@ -289,6 +289,7 @@ export async function cmdStepRead(
storageRoot: string,
stepHash: CasRef,
quota: number,
showPrompt: boolean,
): Promise<string> {
const uwf = await createUwfStore(storageRoot);
const node = uwf.store.get(stepHash);
@@ -300,6 +301,20 @@ export async function cmdStepRead(
}
const payload = node.payload as StepNodePayload;
// --prompt mode: show the assembled prompt that was sent to the agent
if (showPrompt) {
const promptRef = (payload as Record<string, unknown>).assembledPrompt;
if (typeof promptRef !== "string") {
return `# Step ${stepHash}\n\n_Prompt not recorded (legacy step)._`;
}
const promptNode = uwf.store.get(promptRef as CasRef);
if (promptNode === null) {
return `# Step ${stepHash}\n\n_Prompt CAS node not found: ${promptRef}_`;
}
const promptText = typeof promptNode.payload === "string" ? promptNode.payload : JSON.stringify(promptNode.payload);
return `# Step ${stepHash}\n\n**Role:** ${payload.role}\n**Agent:** ${payload.agent}\n\n## Prompt\n\n${promptText}`;
}
if (payload.detail === null) {
return formatStepMarkdown(stepHash, payload.role, payload.agent, [], []);
}
+2 -2
View File
@@ -82,7 +82,7 @@ async function runBuiltinWithMessages(
if (loopResult.turnCount === 0) {
log("5RWTK9NB", "no turns produced, returning empty output");
return { output: "", detailHash: "", sessionId: session.sessionId };
return { output: "", detailHash: "", sessionId: session.sessionId, assembledPrompt: "" };
}
// Read jsonl → persist turns to CAS → store detail
@@ -94,7 +94,7 @@ async function runBuiltinWithMessages(
session.startedAtMs,
);
return { output: stripPreamble(loopResult.finalText), detailHash, sessionId: session.sessionId };
return { output: stripPreamble(loopResult.finalText), detailHash, sessionId: session.sessionId, assembledPrompt: "" };
}
async function runBuiltin(ctx: AgentContext): Promise<AgentRunResult> {
@@ -120,12 +120,12 @@ function spawnClaudeResume(
return spawnClaude(args);
}
async function processClaudeOutput(stdout: string, store: Store): Promise<AgentRunResult> {
async function processClaudeOutput(stdout: string, store: Store, assembledPrompt: string): Promise<AgentRunResult> {
const parsed = parseClaudeCodeStreamOutput(stdout);
if (parsed !== null) {
const { detailHash, output, sessionId } = await storeClaudeCodeDetail(store, parsed);
return { output, detailHash, sessionId };
return { output, detailHash, sessionId, assembledPrompt };
}
throw new Error(
@@ -144,7 +144,7 @@ async function runClaudeCode(ctx: AgentContext): Promise<AgentRunResult> {
if (cachedSessionId !== null) {
try {
const { stdout } = await spawnClaudeResume(cachedSessionId, fullPrompt);
const result = await processClaudeOutput(stdout, ctx.store);
const result = await processClaudeOutput(stdout, ctx.store, fullPrompt);
if (result.sessionId !== undefined && result.sessionId !== "") {
await setCachedSessionId("claude-code", ctx.threadId, ctx.role, result.sessionId);
}
@@ -159,7 +159,7 @@ async function runClaudeCode(ctx: AgentContext): Promise<AgentRunResult> {
}
const { stdout } = await spawnClaudeRun(fullPrompt);
const result = await processClaudeOutput(stdout, ctx.store);
const result = await processClaudeOutput(stdout, ctx.store, fullPrompt);
if (result.sessionId !== undefined && result.sessionId !== "") {
await setCachedSessionId("claude-code", ctx.threadId, ctx.role, result.sessionId);
}
@@ -172,7 +172,7 @@ async function continueClaudeCode(
store: Store,
): Promise<AgentRunResult> {
const { stdout } = await spawnClaudeResume(sessionId, message);
return processClaudeOutput(stdout, store);
return processClaudeOutput(stdout, store, "");
}
/** Agent CLI factory: parses argv, runs Claude Code, extracts output, writes StepNode. */
+2 -2
View File
@@ -117,7 +117,7 @@ export function createHermesAgent(): () => Promise<void> {
await setCachedSessionId(ctx.threadId, ctx.role, sessionId);
}
return { output: text, detailHash, sessionId };
return { output: text, detailHash, sessionId, assembledPrompt: fullPrompt };
}
async function runHermes(ctx: AgentContext): Promise<AgentRunResult> {
@@ -148,7 +148,7 @@ export function createHermesAgent(): () => Promise<void> {
// so the agent sees the full conversation history (crucial for retries).
const { text, sessionId } = await client.prompt(message);
const { detailHash } = await storePromptResult(store, sessionId);
return { output: text, detailHash, sessionId };
return { output: text, detailHash, sessionId, assembledPrompt: "" };
}
const agentMain = createAgent({
@@ -25,6 +25,7 @@ describe("Protocol types for thread/edge location", () => {
edgePrompt: "Plan the implementation",
startedAtMs: Date.now(),
completedAtMs: Date.now() + 1000,
assembledPrompt: null,
cwd: "/home/user/project",
};
@@ -88,6 +88,9 @@ export const STEP_NODE_SCHEMA: JSONSchema = {
startedAtMs: { type: "integer" },
completedAtMs: { type: "integer" },
cwd: { type: "string" },
assembledPrompt: {
anyOf: [{ type: "string", format: "cas_ref" }, { type: "null" }],
},
},
additionalProperties: false,
};
+2
View File
@@ -20,6 +20,8 @@ export type StepRecord = {
completedAtMs: number;
/** Working directory where the agent executed. Missing in legacy nodes → "". */
cwd: string;
/** CAS ref to the fully assembled prompt sent to the agent. null for legacy steps. */
assembledPrompt: CasRef | null;
};
// ── 4.2 Workflow 定义 ───────────────────────────────────────────────
@@ -44,6 +44,7 @@ describe("adapter-stdout: A4 retry loop survives JSON output", () => {
body: secondAttempt!.body,
startedAtMs: 1000,
completedAtMs: 2000,
assembledPrompt: null,
};
const json = JSON.stringify(adapterOutput);
@@ -131,6 +131,7 @@ async function buildHistory(
startedAtMs: step.startedAtMs,
completedAtMs: step.completedAtMs,
cwd: step.cwd ?? "",
assembledPrompt: step.assembledPrompt ?? null,
content,
});
}
+13
View File
@@ -64,6 +64,7 @@ async function writeStepNode(options: {
edgePrompt: string;
startedAtMs: number;
completedAtMs: number;
assembledPromptHash: CasRef | null;
}): Promise<CasRef> {
const payload: StepNodePayload = {
start: options.startHash,
@@ -76,6 +77,7 @@ async function writeStepNode(options: {
startedAtMs: options.startedAtMs,
completedAtMs: options.completedAtMs,
cwd: process.cwd(),
assembledPrompt: options.assembledPromptHash,
};
const hash = await options.store.put(options.schemas.stepNode, payload);
const node = options.store.get(hash);
@@ -114,6 +116,7 @@ async function persistStep(options: {
agentName: string;
startedAtMs: number;
completedAtMs: number;
assembledPromptHash: CasRef | null;
}): Promise<CasRef> {
const { store, schemas, chain, headHash } = options.ctx.meta;
return writeStepNode({
@@ -128,6 +131,7 @@ async function persistStep(options: {
edgePrompt: options.ctx.edgePrompt,
startedAtMs: options.startedAtMs,
completedAtMs: options.completedAtMs,
assembledPromptHash: options.assembledPromptHash,
});
}
@@ -182,6 +186,14 @@ export function createAgent(options: AgentOptions): () => Promise<void> {
);
}
const completedAtMs = Date.now();
// Store the assembled prompt in CAS for later inspection via `step read --prompt`
const promptText = agentResult.assembledPrompt;
const assembledPromptHash =
promptText !== ""
? await ctx.meta.store.put(ctx.meta.schemas.text, promptText).catch(() => null)
: null;
const stepHash = await persistStep({
ctx,
outputHash: extracted.outputHash,
@@ -189,6 +201,7 @@ export function createAgent(options: AgentOptions): () => Promise<void> {
agentName: agentLabel(options.name),
startedAtMs,
completedAtMs,
assembledPromptHash,
});
const adapterOutput: AdapterOutput = {
+6 -2
View File
@@ -6,17 +6,21 @@ export type UwfAgentSchemaHashes = {
workflow: Hash;
startNode: Hash;
stepNode: Hash;
text: Hash;
};
const TEXT_SCHEMA = { type: "string" as const };
/**
* Register Workflow, StartNode, and StepNode JSON Schemas in the CAS store.
* Idempotent: safe to call on every agent invocation.
*/
export async function registerAgentSchemas(store: Store): Promise<UwfAgentSchemaHashes> {
const [workflow, startNode, stepNode] = await Promise.all([
const [workflow, startNode, stepNode, text] = await Promise.all([
putSchema(store, WORKFLOW_SCHEMA),
putSchema(store, START_NODE_SCHEMA),
putSchema(store, STEP_NODE_SCHEMA),
putSchema(store, TEXT_SCHEMA),
]);
return { workflow, startNode, stepNode };
return { workflow, startNode, stepNode, text };
}
@@ -27,6 +27,8 @@ export type AgentRunResult = {
output: string;
detailHash: string;
sessionId: string;
/** The fully assembled prompt that was sent to the agent. */
assembledPrompt: string;
};
export type AgentContinueFn = (