feat(step-read): store assembled prompt in CAS, add --prompt flag

Store the fully assembled prompt sent to each agent in CAS as a text node, referenced from StepNodePayload.assembledPrompt. This enables exact reproduction of what the agent received when debugging hallucinations.

Changes:
- workflow-protocol: StepRecord and STEP_NODE_SCHEMA add the assembledPrompt field
- workflow-util-agent: AgentRunResult includes assembledPrompt; run.ts stores it
- workflow-util-agent: schemas register TEXT_SCHEMA for prompt storage
- workflow-agent-claude-code: return the assembled prompt from buildClaudeCodePrompt
- workflow-agent-hermes: return the assembled prompt from buildHermesPrompt
- workflow-agent-builtin: return an empty prompt (no prompt assembly)
- cli-workflow: `step read --prompt` renders the stored prompt
- All test fixtures updated for the new field

Legacy steps without assembledPrompt show a 'Prompt not recorded' message.

小橘 🍊
2026-05-29 01:42:43 +00:00
parent 7612c97ae7
commit d310d43ab8
19 changed files with 94 additions and 21 deletions
@@ -28,7 +28,8 @@
    "@uncaged/workflow-agent-hermes": "workspace:*",
    "bun-types": "^1.3.13",
    "typescript": "^5.8.3",
-    "vitest": "^4.1.7"
+    "vitest": "^4.1.7",
+    "yaml": "^2.9.0"
  },
  "repository": {
    "type": "git",
@@ -146,10 +146,11 @@ describe("step read", () => {
      agent: "uwf-test",
      startedAtMs: 1000000000000,
      completedAtMs: 1000000005000,
+      assembledPrompt: null,
    });

    // Read step with large quota
-    const markdown = await cmdStepRead(tmpDir, stepHash, 10000);
+    const markdown = await cmdStepRead(tmpDir, stepHash, 10000, false);

    // Assert structure
    expect(markdown).toContain(`# Step ${stepHash}`);
@@ -231,10 +232,11 @@ describe("step read", () => {
      agent: "uwf-test",
      startedAtMs: 1000000000000,
      completedAtMs: 1000000005000,
+      assembledPrompt: null,
    });

    // Read step with limited quota (700 chars)
-    const markdown = await cmdStepRead(tmpDir, stepHash, 700);
+    const markdown = await cmdStepRead(tmpDir, stepHash, 700, false);

    // Assert only most recent turns fit
    expect(markdown).toContain(`# Step ${stepHash}`);
@@ -310,10 +312,11 @@ describe("step read", () => {
      agent: "uwf-test",
      startedAtMs: 1000000000000,
      completedAtMs: 1000000005000,
+      assembledPrompt: null,
    });

    // Read step with minimal quota (1 char)
-    const markdown = await cmdStepRead(tmpDir, stepHash, 1);
+    const markdown = await cmdStepRead(tmpDir, stepHash, 1, false);

    // Assert at least one turn is always shown
    expect(markdown).toContain("LongTurn");
@@ -365,10 +368,11 @@ describe("step read", () => {
      agent: "uwf-test",
      startedAtMs: 1000000000000,
      completedAtMs: 1000000005000,
+      assembledPrompt: null,
    });

    // Read step - should return metadata only (no error)
-    const markdown = await cmdStepRead(tmpDir, stepHash, 4000);
+    const markdown = await cmdStepRead(tmpDir, stepHash, 4000, false);

    // Assert metadata is present
    expect(markdown).toContain(`# Step ${stepHash}`);
@@ -441,10 +445,11 @@ describe("step read", () => {
      agent: "uwf-test",
      startedAtMs: 1000000000000,
      completedAtMs: 1000000005000,
+      assembledPrompt: null,
    });

    // Read step - should return metadata only (no error)
-    const markdown = await cmdStepRead(tmpDir, stepHash, 4000);
+    const markdown = await cmdStepRead(tmpDir, stepHash, 4000, false);

    // Assert metadata is present
    expect(markdown).toContain(`# Step ${stepHash}`);
@@ -515,9 +520,10 @@ describe("step read", () => {
      agent: "uwf-hermes",
      startedAtMs: 1000000000000,
      completedAtMs: 1000000005000,
+      assembledPrompt: null,
    });

-    const markdown = await cmdStepRead(tmpDir, stepHash, 4000);
+    const markdown = await cmdStepRead(tmpDir, stepHash, 4000, false);

    expect(markdown).toContain("**Turn role:** assistant");
    expect(markdown).toContain("**terminal**");
@@ -588,10 +594,11 @@ describe("step read", () => {
      agent: "uwf-test",
      startedAtMs: 1000000000000,
      completedAtMs: 1000000005000,
+      assembledPrompt: null,
    });

    // Read step
-    const markdown = await cmdStepRead(tmpDir, stepHash, 4000);
+    const markdown = await cmdStepRead(tmpDir, stepHash, 4000, false);

    // Assert content is rendered correctly without corruption
    expect(markdown).toContain("`backticks`");
@@ -116,6 +116,7 @@ async function createTestStep(
    edgePrompt: "",
    startedAtMs: Date.now(),
    completedAtMs: Date.now() + 1000,
+    assembledPrompt: null,
    cwd: "/tmp",
  };
  return store.put(schemas.stepNode, stepPayload);
@@ -85,6 +85,7 @@ describe("protocol types", () => {
      edgePrompt: "",
      startedAtMs: 1000,
      completedAtMs: 2000,
+      assembledPrompt: null,
      cwd: "/test/path",
    };
    expect(record.startedAtMs).toBe(1000);
@@ -153,6 +154,7 @@ describe("StepNode JSON schema", () => {
      edgePrompt: "",
      startedAtMs: 1000000000000,
      completedAtMs: 1000000005000,
+      assembledPrompt: null,
    });
    expect(hash).toBeTruthy();
  });
@@ -143,6 +143,7 @@ describe("thread read --quota flag", () => {
        agent: "uwf-test",
        startedAtMs: 1000000000000,
        completedAtMs: 1000000005000,
+      assembledPrompt: null,
      });
      steps.push(stepHash);
    }
@@ -225,6 +226,7 @@ describe("thread read --quota flag", () => {
      agent: "uwf-test",
      startedAtMs: 1000000000000,
      completedAtMs: 1000000005000,
+      assembledPrompt: null,
    });

    const step2Content = generateContent(600, "Second");
@@ -251,6 +253,7 @@ describe("thread read --quota flag", () => {
      agent: "uwf-test",
      startedAtMs: 1000000000000,
      completedAtMs: 1000000005000,
+      assembledPrompt: null,
    });

    const threadId = "01HX2Q3R4S5T6V7W8X9YZ1" as ThreadId;
@@ -336,6 +339,7 @@ describe("thread read --quota flag", () => {
        agent: "uwf-test",
        startedAtMs: 1000000000000,
        completedAtMs: 1000000005000,
+      assembledPrompt: null,
      });
      steps.push(stepHash);
    }
@@ -415,6 +419,7 @@ describe("thread read --quota flag", () => {
      agent: "uwf-test",
      startedAtMs: 1000000000000,
      completedAtMs: 1000000005000,
+      assembledPrompt: null,
    });

    const threadId = "01HX2Q3R4S5T6V7W8X9YZ4" as ThreadId;
@@ -492,6 +497,7 @@ describe("thread read --quota flag", () => {
        agent: "uwf-test",
        startedAtMs: 1000000000000,
        completedAtMs: 1000000005000,
+      assembledPrompt: null,
      });
      steps.push(stepHash);
    }
@@ -573,6 +579,7 @@ describe("thread read --quota flag", () => {
        agent: "uwf-test",
        startedAtMs: 1000000000000,
        completedAtMs: 1000000005000,
+      assembledPrompt: null,
      });
      steps.push(stepHash);
    }
@@ -141,6 +141,7 @@ describe("thread read XML tag isolation", () => {
      agent: "uwf-claude-code",
      startedAtMs: 1000000000000,
      completedAtMs: 1000000005000,
+      assembledPrompt: null,
    });

    const threadId = "01JTEST0000000000000001" as ThreadId;
@@ -218,6 +219,7 @@ describe("thread read XML tag isolation", () => {
      agent: "uwf-claude-code",
      startedAtMs: 1000000000000,
      completedAtMs: 1000000005000,
+      assembledPrompt: null,
    });

    const threadId = "01JTEST0000000000000002" as ThreadId;
@@ -280,6 +282,7 @@ describe("thread read XML tag isolation", () => {
      agent: "uwf-test",
      startedAtMs: 1000000000000,
      completedAtMs: 1000000005000,
+      assembledPrompt: null,
    });

    const step2 = await uwf.store.put(uwf.schemas.stepNode, {
@@ -291,6 +294,7 @@ describe("thread read XML tag isolation", () => {
      agent: "uwf-test",
      startedAtMs: 1000000000000,
      completedAtMs: 1000000005000,
+      assembledPrompt: null,
    });

    const threadId = "01JTEST0000000000000003" as ThreadId;
@@ -345,6 +349,7 @@ describe("thread read XML tag isolation", () => {
      agent: "uwf-test",
      startedAtMs: 1000000000000,
      completedAtMs: 1000000005000,
+      assembledPrompt: null,
    });

    const threadId = "01JTEST0000000000000004" as ThreadId;
@@ -399,6 +404,7 @@ describe("thread read XML tag isolation", () => {
      agent: "uwf-test",
      startedAtMs: 1000000000000,
      completedAtMs: 1000000005000,
+      assembledPrompt: null,
    });

    const threadId = "01JTEST0000000000000005" as ThreadId;
@@ -453,6 +459,7 @@ describe("thread read XML tag isolation", () => {
      agent: "uwf-test",
      startedAtMs: 1000000000000,
      completedAtMs: 1000000005000,
+      assembledPrompt: null,
    });

    const threadId = "01JTEST0000000000000006" as ThreadId;
@@ -527,6 +534,7 @@ describe("thread read XML tag isolation", () => {
      agent: "uwf-test",
      startedAtMs: 1000000000000,
      completedAtMs: 1000000005000,
+      assembledPrompt: null,
    });

    const step2 = await uwf.store.put(uwf.schemas.stepNode, {
@@ -538,6 +546,7 @@ describe("thread read XML tag isolation", () => {
      agent: "uwf-test",
      startedAtMs: 1000000000000,
      completedAtMs: 1000000005000,
+      assembledPrompt: null,
    });

    const step3 = await uwf.store.put(uwf.schemas.stepNode, {
@@ -549,6 +558,7 @@ describe("thread read XML tag isolation", () => {
      agent: "uwf-test",
      startedAtMs: 1000000000000,
      completedAtMs: 1000000005000,
+      assembledPrompt: null,
    });

    const threadId = "01JTEST0000000000000007" as ThreadId;
@@ -629,6 +639,7 @@ describe("thread read XML tag isolation", () => {
      agent: "uwf-test",
      startedAtMs: 1000000000000,
      completedAtMs: 1000000005000,
+      assembledPrompt: null,
    });

    const threadId = "01JTEST0000000000000008" as ThreadId;
@@ -685,6 +696,7 @@ describe("thread read XML tag isolation", () => {
        agent: "uwf-test",
        startedAtMs: 1000000000000,
        completedAtMs: 1000000005000,
+      assembledPrompt: null,
      })) as CasRef;
      steps.push(step);
      prev = step;
@@ -364,7 +364,8 @@ step
  .description("Read a step's turns as human-readable markdown")
  .argument("<step-hash>", "CAS hash of the StepNode")
  .option("--quota <chars>", "Max output characters", "4000")
-  .action((stepHash: string, opts: { quota: string }) => {
+  .option("--prompt", "Show the assembled prompt sent to the agent instead of turns")
+  .action((stepHash: string, opts: { quota: string; prompt: boolean }) => {
    const storageRoot = resolveStorageRoot();
    runAction(async () => {
      const quota = Number.parseInt(opts.quota, 10);
@@ -372,7 +373,7 @@ step
        process.stderr.write("invalid --quota: must be a positive integer\n");
        process.exit(1);
      }
-      const markdown = await cmdStepRead(storageRoot, stepHash as CasRef, quota);
+      const markdown = await cmdStepRead(storageRoot, stepHash as CasRef, quota, opts.prompt === true);
      process.stdout.write(markdown.endsWith("\n") ? markdown : `${markdown}\n`);
    });
  });
@@ -289,6 +289,7 @@ export async function cmdStepRead(
  storageRoot: string,
  stepHash: CasRef,
  quota: number,
+  showPrompt: boolean,
 ): Promise<string> {
  const uwf = await createUwfStore(storageRoot);
  const node = uwf.store.get(stepHash);
@@ -300,6 +301,20 @@ export async function cmdStepRead(
  }
  const payload = node.payload as StepNodePayload;

+  // --prompt mode: show the assembled prompt that was sent to the agent
+  if (showPrompt) {
+    const promptRef = (payload as Record<string, unknown>).assembledPrompt;
+    if (typeof promptRef !== "string") {
+      return `# Step ${stepHash}\n\n_Prompt not recorded (legacy step)._`;
+    }
+    const promptNode = uwf.store.get(promptRef as CasRef);
+    if (promptNode === null) {
+      return `# Step ${stepHash}\n\n_Prompt CAS node not found: ${promptRef}_`;
+    }
+    const promptText = typeof promptNode.payload === "string" ? promptNode.payload : JSON.stringify(promptNode.payload);
+    return `# Step ${stepHash}\n\n**Role:** ${payload.role}\n**Agent:** ${payload.agent}\n\n## Prompt\n\n${promptText}`;
+  }
+
  if (payload.detail === null) {
    return formatStepMarkdown(stepHash, payload.role, payload.agent, [], []);
  }
@@ -82,7 +82,7 @@ async function runBuiltinWithMessages(

  if (loopResult.turnCount === 0) {
    log("5RWTK9NB", "no turns produced, returning empty output");
-    return { output: "", detailHash: "", sessionId: session.sessionId };
+    return { output: "", detailHash: "", sessionId: session.sessionId, assembledPrompt: "" };
  }

  // Read jsonl → persist turns to CAS → store detail
@@ -94,7 +94,7 @@ async function runBuiltinWithMessages(
    session.startedAtMs,
  );

-  return { output: stripPreamble(loopResult.finalText), detailHash, sessionId: session.sessionId };
+  return { output: stripPreamble(loopResult.finalText), detailHash, sessionId: session.sessionId, assembledPrompt: "" };
 }

 async function runBuiltin(ctx: AgentContext): Promise<AgentRunResult> {
@@ -120,12 +120,12 @@ function spawnClaudeResume(
  return spawnClaude(args);
 }

-async function processClaudeOutput(stdout: string, store: Store): Promise<AgentRunResult> {
+async function processClaudeOutput(stdout: string, store: Store, assembledPrompt: string): Promise<AgentRunResult> {
  const parsed = parseClaudeCodeStreamOutput(stdout);

  if (parsed !== null) {
    const { detailHash, output, sessionId } = await storeClaudeCodeDetail(store, parsed);
-    return { output, detailHash, sessionId };
+    return { output, detailHash, sessionId, assembledPrompt };
  }

  throw new Error(
@@ -144,7 +144,7 @@ async function runClaudeCode(ctx: AgentContext): Promise<AgentRunResult> {
    if (cachedSessionId !== null) {
      try {
        const { stdout } = await spawnClaudeResume(cachedSessionId, fullPrompt);
-        const result = await processClaudeOutput(stdout, ctx.store);
+        const result = await processClaudeOutput(stdout, ctx.store, fullPrompt);
        if (result.sessionId !== undefined && result.sessionId !== "") {
          await setCachedSessionId("claude-code", ctx.threadId, ctx.role, result.sessionId);
        }
@@ -159,7 +159,7 @@ async function runClaudeCode(ctx: AgentContext): Promise<AgentRunResult> {
  }

  const { stdout } = await spawnClaudeRun(fullPrompt);
-  const result = await processClaudeOutput(stdout, ctx.store);
+  const result = await processClaudeOutput(stdout, ctx.store, fullPrompt);
  if (result.sessionId !== undefined && result.sessionId !== "") {
    await setCachedSessionId("claude-code", ctx.threadId, ctx.role, result.sessionId);
  }
@@ -172,7 +172,7 @@ async function continueClaudeCode(
  store: Store,
 ): Promise<AgentRunResult> {
  const { stdout } = await spawnClaudeResume(sessionId, message);
-  return processClaudeOutput(stdout, store);
+  return processClaudeOutput(stdout, store, "");
 }

 /** Agent CLI factory: parses argv, runs Claude Code, extracts output, writes StepNode. */
@@ -117,7 +117,7 @@ export function createHermesAgent(): () => Promise<void> {
      await setCachedSessionId(ctx.threadId, ctx.role, sessionId);
    }

-    return { output: text, detailHash, sessionId };
+    return { output: text, detailHash, sessionId, assembledPrompt: fullPrompt };
  }

  async function runHermes(ctx: AgentContext): Promise<AgentRunResult> {
@@ -148,7 +148,7 @@ export function createHermesAgent(): () => Promise<void> {
    // so the agent sees the full conversation history (crucial for retries).
    const { text, sessionId } = await client.prompt(message);
    const { detailHash } = await storePromptResult(store, sessionId);
-    return { output: text, detailHash, sessionId };
+    return { output: text, detailHash, sessionId, assembledPrompt: "" };
  }

  const agentMain = createAgent({
@@ -25,6 +25,7 @@ describe("Protocol types for thread/edge location", () => {
        edgePrompt: "Plan the implementation",
        startedAtMs: Date.now(),
        completedAtMs: Date.now() + 1000,
+    assembledPrompt: null,
        cwd: "/home/user/project",
      };

@@ -88,6 +88,9 @@ export const STEP_NODE_SCHEMA: JSONSchema = {
    startedAtMs: { type: "integer" },
    completedAtMs: { type: "integer" },
    cwd: { type: "string" },
+    assembledPrompt: {
+      anyOf: [{ type: "string", format: "cas_ref" }, { type: "null" }],
+    },
  },
  additionalProperties: false,
 };
@@ -20,6 +20,8 @@ export type StepRecord = {
  completedAtMs: number;
  /** Working directory where the agent executed. Missing in legacy nodes → "". */
  cwd: string;
+  /** CAS ref to the fully assembled prompt sent to the agent. null for legacy steps. */
+  assembledPrompt: CasRef | null;
 };

 // ── 4.2 Workflow 定义 ───────────────────────────────────────────────
@@ -44,6 +44,7 @@ describe("adapter-stdout: A4 retry loop survives JSON output", () => {
      body: secondAttempt!.body,
      startedAtMs: 1000,
      completedAtMs: 2000,
+      assembledPrompt: null,
    };

    const json = JSON.stringify(adapterOutput);
@@ -131,6 +131,7 @@ async function buildHistory(
      startedAtMs: step.startedAtMs,
      completedAtMs: step.completedAtMs,
      cwd: step.cwd ?? "",
+      assembledPrompt: step.assembledPrompt ?? null,
      content,
    });
  }
@@ -64,6 +64,7 @@ async function writeStepNode(options: {
  edgePrompt: string;
  startedAtMs: number;
  completedAtMs: number;
+  assembledPromptHash: CasRef | null;
 }): Promise<CasRef> {
  const payload: StepNodePayload = {
    start: options.startHash,
@@ -76,6 +77,7 @@ async function writeStepNode(options: {
    startedAtMs: options.startedAtMs,
    completedAtMs: options.completedAtMs,
    cwd: process.cwd(),
+    assembledPrompt: options.assembledPromptHash,
  };
  const hash = await options.store.put(options.schemas.stepNode, payload);
  const node = options.store.get(hash);
@@ -114,6 +116,7 @@ async function persistStep(options: {
  agentName: string;
  startedAtMs: number;
  completedAtMs: number;
+  assembledPromptHash: CasRef | null;
 }): Promise<CasRef> {
  const { store, schemas, chain, headHash } = options.ctx.meta;
  return writeStepNode({
@@ -128,6 +131,7 @@ async function persistStep(options: {
    edgePrompt: options.ctx.edgePrompt,
    startedAtMs: options.startedAtMs,
    completedAtMs: options.completedAtMs,
+    assembledPromptHash: options.assembledPromptHash,
  });
 }

@@ -182,6 +186,14 @@ export function createAgent(options: AgentOptions): () => Promise<void> {
      );
    }
    const completedAtMs = Date.now();
+
+    // Store the assembled prompt in CAS for later inspection via `step read --prompt`
+    const promptText = agentResult.assembledPrompt;
+    const assembledPromptHash =
+      promptText !== ""
+        ? await ctx.meta.store.put(ctx.meta.schemas.text, promptText).catch(() => null)
+        : null;
+
    const stepHash = await persistStep({
      ctx,
      outputHash: extracted.outputHash,
@@ -189,6 +201,7 @@ export function createAgent(options: AgentOptions): () => Promise<void> {
      agentName: agentLabel(options.name),
      startedAtMs,
      completedAtMs,
+      assembledPromptHash,
    });

    const adapterOutput: AdapterOutput = {
@@ -6,17 +6,21 @@ export type UwfAgentSchemaHashes = {
  workflow: Hash;
  startNode: Hash;
  stepNode: Hash;
+  text: Hash;
 };

+const TEXT_SCHEMA = { type: "string" as const };
+
 /**
 * Register the Workflow, StartNode, StepNode, and plain-text JSON Schemas in the CAS store.
 * Idempotent: safe to call on every agent invocation.
 */
 export async function registerAgentSchemas(store: Store): Promise<UwfAgentSchemaHashes> {
-  const [workflow, startNode, stepNode] = await Promise.all([
+  const [workflow, startNode, stepNode, text] = await Promise.all([
    putSchema(store, WORKFLOW_SCHEMA),
    putSchema(store, START_NODE_SCHEMA),
    putSchema(store, STEP_NODE_SCHEMA),
+    putSchema(store, TEXT_SCHEMA),
  ]);
-  return { workflow, startNode, stepNode };
+  return { workflow, startNode, stepNode, text };
 }
@@ -27,6 +27,8 @@ export type AgentRunResult = {
  output: string;
  detailHash: string;
  sessionId: string;
+  /** The fully assembled prompt that was sent to the agent. */
+  assembledPrompt: string;
 };

 export type AgentContinueFn = (