Compare commits

..

1 Commits

Author SHA1 Message Date
xiaoju 7ca3ad5d65 debug: dump raw NDJSON for issue #439 investigation
Saves Claude Code stdout to /tmp/uwf-ndjson-dump/ before parsing.
Temporary — remove after root cause confirmed.

Refs #439
2026-05-23 12:24:05 +00:00
10 changed files with 87 additions and 426 deletions
+1 -1
View File
@@ -9,7 +9,7 @@
"check": "bunx tsc --build && biome check . && bash scripts/lint-log-tags.sh",
"typecheck": "bunx tsc --build",
"format": "biome format --write .",
"test": "bun run --filter './packages/*' test",
"test": "bun run --filter '*' test",
"changeset": "bunx changeset",
"version": "bunx changeset version",
"release": "bun run build && bun test && node scripts/publish-all.mjs"
@@ -266,7 +266,12 @@ describe("cmdThreadRead ### Content section", () => {
expect(markdown).toContain("### Content");
expect(markdown).toContain("The assistant response text");
expect(markdown).not.toContain("### Output");
const contentIdx = markdown.indexOf("### Content");
const outputIdx = markdown.indexOf("### Output");
expect(contentIdx).toBeGreaterThanOrEqual(0);
expect(outputIdx).toBeGreaterThanOrEqual(0);
expect(contentIdx).toBeLessThan(outputIdx);
});
test("omits ### Content when detail has no matching assistant turns", async () => {
@@ -309,7 +314,7 @@ describe("cmdThreadRead ### Content section", () => {
const markdown = await cmdThreadRead(tmpDir, threadId, THREAD_READ_DEFAULT_QUOTA, null, false);
expect(markdown).not.toContain("### Content");
expect(markdown).not.toContain("### Output");
expect(markdown).toContain("### Output");
});
});
@@ -387,87 +392,3 @@ describe("cmdThreadStepDetails", () => {
await expect(cmdThreadStepDetails(tmpDir, "nonexistenth0" as CasRef)).rejects.toThrow();
});
});
// ── cmdThreadRead: ### Prompt deduplication ───────────────────────────────────
describe("cmdThreadRead ### Prompt deduplication", () => {
  /**
   * Persist a minimal workflow, a start node, and one step node per entry in
   * `roles` (each step linked to the previous one through `prev`).
   *
   * @param uwf   Store wrapper used for every `put`.
   * @param roles Role name for each step, in thread order; duplicates are allowed.
   * @returns Hash of the last step written, or "" when `roles` is empty.
   */
  async function makeThreadWithRoles(uwf: UwfStore, roles: string[]): Promise<string> {
    // One role definition per distinct role name.
    const roleDefs: Record<string, unknown> = {};
    for (const roleName of [...new Set(roles)]) {
      roleDefs[roleName] = {
        description: roleName,
        goal: `Goal for ${roleName}`,
        capabilities: [],
        procedure: "Do stuff.",
        output: "Output.",
        meta: "placeholder00" as CasRef,
      };
    }

    const workflowHash = await uwf.store.put(uwf.schemas.workflow, {
      name: "dedup-wf",
      description: "desc",
      roles: roleDefs,
      conditions: {},
      graph: {},
    });
    const startHash = await uwf.store.put(uwf.schemas.startNode, {
      workflow: workflowHash,
      prompt: "Start",
    });
    // The step output is just another stored document; an empty workflow is used as filler.
    const outputHash = await uwf.store.put(uwf.schemas.workflow, {
      name: "out",
      description: "",
      roles: {},
      conditions: {},
      graph: {},
    });

    let headHash = "";
    let parentHash: string | null = null;
    for (const role of roles) {
      headHash = await uwf.store.put(uwf.schemas.stepNode, {
        start: startHash,
        prev: parentHash as CasRef | null,
        role,
        output: outputHash,
        detail: null,
        agent: "uwf-test",
      });
      parentHash = headHash;
    }
    return headHash;
  }

  /**
   * Build a thread for `roles`, register it under `id`, render it with
   * `cmdThreadRead`, and count the "### Prompt" headings in the markdown.
   */
  async function countPromptHeadings(roles: string[], id: string): Promise<number> {
    const uwf = await makeUwfStore(tmpDir);
    const headHash = await makeThreadWithRoles(uwf, roles);
    const threadId = id as ThreadId;
    await saveThreadsIndex(tmpDir, { [threadId]: headHash });
    const markdown = await cmdThreadRead(tmpDir, threadId, THREAD_READ_DEFAULT_QUOTA, null, false);
    const headings = markdown.match(/### Prompt/g) ?? [];
    return headings.length;
  }

  test("same consecutive role shows ### Prompt once", async () => {
    const seen = await countPromptHeadings(["writer", "writer"], "01JTEST0000000000000003");
    expect(seen).toBe(1);
  });

  test("different consecutive roles each show ### Prompt", async () => {
    const seen = await countPromptHeadings(["planner", "coder"], "01JTEST0000000000000004");
    expect(seen).toBe(2);
  });

  test("non-consecutive same role shows ### Prompt twice", async () => {
    const seen = await countPromptHeadings(["roleA", "roleB", "roleA"], "01JTEST0000000000000005");
    expect(seen).toBe(2);
  });
});
+3 -3
View File
@@ -655,11 +655,11 @@ function formatThreadReadMarkdown(options: {
// Step blocks
const startIndex = candidates.length - selected.length;
const shownPromptRoles = new Set<string>();
for (let i = 0; i < selected.length; i++) {
const item = selected[i];
if (item === undefined) continue;
const stepNum = startIndex + i + 1;
const outputYaml = formatYaml(expandOutput(uwf, item.payload.output));
const ts = new Date(item.timestamp)
.toISOString()
.replace("T", " ")
@@ -669,10 +669,9 @@ function formatThreadReadMarkdown(options: {
`**Agent:** ${item.payload.agent} | **Time:** ${ts}`,
];
const roleDef = workflow.roles[item.payload.role];
if (roleDef && !shownPromptRoles.has(item.payload.role)) {
if (roleDef) {
const prompt = roleDef.goal;
stepLines.push("", "### Prompt", "", prompt);
shownPromptRoles.add(item.payload.role);
}
if (item.payload.detail) {
const content = extractLastAssistantContent(uwf, item.payload.detail);
@@ -680,6 +679,7 @@ function formatThreadReadMarkdown(options: {
stepLines.push("", "### Content", "", content);
}
}
stepLines.push("", "### Output", "", "```yaml", outputYaml, "```");
parts.push(stepLines.join("\n"));
}
@@ -1,156 +0,0 @@
import { beforeEach, describe, expect, mock, test } from "bun:test";
// Shared mock functions. Individual tests override their resolved values;
// the defaults below are a frontmatter-only reply with no tool calls, a no-op
// session append, and a fixed "tool-result" string.
const mockChatCompletionWithTools = mock(async () => ({
content: "---\nstatus: done\n---",
toolCalls: [],
}));
const mockAppendSessionTurn = mock(async () => {});
const mockExecuteBuiltinTool = mock(async () => "tool-result");
// Replace the LLM client module so no completion request reaches a real provider.
mock.module("../src/llm/index.js", () => ({
chatCompletionWithTools: mockChatCompletionWithTools,
}));
// Replace session persistence so tests do not write session turns.
mock.module("../src/session.js", () => ({
appendSessionTurn: mockAppendSessionTurn,
}));
// Replace the builtin tool registry: no tools are advertised, and tool
// execution is routed to the mock above.
mock.module("../src/tools/index.js", () => ({
builtinToolsToOpenAi: () => [],
executeBuiltinTool: mockExecuteBuiltinTool,
getBuiltinTools: () => [],
}));
import { executeTurnTools, runBuiltinLoop, shouldNudge } from "../src/loop.js";
// Opaque stand-ins: the mocked LLM and tool modules never inspect these values.
const fakeProvider = {} as any;
const fakeToolCtx = {} as any;

/**
 * Build a `runBuiltinLoop` options object from test defaults.
 *
 * @param overrides Fields that replace the defaults (shallow merge, overrides win).
 * @returns A fresh options object on every call.
 */
function makeOptions(overrides: Partial<Parameters<typeof runBuiltinLoop>[0]> = {}) {
  const defaults = {
    provider: fakeProvider,
    messages: [{ role: "system" as const, content: "sys" }],
    toolCtx: fakeToolCtx,
    maxTurns: 5,
    storageRoot: "/tmp",
    sessionId: "sess",
    noTools: false,
  };
  return { ...defaults, ...overrides };
}
// Reset every shared mock before each test so resolved values queued by one
// test are not carried into the next.
beforeEach(() => {
  const sharedMocks = [mockChatCompletionWithTools, mockAppendSessionTurn, mockExecuteBuiltinTool];
  for (const sharedMock of sharedMocks) {
    sharedMock.mockReset();
  }
});
describe("shouldNudge", () => {
  // Each row: test title, options handed to shouldNudge, expected verdict.
  const cases: Array<[string, Parameters<typeof shouldNudge>[0], boolean]> = [
    [
      "2.1 returns true when all conditions met",
      { noTools: false, text: "some text", turn: 0, maxTurns: 5 },
      true,
    ],
    [
      "2.2 returns false when noTools=true",
      { noTools: true, text: "some text", turn: 0, maxTurns: 5 },
      false,
    ],
    [
      "2.3 returns false when text starts with ---",
      { noTools: false, text: "---\nstatus: done", turn: 0, maxTurns: 5 },
      false,
    ],
    [
      "2.4 returns false on last turn",
      { noTools: false, text: "some text", turn: 4, maxTurns: 5 },
      false,
    ],
    [
      "2.5 returns true on second-to-last turn",
      { noTools: false, text: "some text", turn: 3, maxTurns: 5 },
      true,
    ],
    [
      "2.6 leading whitespace before --- suppresses nudge",
      { noTools: false, text: " ---\nstatus: done", turn: 0, maxTurns: 5 },
      false,
    ],
  ];

  for (const [title, options, expected] of cases) {
    test(title, () => {
      expect(shouldNudge(options)).toBe(expected);
    });
  }
});
describe("executeTurnTools", () => {
  test("4.1 executes each tool call and pushes tool result messages", async () => {
    mockExecuteBuiltinTool.mockResolvedValue("result");
    const transcript: any[] = [];
    const firstCall = { id: "c1", name: "tool_a", arguments: "{}" };
    const secondCall = { id: "c2", name: "tool_b", arguments: "{}" };

    const executed = await executeTurnTools(
      [firstCall, secondCall],
      fakeToolCtx,
      transcript,
      "/tmp",
      "sess",
    );

    // One "tool" message per call, and the returned count matches.
    expect(transcript.length).toBe(2);
    const [firstMessage, secondMessage] = transcript;
    expect(firstMessage.role).toBe("tool");
    expect(secondMessage.role).toBe("tool");
    expect(executed).toBe(2);
  });

  test("4.2 tool result content matches executeBuiltinTool return value", async () => {
    mockExecuteBuiltinTool.mockResolvedValue("result-A");
    const transcript: any[] = [];
    const onlyCall = { id: "c1", name: "read_file", arguments: "{}" };

    await executeTurnTools([onlyCall], fakeToolCtx, transcript, "/tmp", "sess");

    expect(transcript[0].content).toBe("result-A");
  });
});
// End-to-end tests of runBuiltinLoop against the mocked LLM, session and tool
// modules. Each test scripts the LLM replies via the shared mock and then
// checks the loop's finalText / turnCount / messages.
describe("runBuiltinLoop integration", () => {
// A reply that already starts with `---` and has no tool calls ends the loop after one turn.
test("3.1 single text-only response returns finalText immediately", async () => {
mockChatCompletionWithTools.mockResolvedValue({
content: "---\nstatus: done\n---",
toolCalls: [],
});
const result = await runBuiltinLoop(makeOptions());
expect(result.finalText).toBe("---\nstatus: done\n---");
expect(result.turnCount).toBe(1);
});
// With noTools=true the tool calls in the reply are ignored and the text is final.
test("3.2 noTools=true suppresses tool calls", async () => {
mockChatCompletionWithTools.mockResolvedValue({
content: "ok",
toolCalls: [{ id: "c1", name: "read_file", arguments: "{}" }],
});
const result = await runBuiltinLoop(makeOptions({ noTools: true }));
expect(result.finalText).toBe("ok");
expect(result.turnCount).toBe(1);
});
// Replies are queued in order: first a tool call, then the final frontmatter text.
test("3.3 tool call followed by text response", async () => {
mockChatCompletionWithTools
.mockResolvedValueOnce({
content: null,
toolCalls: [{ id: "c1", name: "read_file", arguments: "{}" }],
})
.mockResolvedValueOnce({ content: "---\nstatus: done\n---", toolCalls: [] });
mockExecuteBuiltinTool.mockResolvedValue("file contents");
const result = await runBuiltinLoop(makeOptions());
expect(result.finalText).toBe("---\nstatus: done\n---");
// Expected count of 3 — presumably assistant tool-call turn + one tool result + final text turn.
expect(result.turnCount).toBe(3);
});
// A text-only reply without frontmatter is followed by a user message that
// mentions "frontmatter"; the second queued reply then finishes the loop.
test("3.4 nudge cycle inserts nudge message", async () => {
mockChatCompletionWithTools
.mockResolvedValueOnce({ content: "I am thinking", toolCalls: [] })
.mockResolvedValueOnce({ content: "---\nstatus: done\n---", toolCalls: [] });
const result = await runBuiltinLoop(makeOptions());
expect(result.finalText).toBe("---\nstatus: done\n---");
const nudgeMsg = result.messages.find(
(m) =>
m.role === "user" && typeof m.content === "string" && m.content.includes("frontmatter"),
);
expect(nudgeMsg).toBeDefined();
});
// Every reply lacks frontmatter, so the loop runs out of turns and the last
// assistant text is returned.
test("3.5 maxTurns exhaustion falls back to last assistant content", async () => {
mockChatCompletionWithTools.mockResolvedValue({ content: "still thinking", toolCalls: [] });
const result = await runBuiltinLoop(makeOptions({ maxTurns: 3 }));
expect(result.finalText).toBe("still thinking");
});
// The caller's messages array must keep its original length after the run.
test("3.6 original messages array is not mutated", async () => {
mockChatCompletionWithTools.mockResolvedValue({
content: "---\nstatus: done\n---",
toolCalls: [],
});
const original = [{ role: "system" as const, content: "sys" }];
await runBuiltinLoop(makeOptions({ messages: original }));
expect(original.length).toBe(1);
});
});
+6 -24
View File
@@ -13,28 +13,10 @@ import { storeBuiltinDetail } from "./detail.js";
import type { ChatMessage } from "./llm/index.js";
import { BUILTIN_CONTINUE_MAX_TURNS, BUILTIN_MAX_TURNS, runBuiltinLoop } from "./loop.js";
import { buildBuiltinMessages } from "./prompt.js";
import { initSessionDir } from "./session.js";
import { initSessionDir, removeSession } from "./session.js";
const log = createLogger({ sink: { kind: "stderr" } });
const FRONTMATTER_FENCE = "---";

/**
 * Strip any text before the first `---` fence.
 * LLMs sometimes emit preamble text before the frontmatter block.
 *
 * Leading whitespace is treated as removable padding rather than preamble:
 * if the text, once left-trimmed, already starts with the fence, the trimmed
 * text is returned. Without this step an input such as `" ---\nk: v\n---\nbody"`
 * would match the *closing* fence in the search below and lose its frontmatter.
 *
 * @param text Raw final text produced by the agent loop.
 * @returns The text starting at the opening fence when one is found; otherwise
 *          `text` unchanged.
 */
function stripPreamble(text: string): string {
  const trimmed = text.trimStart();
  if (trimmed.startsWith(FRONTMATTER_FENCE)) {
    return trimmed;
  }
  // Look for a fence that sits on its own line after some preamble.
  const idx = text.indexOf(`\n${FRONTMATTER_FENCE}\n`);
  if (idx !== -1) {
    // idx points at the newline before the fence, so idx + 1 chars are dropped.
    log("6GWRP3QX", `stripped ${idx + 1} chars of preamble before frontmatter`);
    return text.slice(idx + 1);
  }
  return text;
}
type SessionRecord = {
sessionId: string;
model: string;
@@ -66,7 +48,6 @@ async function runBuiltinWithMessages(
session: SessionRecord,
store: Store,
maxTurns: number,
noTools: boolean,
): Promise<AgentRunResult> {
const loopResult = await runBuiltinLoop({
provider,
@@ -75,13 +56,13 @@ async function runBuiltinWithMessages(
maxTurns,
storageRoot,
sessionId: session.sessionId,
noTools,
});
session.messages = loopResult.messages;
if (loopResult.turnCount === 0) {
log("5RWTK9NB", "no turns produced, returning empty output");
await removeSession(storageRoot, session.sessionId);
return { output: "", detailHash: "", sessionId: session.sessionId };
}
@@ -94,7 +75,10 @@ async function runBuiltinWithMessages(
session.startedAtMs,
);
return { output: stripPreamble(loopResult.finalText), detailHash, sessionId: session.sessionId };
// Clean up session jsonl
await removeSession(storageRoot, session.sessionId);
return { output: loopResult.finalText, detailHash, sessionId: session.sessionId };
}
async function runBuiltin(ctx: AgentContext): Promise<AgentRunResult> {
@@ -121,7 +105,6 @@ async function runBuiltin(ctx: AgentContext): Promise<AgentRunResult> {
session,
ctx.store,
BUILTIN_MAX_TURNS,
false,
);
}
@@ -144,7 +127,6 @@ async function continueBuiltin(
session,
store,
BUILTIN_CONTINUE_MAX_TURNS,
true,
);
}
+7 -11
View File
@@ -96,17 +96,8 @@ function serializeMessage(message: ChatMessage): Record<string, unknown> {
export async function chatCompletionWithTools(
provider: ResolvedLlmProvider,
messages: ChatMessage[],
tools: OpenAiToolDefinition[] | null,
tools: OpenAiToolDefinition[],
): Promise<LlmAssistantResponse> {
const body: Record<string, unknown> = {
model: provider.model,
messages: messages.map(serializeMessage),
};
if (tools !== null && tools.length > 0) {
body.tools = tools;
body.tool_choice = "auto";
}
let response: Response;
try {
response = await fetch(chatUrl(provider.baseUrl), {
@@ -115,7 +106,12 @@ export async function chatCompletionWithTools(
Authorization: `Bearer ${provider.apiKey}`,
"Content-Type": "application/json",
},
body: JSON.stringify(body),
body: JSON.stringify({
model: provider.model,
messages: messages.map(serializeMessage),
tools,
tool_choice: "auto",
}),
});
} catch (cause) {
const message = cause instanceof Error ? cause.message : String(cause);
+51 -123
View File
@@ -23,8 +23,6 @@ export type RunBuiltinLoopOptions = {
maxTurns: number;
storageRoot: string;
sessionId: string;
/** When true, do not provide tools — force LLM to emit text only. */
noTools: boolean;
};
export type RunBuiltinLoopResult = {
@@ -48,7 +46,7 @@ async function appendTurn(
await appendSessionTurn(storageRoot, sessionId, payload);
}
export async function executeTurnTools(
async function executeTurnTools(
calls: Array<{ id: string; name: string; arguments: string }>,
toolCtx: ToolContext,
messages: ChatMessage[],
@@ -70,140 +68,70 @@ export async function executeTurnTools(
return turnCount;
}
/** Inputs for {@link shouldNudge}. */
export type ShouldNudgeOptions = {
  /** True when the loop runs without tools. */
  noTools: boolean;
  /** Text of the assistant's tool-free reply. */
  text: string;
  /** Zero-based index of the current turn. */
  turn: number;
  /** Total number of turns the loop may run. */
  maxTurns: number;
};

/**
 * Decide whether a text-only reply should be answered with a nudge instead of
 * being accepted as the final output.
 *
 * @returns `true` only when tools are enabled, the reply (ignoring leading
 *          whitespace) does not start with `---`, and this is not the last turn.
 */
export function shouldNudge({ noTools, text, turn, maxTurns }: ShouldNudgeOptions): boolean {
  if (noTools) {
    return false;
  }
  const startsWithFence = text.trimStart().startsWith("---");
  if (startsWithFence) {
    return false;
  }
  // The last turn (maxTurns - 1) is never nudged.
  return turn < maxTurns - 1;
}
/**
 * Record a tool-free assistant reply and decide whether the loop is finished.
 *
 * The reply is appended to the session first. If `shouldNudge` says so, a
 * user message asking for tools or frontmatter is pushed onto `messages` and
 * the loop continues; otherwise the reply is returned as the final text.
 *
 * @param messages Conversation array; mutated when a nudge is added.
 * @returns `{ done: true, finalText: text }` when accepted,
 *          `{ done: false, finalText: "" }` when a nudge was queued.
 */
async function handleTextTurn(
  text: string,
  turn: number,
  noTools: boolean,
  maxTurns: number,
  storageRoot: string,
  sessionId: string,
  messages: ChatMessage[],
): Promise<{ done: boolean; finalText: string }> {
  const assistantTurn = {
    role: "assistant",
    content: text,
    toolCalls: null,
    reasoning: null,
  };
  await appendTurn(storageRoot, sessionId, assistantTurn);

  const needsNudge = shouldNudge({ noTools, text, turn, maxTurns });
  if (!needsNudge) {
    return { done: true, finalText: text };
  }

  log("7FXQM2KN", "text-only turn without frontmatter, nudging LLM to continue");
  const nudgeParts = [
    "You stopped calling tools but your response does not start with the required `---` YAML frontmatter. ",
    "Either continue using tools to complete your work, or output your final response starting with `---`.",
  ];
  messages.push({ role: "user", content: nudgeParts.join("") });
  return { done: false, finalText: "" };
}
/**
 * Record an assistant reply that requested tools, then run those tools.
 *
 * @param content   Assistant text accompanying the tool calls.
 * @param toolCalls Tool calls requested by the assistant.
 * @param messages  Conversation array handed on to `executeTurnTools`.
 * @returns Whatever `executeTurnTools` reports for this batch of calls.
 */
async function handleToolTurn(
  content: string,
  toolCalls: LlmToolCall[],
  toolCtx: ToolContext,
  messages: ChatMessage[],
  storageRoot: string,
  sessionId: string,
): Promise<number> {
  const assistantTurn = {
    role: "assistant",
    content,
    toolCalls: mapToolCallsForPayload(toolCalls),
    reasoning: null,
  };
  await appendTurn(storageRoot, sessionId, assistantTurn);
  return executeTurnTools(toolCalls, toolCtx, messages, storageRoot, sessionId);
}
/**
 * Find the most recent assistant message that carries non-blank text.
 *
 * @param messages Conversation to scan, oldest first.
 * @returns That message's content exactly as stored, or "" when no assistant
 *          message has non-null, non-whitespace content.
 */
export function extractFinalText(messages: ChatMessage[]): string {
  let cursor = messages.length;
  // Walk backwards so the latest qualifying message wins.
  while (cursor-- > 0) {
    const candidate = messages[cursor];
    if (candidate === undefined || candidate.role !== "assistant") {
      continue;
    }
    const body = candidate.content;
    if (body === null || body.trim() === "") {
      continue;
    }
    return body;
  }
  return "";
}
/** Outcome of one loop iteration. */
type LoopTurnResult = {
  /** True when the loop should stop with `finalText`. */
  done: boolean;
  /** Final text when `done`; "" otherwise. */
  finalText: string;
  /** Extra turns to count on top of the assistant turn itself. */
  extraTurns: number;
};

/**
 * Run a single iteration of the builtin loop: ask the LLM, append its reply to
 * `messages`, then either handle it as text or execute the requested tools.
 *
 * @param turn        Zero-based turn index (logged as turn + 1).
 * @param messages    Conversation array; mutated in place.
 * @param openAiTools Tool definitions; sent as `null` when the list is empty.
 */
async function runLoopTurn(
  turn: number,
  options: RunBuiltinLoopOptions,
  messages: ChatMessage[],
  openAiTools: ReturnType<typeof builtinToolsToOpenAi>,
): Promise<LoopTurnResult> {
  log("8K2M4N7P", `builtin loop turn ${turn + 1}/${options.maxTurns}`);

  const toolsForRequest = openAiTools.length > 0 ? openAiTools : null;
  const reply = await chatCompletionWithTools(options.provider, messages, toolsForRequest);

  // In noTools mode any tool calls the model returns are discarded.
  const requestedCalls = options.noTools ? null : (reply.toolCalls ?? null);
  messages.push({ role: "assistant", content: reply.content, tool_calls: requestedCalls });

  if (requestedCalls !== null && requestedCalls.length > 0) {
    const extraTurns = await handleToolTurn(
      reply.content ?? "",
      requestedCalls,
      options.toolCtx,
      messages,
      options.storageRoot,
      options.sessionId,
    );
    return { done: false, finalText: "", extraTurns };
  }

  const textOutcome = await handleTextTurn(
    reply.content ?? "",
    turn,
    options.noTools,
    options.maxTurns,
    options.storageRoot,
    options.sessionId,
    messages,
  );
  return { done: textOutcome.done, finalText: textOutcome.finalText, extraTurns: 0 };
}
/** Agent run loop: LLM ↔ tools until no tool_calls or maxTurns. */
export async function runBuiltinLoop(
options: RunBuiltinLoopOptions,
): Promise<RunBuiltinLoopResult> {
const messages = [...options.messages];
const openAiTools = options.noTools ? [] : builtinToolsToOpenAi(getBuiltinTools());
const openAiTools = builtinToolsToOpenAi(getBuiltinTools());
let finalText = "";
let turnCount = 0;
for (let turn = 0; turn < options.maxTurns; turn++) {
const result = await runLoopTurn(turn, options, messages, openAiTools);
turnCount += 1 + result.extraTurns;
if (result.done) {
finalText = result.finalText;
log("8K2M4N7P", `builtin loop turn ${turn + 1}/${options.maxTurns}`);
const response = await chatCompletionWithTools(options.provider, messages, openAiTools);
const assistantMessage: ChatMessage = {
role: "assistant",
content: response.content,
tool_calls: response.toolCalls,
};
messages.push(assistantMessage);
if (response.toolCalls === null || response.toolCalls.length === 0) {
finalText = response.content ?? "";
await appendTurn(options.storageRoot, options.sessionId, {
role: "assistant",
content: response.content ?? "",
toolCalls: null,
reasoning: null,
});
turnCount += 1;
break;
}
// Assistant turn with tool calls
await appendTurn(options.storageRoot, options.sessionId, {
role: "assistant",
content: response.content ?? "",
toolCalls: mapToolCallsForPayload(response.toolCalls),
reasoning: null,
});
turnCount += 1;
// Execute tools
turnCount += await executeTurnTools(
response.toolCalls,
options.toolCtx,
messages,
options.storageRoot,
options.sessionId,
);
}
if (finalText === "" && messages.length > 0) {
finalText = extractFinalText(messages);
for (let i = messages.length - 1; i >= 0; i--) {
const msg = messages[i];
if (
msg !== undefined &&
msg.role === "assistant" &&
msg.content !== null &&
msg.content.trim() !== ""
) {
finalText = msg.content;
break;
}
}
}
return { finalText, messages, turnCount };
@@ -59,22 +59,6 @@ export function buildBuiltinMessages(ctx: AgentContext): ChatMessage[] {
}
systemParts.push(rolePrompt);
systemParts.push(
"",
"## Workflow",
"",
`Your working directory is: ${process.cwd()}`,
"",
"You have tools available (read_file, write_file, run_command). " +
"Use them to complete your task — read files, run commands, make changes as needed. " +
"Your task is described in the user message below — do NOT use uwf or workflow CLI commands to discover your task. " +
"When you are done, output your final response with the YAML frontmatter block as specified above. " +
"Do NOT output the frontmatter until you have completed all necessary work. " +
"If you are running low on turns and cannot finish, output the frontmatter with `status: failed` and explain what remains in the body. " +
"CRITICAL: Your final output MUST start with the `---` fence on the very first line — " +
"no preamble text, no explanation before it. The parser requires `---` at position 0.",
);
const messages: ChatMessage[] = [{ role: "system", content: systemParts.join("\n") }];
const roleVisitIndices: number[] = [];
@@ -1,4 +1,5 @@
import { spawn } from "node:child_process";
import { mkdirSync, writeFileSync } from "node:fs";
import type { Store } from "@uncaged/json-cas";
import {
type AgentContext,
@@ -117,7 +118,17 @@ function spawnClaudeResume(
]);
}
const NDJSON_DUMP_DIR = "/tmp/uwf-ndjson-dump";
async function processClaudeOutput(stdout: string, store: Store): Promise<AgentRunResult> {
// Debug dump: save raw NDJSON for issue #439 investigation
try {
mkdirSync(NDJSON_DUMP_DIR, { recursive: true });
writeFileSync(`${NDJSON_DUMP_DIR}/${Date.now()}.ndjson`, stdout);
} catch {
// ignore dump failures
}
const parsed = parseClaudeCodeStreamOutput(stdout);
if (parsed !== null) {
+1 -6
View File
@@ -121,11 +121,6 @@ export function createAgent(options: AgentOptions): () => Promise<void> {
let agentResult = await runWithMessage("agent run failed", () => options.run(ctx));
// Preserve the primary detail from the first run — it contains the full
// tool-call turn history. Continuation retries only fix frontmatter
// formatting and their 1-turn detail is not meaningful.
const primaryDetailHash = agentResult.detailHash;
// Try to extract frontmatter; retry via continue if it fails
let outputHash = await tryExtractOutput(agentResult.output, roleDef.frontmatter, ctx);
@@ -152,7 +147,7 @@ export function createAgent(options: AgentOptions): () => Promise<void> {
const stepHash = await persistStep({
ctx,
outputHash,
detailHash: primaryDetailHash,
detailHash: agentResult.detailHash,
agentName: agentLabel(options.name),
});