fix(#447): fix biome format in loop.test.ts

fix(#447): format and sort imports in loop.test.ts
fix(#447): reduce cognitive complexity in loop.ts by extracting helpers
2026-05-23 22:53:50 +08:00 · 2026-05-23 22:52:58 +08:00 · 2026-05-23 22:50:06 +08:00 · 2026-05-23 22:45:09 +08:00 · 2026-05-23 22:44:42 +08:00 · 2026-05-23 22:35:20 +08:00
10 changed files with 426 additions and 87 deletions
@@ -9,7 +9,7 @@
    "check": "bunx tsc --build && biome check . && bash scripts/lint-log-tags.sh",
    "typecheck": "bunx tsc --build",
    "format": "biome format --write .",
-    "test": "bun run --filter '*' test",
+    "test": "bun run --filter './packages/*' test",
    "changeset": "bunx changeset",
    "version": "bunx changeset version",
    "release": "bun run build && bun test && node scripts/publish-all.mjs"
@@ -266,12 +266,7 @@ describe("cmdThreadRead ### Content section", () => {

    expect(markdown).toContain("### Content");
    expect(markdown).toContain("The assistant response text");
-
-    const contentIdx = markdown.indexOf("### Content");
-    const outputIdx = markdown.indexOf("### Output");
-    expect(contentIdx).toBeGreaterThanOrEqual(0);
-    expect(outputIdx).toBeGreaterThanOrEqual(0);
-    expect(contentIdx).toBeLessThan(outputIdx);
+    expect(markdown).not.toContain("### Output");
  });

  test("omits ### Content when detail has no matching assistant turns", async () => {
@@ -314,7 +309,7 @@ describe("cmdThreadRead ### Content section", () => {
    const markdown = await cmdThreadRead(tmpDir, threadId, THREAD_READ_DEFAULT_QUOTA, null, false);

    expect(markdown).not.toContain("### Content");
-    expect(markdown).toContain("### Output");
+    expect(markdown).not.toContain("### Output");
  });
 });

@@ -392,3 +387,87 @@ describe("cmdThreadStepDetails", () => {
    await expect(cmdThreadStepDetails(tmpDir, "nonexistenth0" as CasRef)).rejects.toThrow();
  });
 });
+
+// ── cmdThreadRead: ### Prompt deduplication ───────────────────────────────────
+
+describe("cmdThreadRead ### Prompt deduplication", () => {
+  async function makeThreadWithRoles(uwf: UwfStore, roles: string[]): Promise<string> {
+    const roleMap: Record<string, unknown> = {};
+    for (const r of [...new Set(roles)]) {
+      roleMap[r] = {
+        description: r,
+        goal: `Goal for ${r}`,
+        capabilities: [],
+        procedure: "Do stuff.",
+        output: "Output.",
+        meta: "placeholder00" as CasRef,
+      };
+    }
+    const workflowHash = await uwf.store.put(uwf.schemas.workflow, {
+      name: "dedup-wf",
+      description: "desc",
+      roles: roleMap,
+      conditions: {},
+      graph: {},
+    });
+    const startHash = await uwf.store.put(uwf.schemas.startNode, {
+      workflow: workflowHash,
+      prompt: "Start",
+    });
+    const outputHash = await uwf.store.put(uwf.schemas.workflow, {
+      name: "out",
+      description: "",
+      roles: {},
+      conditions: {},
+      graph: {},
+    });
+
+    let prev: string | null = null;
+    let stepHash = "";
+    for (const role of roles) {
+      stepHash = await uwf.store.put(uwf.schemas.stepNode, {
+        start: startHash,
+        prev: prev as CasRef | null,
+        role,
+        output: outputHash,
+        detail: null,
+        agent: "uwf-test",
+      });
+      prev = stepHash;
+    }
+    return stepHash;
+  }
+
+  test("same consecutive role shows ### Prompt once", async () => {
+    const uwf = await makeUwfStore(tmpDir);
+    const headHash = await makeThreadWithRoles(uwf, ["writer", "writer"]);
+    const threadId = "01JTEST0000000000000003" as ThreadId;
+    await saveThreadsIndex(tmpDir, { [threadId]: headHash });
+
+    const markdown = await cmdThreadRead(tmpDir, threadId, THREAD_READ_DEFAULT_QUOTA, null, false);
+    const count = (markdown.match(/### Prompt/g) ?? []).length;
+    expect(count).toBe(1);
+  });
+
+  test("different consecutive roles each show ### Prompt", async () => {
+    const uwf = await makeUwfStore(tmpDir);
+    const headHash = await makeThreadWithRoles(uwf, ["planner", "coder"]);
+    const threadId = "01JTEST0000000000000004" as ThreadId;
+    await saveThreadsIndex(tmpDir, { [threadId]: headHash });
+
+    const markdown = await cmdThreadRead(tmpDir, threadId, THREAD_READ_DEFAULT_QUOTA, null, false);
+    const count = (markdown.match(/### Prompt/g) ?? []).length;
+    expect(count).toBe(2);
+  });
+
+  test("non-consecutive repeat of a role is deduplicated (one ### Prompt per distinct role)", async () => {
+    // Deduplication is global across the thread, not only between neighbours:
+    // roleA and roleB each get one prompt, and the second roleA step gets none.
+    const uwf = await makeUwfStore(tmpDir);
+    const headHash = await makeThreadWithRoles(uwf, ["roleA", "roleB", "roleA"]);
+    const threadId = "01JTEST0000000000000005" as ThreadId;
+    await saveThreadsIndex(tmpDir, { [threadId]: headHash });
+
+    const markdown = await cmdThreadRead(tmpDir, threadId, THREAD_READ_DEFAULT_QUOTA, null, false);
+    const count = (markdown.match(/### Prompt/g) ?? []).length;
+    expect(count).toBe(2);
+  });
+});
@@ -655,11 +655,11 @@ function formatThreadReadMarkdown(options: {

  // Step blocks
  const startIndex = candidates.length - selected.length;
+  const shownPromptRoles = new Set<string>();
  for (let i = 0; i < selected.length; i++) {
    const item = selected[i];
    if (item === undefined) continue;
    const stepNum = startIndex + i + 1;
-    const outputYaml = formatYaml(expandOutput(uwf, item.payload.output));
    const ts = new Date(item.timestamp)
      .toISOString()
      .replace("T", " ")
@@ -669,9 +669,10 @@ function formatThreadReadMarkdown(options: {
      `**Agent:** ${item.payload.agent} | **Time:** ${ts}`,
    ];
    const roleDef = workflow.roles[item.payload.role];
-    if (roleDef) {
+    if (roleDef && !shownPromptRoles.has(item.payload.role)) {
      const prompt = roleDef.goal;
      stepLines.push("", "### Prompt", "", prompt);
+      shownPromptRoles.add(item.payload.role);
    }
    if (item.payload.detail) {
      const content = extractLastAssistantContent(uwf, item.payload.detail);
@@ -679,7 +680,6 @@ function formatThreadReadMarkdown(options: {
        stepLines.push("", "### Content", "", content);
      }
    }
-    stepLines.push("", "### Output", "", "```yaml", outputYaml, "```");
    parts.push(stepLines.join("\n"));
  }

@@ -0,0 +1,156 @@
+import { beforeEach, describe, expect, mock, test } from "bun:test";
+
+const mockChatCompletionWithTools = mock(async () => ({
+  content: "---\nstatus: done\n---",
+  toolCalls: [],
+}));
+const mockAppendSessionTurn = mock(async () => {});
+const mockExecuteBuiltinTool = mock(async () => "tool-result");
+
+mock.module("../src/llm/index.js", () => ({
+  chatCompletionWithTools: mockChatCompletionWithTools,
+}));
+mock.module("../src/session.js", () => ({
+  appendSessionTurn: mockAppendSessionTurn,
+}));
+mock.module("../src/tools/index.js", () => ({
+  builtinToolsToOpenAi: () => [],
+  executeBuiltinTool: mockExecuteBuiltinTool,
+  getBuiltinTools: () => [],
+}));
+
+import { executeTurnTools, runBuiltinLoop, shouldNudge } from "../src/loop.js";
+
+const fakeProvider = {} as any;
+const fakeToolCtx = {} as any;
+
+function makeOptions(overrides: Partial<Parameters<typeof runBuiltinLoop>[0]> = {}) {
+  return {
+    provider: fakeProvider,
+    messages: [{ role: "system" as const, content: "sys" }],
+    toolCtx: fakeToolCtx,
+    maxTurns: 5,
+    storageRoot: "/tmp",
+    sessionId: "sess",
+    noTools: false,
+    ...overrides,
+  };
+}
+
+beforeEach(() => {
+  mockChatCompletionWithTools.mockReset();
+  mockAppendSessionTurn.mockReset();
+  mockExecuteBuiltinTool.mockReset();
+});
+
+describe("shouldNudge", () => {
+  test("2.1 returns true when all conditions met", () => {
+    expect(shouldNudge({ noTools: false, text: "some text", turn: 0, maxTurns: 5 })).toBe(true);
+  });
+  test("2.2 returns false when noTools=true", () => {
+    expect(shouldNudge({ noTools: true, text: "some text", turn: 0, maxTurns: 5 })).toBe(false);
+  });
+  test("2.3 returns false when text starts with ---", () => {
+    expect(shouldNudge({ noTools: false, text: "---\nstatus: done", turn: 0, maxTurns: 5 })).toBe(
+      false,
+    );
+  });
+  test("2.4 returns false on last turn", () => {
+    expect(shouldNudge({ noTools: false, text: "some text", turn: 4, maxTurns: 5 })).toBe(false);
+  });
+  test("2.5 returns true on second-to-last turn", () => {
+    expect(shouldNudge({ noTools: false, text: "some text", turn: 3, maxTurns: 5 })).toBe(true);
+  });
+  test("2.6 leading whitespace before --- suppresses nudge", () => {
+    expect(shouldNudge({ noTools: false, text: "  ---\nstatus: done", turn: 0, maxTurns: 5 })).toBe(
+      false,
+    );
+  });
+});
+
+describe("executeTurnTools", () => {
+  test("4.1 executes each tool call and pushes tool result messages", async () => {
+    mockExecuteBuiltinTool.mockResolvedValue("result");
+    const messages: any[] = [];
+    const calls = [
+      { id: "c1", name: "tool_a", arguments: "{}" },
+      { id: "c2", name: "tool_b", arguments: "{}" },
+    ];
+    const count = await executeTurnTools(calls, fakeToolCtx, messages, "/tmp", "sess");
+    expect(messages.length).toBe(2);
+    expect(messages[0].role).toBe("tool");
+    expect(messages[1].role).toBe("tool");
+    expect(count).toBe(2);
+  });
+  test("4.2 tool result content matches executeBuiltinTool return value", async () => {
+    mockExecuteBuiltinTool.mockResolvedValue("result-A");
+    const messages: any[] = [];
+    await executeTurnTools(
+      [{ id: "c1", name: "read_file", arguments: "{}" }],
+      fakeToolCtx,
+      messages,
+      "/tmp",
+      "sess",
+    );
+    expect(messages[0].content).toBe("result-A");
+  });
+});
+
+describe("runBuiltinLoop integration", () => {
+  test("3.1 single text-only response returns finalText immediately", async () => {
+    mockChatCompletionWithTools.mockResolvedValue({
+      content: "---\nstatus: done\n---",
+      toolCalls: [],
+    });
+    const result = await runBuiltinLoop(makeOptions());
+    expect(result.finalText).toBe("---\nstatus: done\n---");
+    expect(result.turnCount).toBe(1);
+  });
+  test("3.2 noTools=true suppresses tool calls", async () => {
+    // The provider still returns a tool call; with noTools the loop must treat
+    // the reply as plain text and never execute the tool.
+    mockChatCompletionWithTools.mockResolvedValue({
+      content: "ok",
+      toolCalls: [{ id: "c1", name: "read_file", arguments: "{}" }],
+    });
+    const result = await runBuiltinLoop(makeOptions({ noTools: true }));
+    expect(result.finalText).toBe("ok");
+    expect(result.turnCount).toBe(1);
+    expect(mockExecuteBuiltinTool).not.toHaveBeenCalled();
+  });
+  test("3.3 tool call followed by text response", async () => {
+    mockChatCompletionWithTools
+      .mockResolvedValueOnce({
+        content: null,
+        toolCalls: [{ id: "c1", name: "read_file", arguments: "{}" }],
+      })
+      .mockResolvedValueOnce({ content: "---\nstatus: done\n---", toolCalls: [] });
+    mockExecuteBuiltinTool.mockResolvedValue("file contents");
+    const result = await runBuiltinLoop(makeOptions());
+    expect(result.finalText).toBe("---\nstatus: done\n---");
+    expect(result.turnCount).toBe(3);
+  });
+  test("3.4 nudge cycle inserts nudge message", async () => {
+    mockChatCompletionWithTools
+      .mockResolvedValueOnce({ content: "I am thinking", toolCalls: [] })
+      .mockResolvedValueOnce({ content: "---\nstatus: done\n---", toolCalls: [] });
+    const result = await runBuiltinLoop(makeOptions());
+    expect(result.finalText).toBe("---\nstatus: done\n---");
+    const nudgeMsg = result.messages.find(
+      (m) =>
+        m.role === "user" && typeof m.content === "string" && m.content.includes("frontmatter"),
+    );
+    expect(nudgeMsg).toBeDefined();
+  });
+  test("3.5 maxTurns exhaustion falls back to last assistant content", async () => {
+    mockChatCompletionWithTools.mockResolvedValue({ content: "still thinking", toolCalls: [] });
+    const result = await runBuiltinLoop(makeOptions({ maxTurns: 3 }));
+    expect(result.finalText).toBe("still thinking");
+  });
+  test("3.6 original messages array is not mutated", async () => {
+    mockChatCompletionWithTools.mockResolvedValue({
+      content: "---\nstatus: done\n---",
+      toolCalls: [],
+    });
+    const original = [{ role: "system" as const, content: "sys" }];
+    await runBuiltinLoop(makeOptions({ messages: original }));
+    expect(original.length).toBe(1);
+  });
+});
@@ -13,10 +13,28 @@ import { storeBuiltinDetail } from "./detail.js";
 import type { ChatMessage } from "./llm/index.js";
 import { BUILTIN_CONTINUE_MAX_TURNS, BUILTIN_MAX_TURNS, runBuiltinLoop } from "./loop.js";
 import { buildBuiltinMessages } from "./prompt.js";
-import { initSessionDir, removeSession } from "./session.js";
+import { initSessionDir } from "./session.js";

 const log = createLogger({ sink: { kind: "stderr" } });

+const FRONTMATTER_FENCE = "---";
+
+/**
+ * Drop anything the LLM emitted before the opening `---` frontmatter fence.
+ *
+ * - Leading whitespace before the fence is trimmed, so `"  ---\n…"` keeps its
+ *   frontmatter instead of being cut at the closing fence.
+ * - Otherwise the text is cut at the first `---` line that follows a newline.
+ * - Text without such a fence is returned unchanged.
+ *
+ * @param text Final text produced by the builtin loop.
+ * @returns The text starting at the opening fence, or `text` unchanged when
+ *   no fence is found.
+ */
+function stripPreamble(text: string): string {
+  // Whitespace-only preamble: handle it before searching for "\n---\n",
+  // otherwise that search would match the closing fence and discard the
+  // frontmatter block itself.
+  const trimmed = text.trimStart();
+  if (trimmed.startsWith(FRONTMATTER_FENCE)) {
+    return trimmed;
+  }
+  const idx = text.indexOf(`\n${FRONTMATTER_FENCE}\n`);
+  if (idx !== -1) {
+    // idx points at the newline before the fence; keep everything after it.
+    log("6GWRP3QX", `stripped ${idx + 1} chars of preamble before frontmatter`);
+    return text.slice(idx + 1);
+  }
+  return text;
+}
+
 type SessionRecord = {
  sessionId: string;
  model: string;
@@ -48,6 +66,7 @@ async function runBuiltinWithMessages(
  session: SessionRecord,
  store: Store,
  maxTurns: number,
+  noTools: boolean,
 ): Promise<AgentRunResult> {
  const loopResult = await runBuiltinLoop({
    provider,
@@ -56,13 +75,13 @@ async function runBuiltinWithMessages(
    maxTurns,
    storageRoot,
    sessionId: session.sessionId,
+    noTools,
  });

  session.messages = loopResult.messages;

  if (loopResult.turnCount === 0) {
    log("5RWTK9NB", "no turns produced, returning empty output");
-    await removeSession(storageRoot, session.sessionId);
    return { output: "", detailHash: "", sessionId: session.sessionId };
  }

@@ -75,10 +94,7 @@ async function runBuiltinWithMessages(
    session.startedAtMs,
  );

-  // Clean up session jsonl
-  await removeSession(storageRoot, session.sessionId);
-
-  return { output: loopResult.finalText, detailHash, sessionId: session.sessionId };
+  return { output: stripPreamble(loopResult.finalText), detailHash, sessionId: session.sessionId };
 }

 async function runBuiltin(ctx: AgentContext): Promise<AgentRunResult> {
@@ -105,6 +121,7 @@ async function runBuiltin(ctx: AgentContext): Promise<AgentRunResult> {
    session,
    ctx.store,
    BUILTIN_MAX_TURNS,
+    false,
  );
 }

@@ -127,6 +144,7 @@ async function continueBuiltin(
    session,
    store,
    BUILTIN_CONTINUE_MAX_TURNS,
+    true,
  );
 }

@@ -96,8 +96,17 @@ function serializeMessage(message: ChatMessage): Record<string, unknown> {
 export async function chatCompletionWithTools(
  provider: ResolvedLlmProvider,
  messages: ChatMessage[],
-  tools: OpenAiToolDefinition[],
+  tools: OpenAiToolDefinition[] | null,
 ): Promise<LlmAssistantResponse> {
+  const body: Record<string, unknown> = {
+    model: provider.model,
+    messages: messages.map(serializeMessage),
+  };
+  if (tools !== null && tools.length > 0) {
+    body.tools = tools;
+    body.tool_choice = "auto";
+  }
+
  let response: Response;
  try {
    response = await fetch(chatUrl(provider.baseUrl), {
@@ -106,12 +115,7 @@ export async function chatCompletionWithTools(
        Authorization: `Bearer ${provider.apiKey}`,
        "Content-Type": "application/json",
      },
-      body: JSON.stringify({
-        model: provider.model,
-        messages: messages.map(serializeMessage),
-        tools,
-        tool_choice: "auto",
-      }),
+      body: JSON.stringify(body),
    });
  } catch (cause) {
    const message = cause instanceof Error ? cause.message : String(cause);
@@ -23,6 +23,8 @@ export type RunBuiltinLoopOptions = {
  maxTurns: number;
  storageRoot: string;
  sessionId: string;
+  /** When true, do not provide tools — force LLM to emit text only. */
+  noTools: boolean;
 };

 export type RunBuiltinLoopResult = {
@@ -46,7 +48,7 @@ async function appendTurn(
  await appendSessionTurn(storageRoot, sessionId, payload);
 }

-async function executeTurnTools(
+export async function executeTurnTools(
  calls: Array<{ id: string; name: string; arguments: string }>,
  toolCtx: ToolContext,
  messages: ChatMessage[],
@@ -68,70 +70,140 @@ async function executeTurnTools(
  return turnCount;
 }

+/** Inputs for {@link shouldNudge}. */
+export type ShouldNudgeOptions = {
+  /** True when the loop runs without tools. */
+  noTools: boolean;
+  /** Text content of the assistant turn being inspected. */
+  text: string;
+  /** Zero-based index of the current loop turn. */
+  turn: number;
+  /** Total number of loop turns allowed. */
+  maxTurns: number;
+};
+
+/**
+ * Decide whether a text-only assistant turn should be answered with a nudge
+ * instead of being accepted as the final output.
+ *
+ * A nudge is sent only when tools are enabled, the text (ignoring leading
+ * whitespace) does not begin with `---`, and the current turn is not the last
+ * one allowed.
+ */
+export function shouldNudge({ noTools, text, turn, maxTurns }: ShouldNudgeOptions): boolean {
+  if (noTools) {
+    return false;
+  }
+  const startsWithFence = text.trimStart().startsWith("---");
+  if (startsWithFence) {
+    return false;
+  }
+  return turn < maxTurns - 1;
+}
+
+/**
+ * Record a text-only assistant turn and decide whether the loop is finished.
+ *
+ * The turn is always appended to the session first. If `shouldNudge` says the
+ * text is not an acceptable final answer, a user message asking for tools or
+ * frontmatter is pushed onto `messages` and the loop continues; otherwise the
+ * text is returned as the final output.
+ *
+ * @returns `done: true` with the text as `finalText`, or `done: false` with an
+ *   empty `finalText` when a nudge was queued.
+ */
+async function handleTextTurn(
+  text: string,
+  turn: number,
+  noTools: boolean,
+  maxTurns: number,
+  storageRoot: string,
+  sessionId: string,
+  messages: ChatMessage[],
+): Promise<{ done: boolean; finalText: string }> {
+  await appendTurn(storageRoot, sessionId, {
+    role: "assistant",
+    content: text,
+    toolCalls: null,
+    reasoning: null,
+  });
+
+  const needsNudge = shouldNudge({ noTools, text, turn, maxTurns });
+  if (!needsNudge) {
+    return { done: true, finalText: text };
+  }
+
+  log("7FXQM2KN", "text-only turn without frontmatter, nudging LLM to continue");
+  const nudgeSentences = [
+    "You stopped calling tools but your response does not start with the required `---` YAML frontmatter. ",
+    "Either continue using tools to complete your work, or output your final response starting with `---`.",
+  ];
+  messages.push({ role: "user", content: nudgeSentences.join("") });
+  return { done: false, finalText: "" };
+}
+
+/**
+ * Record an assistant turn that requested tools, then run those tools.
+ *
+ * @returns The count reported by `executeTurnTools` for the executed calls.
+ */
+async function handleToolTurn(
+  content: string,
+  toolCalls: LlmToolCall[],
+  toolCtx: ToolContext,
+  messages: ChatMessage[],
+  storageRoot: string,
+  sessionId: string,
+): Promise<number> {
+  const recordedCalls = mapToolCallsForPayload(toolCalls);
+  await appendTurn(storageRoot, sessionId, {
+    role: "assistant",
+    content,
+    toolCalls: recordedCalls,
+    reasoning: null,
+  });
+
+  const toolTurnCount = await executeTurnTools(
+    toolCalls,
+    toolCtx,
+    messages,
+    storageRoot,
+    sessionId,
+  );
+  return toolTurnCount;
+}
+
+/**
+ * Find the most recent assistant message that carries non-blank content.
+ *
+ * Messages are scanned from newest to oldest. Non-assistant messages and
+ * assistant messages whose content is `null` or whitespace-only are skipped.
+ *
+ * @returns That message's content, or `""` when no such message exists.
+ */
+export function extractFinalText(messages: ChatMessage[]): string {
+  let index = messages.length;
+  while (index-- > 0) {
+    const candidate = messages[index];
+    if (candidate === undefined || candidate.role !== "assistant") {
+      continue;
+    }
+    const body = candidate.content;
+    if (body !== null && body.trim() !== "") {
+      return body;
+    }
+  }
+  return "";
+}
+
+type LoopTurnResult = { done: boolean; finalText: string; extraTurns: number };
+
+/**
+ * Run one LLM round-trip of the builtin loop and dispatch on the reply.
+ *
+ * The assistant reply is pushed onto `messages`. A reply with tool calls is
+ * handed to `handleToolTurn`; anything else is handed to `handleTextTurn`.
+ *
+ * @param turn Zero-based index of this turn, used for logging and nudging.
+ * @param options Loop options supplying the provider, limits and session info.
+ * @param messages Working conversation; mutated in place.
+ * @param openAiTools Tool definitions to offer; an empty list offers none.
+ * @returns Whether the loop is done, the final text if so, and the extra turn
+ *   count reported for executed tools (0 for text turns).
+ */
+async function runLoopTurn(
+  turn: number,
+  options: RunBuiltinLoopOptions,
+  messages: ChatMessage[],
+  openAiTools: ReturnType<typeof builtinToolsToOpenAi>,
+): Promise<LoopTurnResult> {
+  const { provider, noTools, maxTurns, storageRoot, sessionId, toolCtx } = options;
+  log("8K2M4N7P", `builtin loop turn ${turn + 1}/${maxTurns}`);
+
+  // An empty tool list is passed as null rather than as an empty array.
+  const requestTools = openAiTools.length > 0 ? openAiTools : null;
+  const response = await chatCompletionWithTools(provider, messages, requestTools);
+
+  // With noTools set, any tool calls in the reply are discarded.
+  const effectiveToolCalls = noTools ? null : (response.toolCalls ?? null);
+  messages.push({ role: "assistant", content: response.content, tool_calls: effectiveToolCalls });
+
+  if (effectiveToolCalls !== null && effectiveToolCalls.length > 0) {
+    const extraTurns = await handleToolTurn(
+      response.content ?? "",
+      effectiveToolCalls,
+      toolCtx,
+      messages,
+      storageRoot,
+      sessionId,
+    );
+    return { done: false, finalText: "", extraTurns };
+  }
+
+  const { done, finalText } = await handleTextTurn(
+    response.content ?? "",
+    turn,
+    noTools,
+    maxTurns,
+    storageRoot,
+    sessionId,
+    messages,
+  );
+  return { done, finalText, extraTurns: 0 };
+}
+
 /** Agent run loop: LLM ↔ tools until no tool_calls or maxTurns. */
 export async function runBuiltinLoop(
  options: RunBuiltinLoopOptions,
 ): Promise<RunBuiltinLoopResult> {
  const messages = [...options.messages];
-  const openAiTools = builtinToolsToOpenAi(getBuiltinTools());
+  const openAiTools = options.noTools ? [] : builtinToolsToOpenAi(getBuiltinTools());
  let finalText = "";
  let turnCount = 0;

  for (let turn = 0; turn < options.maxTurns; turn++) {
-    log("8K2M4N7P", `builtin loop turn ${turn + 1}/${options.maxTurns}`);
-    const response = await chatCompletionWithTools(options.provider, messages, openAiTools);
-
-    const assistantMessage: ChatMessage = {
-      role: "assistant",
-      content: response.content,
-      tool_calls: response.toolCalls,
-    };
-    messages.push(assistantMessage);
-
-    if (response.toolCalls === null || response.toolCalls.length === 0) {
-      finalText = response.content ?? "";
-      await appendTurn(options.storageRoot, options.sessionId, {
-        role: "assistant",
-        content: response.content ?? "",
-        toolCalls: null,
-        reasoning: null,
-      });
-      turnCount += 1;
+    const result = await runLoopTurn(turn, options, messages, openAiTools);
+    turnCount += 1 + result.extraTurns;
+    if (result.done) {
+      finalText = result.finalText;
      break;
    }
-
-    // Assistant turn with tool calls
-    await appendTurn(options.storageRoot, options.sessionId, {
-      role: "assistant",
-      content: response.content ?? "",
-      toolCalls: mapToolCallsForPayload(response.toolCalls),
-      reasoning: null,
-    });
-    turnCount += 1;
-
-    // Execute tools
-    turnCount += await executeTurnTools(
-      response.toolCalls,
-      options.toolCtx,
-      messages,
-      options.storageRoot,
-      options.sessionId,
-    );
  }

  if (finalText === "" && messages.length > 0) {
-    for (let i = messages.length - 1; i >= 0; i--) {
-      const msg = messages[i];
-      if (
-        msg !== undefined &&
-        msg.role === "assistant" &&
-        msg.content !== null &&
-        msg.content.trim() !== ""
-      ) {
-        finalText = msg.content;
-        break;
-      }
-    }
+    finalText = extractFinalText(messages);
  }

  return { finalText, messages, turnCount };
@@ -59,6 +59,22 @@ export function buildBuiltinMessages(ctx: AgentContext): ChatMessage[] {
  }
  systemParts.push(rolePrompt);

+  systemParts.push(
+    "",
+    "## Workflow",
+    "",
+    `Your working directory is: ${process.cwd()}`,
+    "",
+    "You have tools available (read_file, write_file, run_command). " +
+      "Use them to complete your task — read files, run commands, make changes as needed. " +
+      "Your task is described in the user message below — do NOT use uwf or workflow CLI commands to discover your task. " +
+      "When you are done, output your final response with the YAML frontmatter block as specified above. " +
+      "Do NOT output the frontmatter until you have completed all necessary work. " +
+      "If you are running low on turns and cannot finish, output the frontmatter with `status: failed` and explain what remains in the body. " +
+      "CRITICAL: Your final output MUST start with the `---` fence on the very first line — " +
+      "no preamble text, no explanation before it. The parser requires `---` at position 0.",
+  );
+
  const messages: ChatMessage[] = [{ role: "system", content: systemParts.join("\n") }];

  const roleVisitIndices: number[] = [];
@@ -1,5 +1,4 @@
 import { spawn } from "node:child_process";
-import { mkdirSync, writeFileSync } from "node:fs";
 import type { Store } from "@uncaged/json-cas";
 import {
  type AgentContext,
@@ -118,17 +117,7 @@ function spawnClaudeResume(
  ]);
 }

-const NDJSON_DUMP_DIR = "/tmp/uwf-ndjson-dump";
-
 async function processClaudeOutput(stdout: string, store: Store): Promise<AgentRunResult> {
-  // Debug dump: save raw NDJSON for issue #439 investigation
-  try {
-    mkdirSync(NDJSON_DUMP_DIR, { recursive: true });
-    writeFileSync(`${NDJSON_DUMP_DIR}/${Date.now()}.ndjson`, stdout);
-  } catch {
-    // ignore dump failures
-  }
-
  const parsed = parseClaudeCodeStreamOutput(stdout);

  if (parsed !== null) {
@@ -121,6 +121,11 @@ export function createAgent(options: AgentOptions): () => Promise<void> {

    let agentResult = await runWithMessage("agent run failed", () => options.run(ctx));

+    // Preserve the primary detail from the first run — it contains the full
+    // tool-call turn history.  Continuation retries only fix frontmatter
+    // formatting and their 1-turn detail is not meaningful.
+    const primaryDetailHash = agentResult.detailHash;
+
    // Try to extract frontmatter; retry via continue if it fails
    let outputHash = await tryExtractOutput(agentResult.output, roleDef.frontmatter, ctx);

@@ -147,7 +152,7 @@ export function createAgent(options: AgentOptions): () => Promise<void> {
    const stepHash = await persistStep({
      ctx,
      outputHash,
-      detailHash: agentResult.detailHash,
+      detailHash: primaryDetailHash,
      agentName: agentLabel(options.name),
    });
Author	SHA1	Message	Date
xingyue	842e479784	fix(#447): fix biome format in loop.test.ts	2026-05-23 22:53:50 +08:00
xingyue	f63c670cd9	fix(#447): format and sort imports in loop.test.ts	2026-05-23 22:52:58 +08:00
xingyue	64c5122453	fix(#447): reduce cognitive complexity in loop.ts by extracting helpers	2026-05-23 22:50:06 +08:00
xingyue	5b68359dfc	fix #447: extract shouldNudge and export executeTurnTools from loop.ts, add tests	2026-05-23 22:45:09 +08:00
xingyue	c2ddfb8558	fix(builtin): deadline warning + graceful exit on turn limit - Inject user message when 3 turns remain, telling agent to wrap up - Prompt tells agent to use status:failed if it can't finish in time - Prevents wasting all turns without producing any frontmatter output - Remove stale test file from dogfood agent run	2026-05-23 22:44:42 +08:00
xingyue	603018caf2	fix(builtin): force-strip tool_calls when noTools is set copilot-api returns tool_calls even when tools field is omitted from the request (infers from message history). Now the loop explicitly nullifies tool_calls when noTools=true.	2026-05-23 22:35:20 +08:00
xiaomo	aff0ee6fea	Merge pull request 'fix(thread-read): remove ### Output section and deduplicate ### Prompt globally' (#442) from fix/440-thread-read-prompt-dedup into main	2026-05-23 14:15:40 +00:00
xiaomo	d37fa1393a	Merge pull request 'fix: preserve primary detail hash across frontmatter retries' (#443) from fix/439-detail-merge-and-acp into main	2026-05-23 14:14:53 +00:00
xiaoju	759c784267	fix: preserve primary detail hash across frontmatter retries When the agent's first run output fails frontmatter extraction, the retry loop (via options.continue) would replace agentResult entirely, causing the 1-turn continuation detail to overwrite the original multi-turn detail containing all tool-call history. Now we capture primaryDetailHash from the first run and always use it for the persisted StepNode, regardless of how many retries occur. Fixes #439	2026-05-23 14:02:51 +00:00
xingyue	52ffc7dcc1	fix(thread-read): remove ### Output section and deduplicate ### Prompt globally	2026-05-23 22:01:24 +08:00
xingyue	ac55a3e3d9	fix(builtin): nudge LLM when it stops tools without frontmatter LLM sometimes emits plain text (e.g. 'Now I'll write the tests...') without calling tools, which the loop treated as final output. Now the loop detects this and injects a user message nudging the LLM to either continue using tools or output frontmatter with ---.	2026-05-23 21:49:07 +08:00
xingyue	edb979baa9	fix(builtin): disable tools during continue/retry to force frontmatter output Agent was using all continue turns to keep calling tools instead of outputting the required frontmatter. Now continue runs with noTools=true, forcing LLM to emit text-only response. Also supports null tools in chatCompletionWithTools to omit tools from the API request entirely.	2026-05-23 21:40:30 +08:00
xingyue	3d1850ddbe	fix(builtin): tell agent not to use uwf CLI to discover its task Agent was wasting all 30 turns using uwf/tea CLI to explore threads instead of reading the task from its own user message.	2026-05-23 21:30:59 +08:00
xingyue	3c1f4a6dfa	fix(builtin): include cwd in system prompt Agent was wasting turns exploring the filesystem because it didn't know its working directory. Now the system prompt includes: 'Your working directory is: /path/to/cwd'	2026-05-23 21:27:24 +08:00
xiaomo	f07a6daa30	Merge pull request 'fix(builtin): session lifecycle + frontmatter preamble stripping' (#441) from fix/builtin-session-lifecycle into main	2026-05-23 13:20:04 +00:00
xingyue	0eeb4a8ed8	fix(builtin): strip preamble before frontmatter + stronger prompt - Add stripPreamble() to handle LLM output with text before --- - Strengthen system prompt: CRITICAL instruction for --- at position 0 - Fixes frontmatter parsing failures on first output turn	2026-05-23 20:37:14 +08:00
xingyue	a3fac708b6	fix(builtin-agent): don't delete session jsonl until process exits Previously runBuiltinWithMessages deleted the session jsonl after each run/continue call. This meant the createAgent retry mechanism (which calls continue on frontmatter validation failure) would lose all previous turn data — each continue started with an empty jsonl. Now the session jsonl accumulates across run + continue calls, so the final storeBuiltinDetail captures all turns. The jsonl file is left behind for debugging; it's small and can be cleaned up on next startup. Also add a workflow hint to the system prompt reminding the LLM to use tools before outputting frontmatter, preventing premature text-only responses on the first turn.	2026-05-23 20:32:38 +08:00