fix(#447): fix biome format in loop.test.ts

fix(#447): format and sort imports in loop.test.ts
fix(#447): reduce cognitive complexity in loop.ts by extracting helpers
2026-05-23 22:53:50 +08:00 · 2026-05-23 22:52:58 +08:00 · 2026-05-23 22:50:06 +08:00 · 2026-05-23 22:45:09 +08:00 · 2026-05-23 22:44:42 +08:00 · 2026-05-23 22:35:20 +08:00
4 changed files with 283 additions and 68 deletions
@@ -0,0 +1,156 @@
+import { beforeEach, describe, expect, mock, test } from "bun:test";
+
+const mockChatCompletionWithTools = mock(async () => ({
+  content: "---\nstatus: done\n---",
+  toolCalls: [],
+}));
+const mockAppendSessionTurn = mock(async () => {});
+const mockExecuteBuiltinTool = mock(async () => "tool-result");
+
+mock.module("../src/llm/index.js", () => ({
+  chatCompletionWithTools: mockChatCompletionWithTools,
+}));
+mock.module("../src/session.js", () => ({
+  appendSessionTurn: mockAppendSessionTurn,
+}));
+mock.module("../src/tools/index.js", () => ({
+  builtinToolsToOpenAi: () => [],
+  executeBuiltinTool: mockExecuteBuiltinTool,
+  getBuiltinTools: () => [],
+}));
+
+import { executeTurnTools, runBuiltinLoop, shouldNudge } from "../src/loop.js";
+
+const fakeProvider = {} as any;
+const fakeToolCtx = {} as any;
+
+function makeOptions(overrides: Partial<Parameters<typeof runBuiltinLoop>[0]> = {}) {
+  return {
+    provider: fakeProvider,
+    messages: [{ role: "system" as const, content: "sys" }],
+    toolCtx: fakeToolCtx,
+    maxTurns: 5,
+    storageRoot: "/tmp",
+    sessionId: "sess",
+    noTools: false,
+    ...overrides,
+  };
+}
+
+beforeEach(() => {
+  mockChatCompletionWithTools.mockReset();
+  mockAppendSessionTurn.mockReset();
+  mockExecuteBuiltinTool.mockReset();
+});
+
+describe("shouldNudge", () => {
+  test("2.1 returns true when all conditions met", () => {
+    expect(shouldNudge({ noTools: false, text: "some text", turn: 0, maxTurns: 5 })).toBe(true);
+  });
+  test("2.2 returns false when noTools=true", () => {
+    expect(shouldNudge({ noTools: true, text: "some text", turn: 0, maxTurns: 5 })).toBe(false);
+  });
+  test("2.3 returns false when text starts with ---", () => {
+    expect(shouldNudge({ noTools: false, text: "---\nstatus: done", turn: 0, maxTurns: 5 })).toBe(
+      false,
+    );
+  });
+  test("2.4 returns false on last turn", () => {
+    expect(shouldNudge({ noTools: false, text: "some text", turn: 4, maxTurns: 5 })).toBe(false);
+  });
+  test("2.5 returns true on second-to-last turn", () => {
+    expect(shouldNudge({ noTools: false, text: "some text", turn: 3, maxTurns: 5 })).toBe(true);
+  });
+  test("2.6 leading whitespace before --- suppresses nudge", () => {
+    expect(shouldNudge({ noTools: false, text: "  ---\nstatus: done", turn: 0, maxTurns: 5 })).toBe(
+      false,
+    );
+  });
+});
+
+describe("executeTurnTools", () => {
+  test("4.1 executes each tool call and pushes tool result messages", async () => {
+    mockExecuteBuiltinTool.mockResolvedValue("result");
+    const messages: any[] = [];
+    const calls = [
+      { id: "c1", name: "tool_a", arguments: "{}" },
+      { id: "c2", name: "tool_b", arguments: "{}" },
+    ];
+    const count = await executeTurnTools(calls, fakeToolCtx, messages, "/tmp", "sess");
+    expect(messages.length).toBe(2);
+    expect(messages[0].role).toBe("tool");
+    expect(messages[1].role).toBe("tool");
+    expect(count).toBe(2);
+  });
+  test("4.2 tool result content matches executeBuiltinTool return value", async () => {
+    mockExecuteBuiltinTool.mockResolvedValue("result-A");
+    const messages: any[] = [];
+    await executeTurnTools(
+      [{ id: "c1", name: "read_file", arguments: "{}" }],
+      fakeToolCtx,
+      messages,
+      "/tmp",
+      "sess",
+    );
+    expect(messages[0].content).toBe("result-A");
+  });
+});
+
+describe("runBuiltinLoop integration", () => {
+  test("3.1 single text-only response returns finalText immediately", async () => {
+    mockChatCompletionWithTools.mockResolvedValue({
+      content: "---\nstatus: done\n---",
+      toolCalls: [],
+    });
+    const result = await runBuiltinLoop(makeOptions());
+    expect(result.finalText).toBe("---\nstatus: done\n---");
+    expect(result.turnCount).toBe(1);
+  });
+  test("3.2 noTools=true suppresses tool calls", async () => {
+    mockChatCompletionWithTools.mockResolvedValue({
+      content: "ok",
+      toolCalls: [{ id: "c1", name: "read_file", arguments: "{}" }],
+    });
+    const result = await runBuiltinLoop(makeOptions({ noTools: true }));
+    expect(result.finalText).toBe("ok");
+    expect(result.turnCount).toBe(1);
+  });
+  test("3.3 tool call followed by text response", async () => {
+    mockChatCompletionWithTools
+      .mockResolvedValueOnce({
+        content: null,
+        toolCalls: [{ id: "c1", name: "read_file", arguments: "{}" }],
+      })
+      .mockResolvedValueOnce({ content: "---\nstatus: done\n---", toolCalls: [] });
+    mockExecuteBuiltinTool.mockResolvedValue("file contents");
+    const result = await runBuiltinLoop(makeOptions());
+    expect(result.finalText).toBe("---\nstatus: done\n---");
+    expect(result.turnCount).toBe(3);
+  });
+  test("3.4 nudge cycle inserts nudge message", async () => {
+    mockChatCompletionWithTools
+      .mockResolvedValueOnce({ content: "I am thinking", toolCalls: [] })
+      .mockResolvedValueOnce({ content: "---\nstatus: done\n---", toolCalls: [] });
+    const result = await runBuiltinLoop(makeOptions());
+    expect(result.finalText).toBe("---\nstatus: done\n---");
+    const nudgeMsg = result.messages.find(
+      (m) =>
+        m.role === "user" && typeof m.content === "string" && m.content.includes("frontmatter"),
+    );
+    expect(nudgeMsg).toBeDefined();
+  });
+  test("3.5 maxTurns exhaustion falls back to last assistant content", async () => {
+    mockChatCompletionWithTools.mockResolvedValue({ content: "still thinking", toolCalls: [] });
+    const result = await runBuiltinLoop(makeOptions({ maxTurns: 3 }));
+    expect(result.finalText).toBe("still thinking");
+  });
+  test("3.6 original messages array is not mutated", async () => {
+    mockChatCompletionWithTools.mockResolvedValue({
+      content: "---\nstatus: done\n---",
+      toolCalls: [],
+    });
+    const original = [{ role: "system" as const, content: "sys" }];
+    await runBuiltinLoop(makeOptions({ messages: original }));
+    expect(original.length).toBe(1);
+  });
+});
@@ -48,7 +48,7 @@ async function appendTurn(
  await appendSessionTurn(storageRoot, sessionId, payload);
 }

-async function executeTurnTools(
+export async function executeTurnTools(
  calls: Array<{ id: string; name: string; arguments: string }>,
  toolCtx: ToolContext,
  messages: ChatMessage[],
@@ -70,6 +70,120 @@ async function executeTurnTools(
  return turnCount;
 }

+export type ShouldNudgeOptions = {
+  noTools: boolean;
+  text: string;
+  turn: number;
+  maxTurns: number;
+};
+
+export function shouldNudge({ noTools, text, turn, maxTurns }: ShouldNudgeOptions): boolean {
+  return !noTools && !text.trimStart().startsWith("---") && turn < maxTurns - 1;
+}
+
+async function handleTextTurn(
+  text: string,
+  turn: number,
+  noTools: boolean,
+  maxTurns: number,
+  storageRoot: string,
+  sessionId: string,
+  messages: ChatMessage[],
+): Promise<{ done: boolean; finalText: string }> {
+  await appendTurn(storageRoot, sessionId, {
+    role: "assistant",
+    content: text,
+    toolCalls: null,
+    reasoning: null,
+  });
+
+  if (shouldNudge({ noTools, text, turn, maxTurns })) {
+    log("7FXQM2KN", "text-only turn without frontmatter, nudging LLM to continue");
+    const nudge =
+      "You stopped calling tools but your response does not start with the required `---` YAML frontmatter. " +
+      "Either continue using tools to complete your work, or output your final response starting with `---`.";
+    messages.push({ role: "user", content: nudge });
+    return { done: false, finalText: "" };
+  }
+
+  return { done: true, finalText: text };
+}
+
+async function handleToolTurn(
+  content: string,
+  toolCalls: LlmToolCall[],
+  toolCtx: ToolContext,
+  messages: ChatMessage[],
+  storageRoot: string,
+  sessionId: string,
+): Promise<number> {
+  await appendTurn(storageRoot, sessionId, {
+    role: "assistant",
+    content,
+    toolCalls: mapToolCallsForPayload(toolCalls),
+    reasoning: null,
+  });
+  return executeTurnTools(toolCalls, toolCtx, messages, storageRoot, sessionId);
+}
+
+export function extractFinalText(messages: ChatMessage[]): string {
+  for (let i = messages.length - 1; i >= 0; i--) {
+    const msg = messages[i];
+    if (
+      msg !== undefined &&
+      msg.role === "assistant" &&
+      msg.content !== null &&
+      msg.content.trim() !== ""
+    ) {
+      return msg.content;
+    }
+  }
+  return "";
+}
+
+type LoopTurnResult = { done: boolean; finalText: string; extraTurns: number };
+
+async function runLoopTurn(
+  turn: number,
+  options: RunBuiltinLoopOptions,
+  messages: ChatMessage[],
+  openAiTools: ReturnType<typeof builtinToolsToOpenAi>,
+): Promise<LoopTurnResult> {
+  log("8K2M4N7P", `builtin loop turn ${turn + 1}/${options.maxTurns}`);
+  const response = await chatCompletionWithTools(
+    options.provider,
+    messages,
+    openAiTools.length > 0 ? openAiTools : null,
+  );
+
+  const effectiveToolCalls = options.noTools ? null : (response.toolCalls ?? null);
+  messages.push({ role: "assistant", content: response.content, tool_calls: effectiveToolCalls });
+
+  if (effectiveToolCalls === null || effectiveToolCalls.length === 0) {
+    const text = response.content ?? "";
+    const result = await handleTextTurn(
+      text,
+      turn,
+      options.noTools,
+      options.maxTurns,
+      options.storageRoot,
+      options.sessionId,
+      messages,
+    );
+    return { done: result.done, finalText: result.finalText, extraTurns: 0 };
+  }
+
+  const extra = await handleToolTurn(
+    response.content ?? "",
+    effectiveToolCalls,
+    options.toolCtx,
+    messages,
+    options.storageRoot,
+    options.sessionId,
+  );
+  return { done: false, finalText: "", extraTurns: extra };
+}
+
 /** Agent run loop: LLM ↔ tools until no tool_calls or maxTurns. */
 export async function runBuiltinLoop(
  options: RunBuiltinLoopOptions,
@@ -80,77 +194,16 @@ export async function runBuiltinLoop(
  let turnCount = 0;

  for (let turn = 0; turn < options.maxTurns; turn++) {
-    log("8K2M4N7P", `builtin loop turn ${turn + 1}/${options.maxTurns}`);
-    const response = await chatCompletionWithTools(
-      options.provider,
-      messages,
-      openAiTools.length > 0 ? openAiTools : null,
-    );
-
-    const assistantMessage: ChatMessage = {
-      role: "assistant",
-      content: response.content,
-      tool_calls: response.toolCalls,
-    };
-    messages.push(assistantMessage);
-
-    if (response.toolCalls === null || response.toolCalls.length === 0) {
-      const text = response.content ?? "";
-      await appendTurn(options.storageRoot, options.sessionId, {
-        role: "assistant",
-        content: text,
-        toolCalls: null,
-        reasoning: null,
-      });
-      turnCount += 1;
-
-      // If tools are available but LLM stopped calling them without producing
-      // frontmatter, nudge it to continue working or output frontmatter.
-      if (!options.noTools && !text.trimStart().startsWith("---") && turn < options.maxTurns - 1) {
-        log("7FXQM2KN", "text-only turn without frontmatter, nudging LLM to continue");
-        const nudge =
-          "You stopped calling tools but your response does not start with the required `---` YAML frontmatter. " +
-          "Either continue using tools to complete your work, or output your final response starting with `---`.";
-        messages.push({ role: "user", content: nudge });
-        continue;
-      }
-
-      finalText = text;
+    const result = await runLoopTurn(turn, options, messages, openAiTools);
+    turnCount += 1 + result.extraTurns;
+    if (result.done) {
+      finalText = result.finalText;
      break;
    }
-
-    // Assistant turn with tool calls
-    await appendTurn(options.storageRoot, options.sessionId, {
-      role: "assistant",
-      content: response.content ?? "",
-      toolCalls: mapToolCallsForPayload(response.toolCalls),
-      reasoning: null,
-    });
-    turnCount += 1;
-
-    // Execute tools
-    turnCount += await executeTurnTools(
-      response.toolCalls,
-      options.toolCtx,
-      messages,
-      options.storageRoot,
-      options.sessionId,
-    );
  }

  if (finalText === "" && messages.length > 0) {
-    for (let i = messages.length - 1; i >= 0; i--) {
-      const msg = messages[i];
-      if (
-        msg !== undefined &&
-        msg.role === "assistant" &&
-        msg.content !== null &&
-        msg.content.trim() !== ""
-      ) {
-        finalText = msg.content;
-        break;
-      }
-    }
+    finalText = extractFinalText(messages);
  }

  return { finalText, messages, turnCount };
@@ -70,6 +70,7 @@ export function buildBuiltinMessages(ctx: AgentContext): ChatMessage[] {
      "Your task is described in the user message below — do NOT use uwf or workflow CLI commands to discover your task. " +
      "When you are done, output your final response with the YAML frontmatter block as specified above. " +
      "Do NOT output the frontmatter until you have completed all necessary work. " +
+      "If you are running low on turns and cannot finish, output the frontmatter with `status: failed` and explain what remains in the body. " +
      "CRITICAL: Your final output MUST start with the `---` fence on the very first line — " +
      "no preamble text, no explanation before it. The parser requires `---` at position 0.",
  );
@@ -121,6 +121,11 @@ export function createAgent(options: AgentOptions): () => Promise<void> {

    let agentResult = await runWithMessage("agent run failed", () => options.run(ctx));

+    // Preserve the primary detail from the first run — it contains the full
+    // tool-call turn history.  Continuation retries only fix frontmatter
+    // formatting and their 1-turn detail is not meaningful.
+    const primaryDetailHash = agentResult.detailHash;
+
    // Try to extract frontmatter; retry via continue if it fails
    let outputHash = await tryExtractOutput(agentResult.output, roleDef.frontmatter, ctx);

@@ -147,7 +152,7 @@ export function createAgent(options: AgentOptions): () => Promise<void> {
    const stepHash = await persistStep({
      ctx,
      outputHash,
-      detailHash: agentResult.detailHash,
+      detailHash: primaryDetailHash,
      agentName: agentLabel(options.name),
    });
Author	SHA1	Message	Date
xingyue	842e479784	fix(#447): fix biome format in loop.test.ts	2026-05-23 22:53:50 +08:00
xingyue	f63c670cd9	fix(#447): format and sort imports in loop.test.ts	2026-05-23 22:52:58 +08:00
xingyue	64c5122453	fix(#447): reduce cognitive complexity in loop.ts by extracting helpers	2026-05-23 22:50:06 +08:00
xingyue	5b68359dfc	fix #447: extract shouldNudge and export executeTurnTools from loop.ts, add tests	2026-05-23 22:45:09 +08:00
xingyue	c2ddfb8558	fix(builtin): deadline warning + graceful exit on turn limit — Inject user message when 3 turns remain, telling agent to wrap up; Prompt tells agent to use status:failed if it can't finish in time; Prevents wasting all turns without producing any frontmatter output; Remove stale test file from dogfood agent run	2026-05-23 22:44:42 +08:00
xingyue	603018caf2	fix(builtin): force-strip tool_calls when noTools is set — copilot-api returns tool_calls even when tools field is omitted from the request (infers from message history). Now the loop explicitly nullifies tool_calls when noTools=true.	2026-05-23 22:35:20 +08:00
xiaomo	aff0ee6fea	Merge pull request 'fix(thread-read): remove ### Output section and deduplicate ### Prompt globally' (#442) from fix/440-thread-read-prompt-dedup into main	2026-05-23 14:15:40 +00:00
xiaomo	d37fa1393a	Merge pull request 'fix: preserve primary detail hash across frontmatter retries' (#443) from fix/439-detail-merge-and-acp into main	2026-05-23 14:14:53 +00:00
xiaoju	759c784267	fix: preserve primary detail hash across frontmatter retries — When the agent's first run output fails frontmatter extraction, the retry loop (via options.continue) would replace agentResult entirely, causing the 1-turn continuation detail to overwrite the original multi-turn detail containing all tool-call history. Now we capture primaryDetailHash from the first run and always use it for the persisted StepNode, regardless of how many retries occur. Fixes #439.	2026-05-23 14:02:51 +00:00