diff --git a/packages/workflow-agent-builtin/__tests__/loop.test.ts b/packages/workflow-agent-builtin/__tests__/loop.test.ts index c1ea654..832e84d 100644 --- a/packages/workflow-agent-builtin/__tests__/loop.test.ts +++ b/packages/workflow-agent-builtin/__tests__/loop.test.ts @@ -19,7 +19,14 @@ mock.module("../src/tools/index.js", () => ({ getBuiltinTools: () => [], })); -import { executeTurnTools, runBuiltinLoop, shouldNudge } from "../src/loop.js"; +import { + executeTurnTools, + extractFinalText, + runBuiltinLoop, + shouldInjectDeadlineWarning, + shouldNudge, + shouldProcessToolCalls, +} from "../src/loop.js"; const fakeProvider = {} as any; const fakeToolCtx = {} as any; @@ -154,3 +161,96 @@ describe("runBuiltinLoop integration", () => { expect(original.length).toBe(1); }); }); + +describe("shouldInjectDeadlineWarning", () => { + test("5.1 returns true when turn count reaches warning threshold and not yet warned", () => { + expect(shouldInjectDeadlineWarning(7, 10, false, false)).toBe(true); + }); + test("5.2 returns false when already warned", () => { + expect(shouldInjectDeadlineWarning(7, 10, true, false)).toBe(false); + }); + test("5.3 returns false when noTools is true", () => { + expect(shouldInjectDeadlineWarning(7, 10, false, true)).toBe(false); + }); + test("5.4 returns false when turns remaining > DEADLINE_WARNING_TURNS", () => { + expect(shouldInjectDeadlineWarning(5, 10, false, false)).toBe(false); + }); + test("5.5 returns true when exactly at warning threshold", () => { + expect(shouldInjectDeadlineWarning(7, 10, false, false)).toBe(true); + }); + test("5.6 returns false when turns remaining is 0", () => { + expect(shouldInjectDeadlineWarning(10, 10, false, false)).toBe(false); + }); +}); + +describe("shouldProcessToolCalls", () => { + test("6.1 returns true when toolCalls present and noTools=false", () => { + expect(shouldProcessToolCalls([{ id: "x", name: "read", arguments: "{}" }], false)).toBe(true); + }); + test("6.2 returns false when toolCalls is null", () => { + expect(shouldProcessToolCalls(null, false)).toBe(false); + }); + test("6.3 returns false when toolCalls is empty array", () => { + expect(shouldProcessToolCalls([], false)).toBe(false); + }); + test("6.4 returns false when noTools=true", () => { + expect(shouldProcessToolCalls([{ id: "x", name: "read", arguments: "{}" }], true)).toBe(false); + }); + test("6.5 returns true when multiple tool calls present", () => { + expect( + shouldProcessToolCalls( + [ + { id: "x1", name: "read", arguments: "{}" }, + { id: "x2", name: "write", arguments: "{}" }, + ], + false, + ), + ).toBe(true); + }); +}); + +describe("extractFinalText", () => { + test("7.1 returns last assistant message content", () => { + const messages = [ + { role: "system" as const, content: "sys", tool_calls: null }, + { role: "assistant" as const, content: "first", tool_calls: null }, + { role: "assistant" as const, content: "last", tool_calls: null }, + ]; + expect(extractFinalText(messages)).toBe("last"); + }); + test("7.2 returns empty string when no assistant messages", () => { + expect(extractFinalText([{ role: "system" as const, content: "sys", tool_calls: null }])).toBe( + "", + ); + }); + test("7.3 skips assistant messages with null content", () => { + const messages = [ + { role: "assistant" as const, content: "first", tool_calls: null }, + { + role: "assistant" as const, + content: null, + tool_calls: [{ id: "x", name: "t", arguments: "{}" }], + }, + { role: "assistant" as const, content: "second", tool_calls: null }, + ]; + expect(extractFinalText(messages)).toBe("second"); + }); + test("7.4 skips assistant messages with empty content", () => { + const messages = [ + { role: "assistant" as const, content: "first", tool_calls: null }, + { role: "assistant" as const, content: "", tool_calls: null }, + { role: "user" as const, content: "nudge", tool_calls: null }, + ]; + expect(extractFinalText(messages)).toBe("first"); + }); + test("7.5 handles empty messages array", () => { + expect(extractFinalText([])).toBe(""); + }); + test("7.6 handles messages with only user and system roles", () => { + const messages = [ + { role: "system" as const, content: "sys", tool_calls: null }, + { role: "user" as const, content: "query", tool_calls: null }, + ]; + expect(extractFinalText(messages)).toBe(""); + }); +}); diff --git a/packages/workflow-agent-builtin/src/loop.ts b/packages/workflow-agent-builtin/src/loop.ts index 3eef599..602e36c 100644 --- a/packages/workflow-agent-builtin/src/loop.ts +++ b/packages/workflow-agent-builtin/src/loop.ts @@ -1,7 +1,12 @@ import type { ResolvedLlmProvider } from "@uncaged/workflow-agent-kit"; import { createLogger } from "@uncaged/workflow-util"; -import { type ChatMessage, chatCompletionWithTools, type LlmToolCall } from "./llm/index.js"; +import { + type ChatMessage, + chatCompletionWithTools, + type LlmToolCall, + type OpenAiToolDefinition, +} from "./llm/index.js"; import { appendSessionTurn } from "./session.js"; import { builtinToolsToOpenAi, @@ -80,10 +85,184 @@ export type ShouldNudgeOptions = { const MAX_NUDGES = 3; const DEADLINE_WARNING_TURNS = 3; +export function shouldInjectDeadlineWarning( + turn: number, + maxTurns: number, + alreadyWarned: boolean, + noTools: boolean, +): boolean { + const turnsRemaining = maxTurns - turn; + return ( + !noTools && !alreadyWarned && turnsRemaining > 0 && turnsRemaining <= DEADLINE_WARNING_TURNS + ); +} + +export function shouldProcessToolCalls(toolCalls: LlmToolCall[] | null, noTools: boolean): boolean { + return !noTools && toolCalls !== null && toolCalls.length > 0; +} + +export function extractFinalText(messages: ChatMessage[]): string { + for (let i = messages.length - 1; i >= 0; i--) { + const msg = messages[i]; + if ( + msg !== undefined && + msg.role === "assistant" && + msg.content !== null && + msg.content.trim() !== "" + ) { + return msg.content; + } + } + return ""; +} + +function injectDeadlineWarning(messages: ChatMessage[], turnsRemaining: number): void { + log("4NRXW6KT", `${turnsRemaining} turns remaining, injecting deadline warning`); + messages.push({ + role: "user", + content: + `⚠️ You have ${turnsRemaining} turns remaining. ` + + "Wrap up your work and output the YAML frontmatter starting with `---`. " + + "If you cannot finish in time, output frontmatter with `status: failed` and describe what remains.", + }); +} + +type HandleTextOnlyTurnResult = { + shouldBreak: boolean; + finalText: string; + turnCount: number; + nudgeCount: number; + turnAdjustment: number; +}; + +async function handleTextOnlyTurn( + text: string, + messages: ChatMessage[], + storageRoot: string, + sessionId: string, + noTools: boolean, + turn: number, + maxTurns: number, + currentNudgeCount: number, +): Promise { + await appendTurn(storageRoot, sessionId, { + role: "assistant", + content: text, + toolCalls: null, + reasoning: null, + }); + const turnCount = 1; + let nudgeCount = currentNudgeCount; + let turnAdjustment = 0; + + if (shouldNudge({ noTools, text, turn, maxTurns })) { + nudgeCount += 1; + log("7FXQM2KN", `text-only turn without frontmatter, nudge ${nudgeCount}/${MAX_NUDGES}`); + const nudge = + "You stopped calling tools but your response does not start with the required `---` YAML frontmatter. " + + "Either continue using tools to complete your work, or output your final response starting with `---`."; + messages.push({ role: "user", content: nudge }); + // Nudge doesn't consume turn budget (up to MAX_NUDGES) + if (nudgeCount <= MAX_NUDGES) { + turnAdjustment = -1; + } + return { shouldBreak: false, finalText: "", turnCount, nudgeCount, turnAdjustment }; + } + + return { shouldBreak: true, finalText: text, turnCount, nudgeCount, turnAdjustment }; +} + +async function handleToolCallTurn( + content: string, + toolCalls: LlmToolCall[], + messages: ChatMessage[], + storageRoot: string, + sessionId: string, + toolCtx: ToolContext, +): Promise { + await appendTurn(storageRoot, sessionId, { + role: "assistant", + content, + toolCalls: mapToolCallsForPayload(toolCalls), + reasoning: null, + }); + let turnCount = 1; + + // Execute tools + turnCount += await executeTurnTools(toolCalls, toolCtx, messages, storageRoot, sessionId); + + return turnCount; +} + export function shouldNudge({ noTools, text, turn, maxTurns }: ShouldNudgeOptions): boolean { return !noTools && !text.trimStart().startsWith("---") && turn < maxTurns - 1; } +type ProcessLoopIterationResult = { + shouldBreak: boolean; + finalText: string; + turnCount: number; + nudgeCount: number; + turnAdjustment: number; +}; + +async function processLoopIteration( + options: RunBuiltinLoopOptions, + messages: ChatMessage[], + openAiTools: OpenAiToolDefinition[], + turn: number, + nudgeCount: number, +): Promise { + const response = await chatCompletionWithTools( + options.provider, + messages, + openAiTools.length > 0 ? openAiTools : null, + ); + + // When noTools is set, ignore any tool_calls the LLM might still return + const effectiveToolCalls = options.noTools ? null : (response.toolCalls ?? null); + + const assistantMessage: ChatMessage = { + role: "assistant", + content: response.content, + tool_calls: effectiveToolCalls, + }; + messages.push(assistantMessage); + + if (!shouldProcessToolCalls(effectiveToolCalls, options.noTools)) { + const text = response.content ?? ""; + const result = await handleTextOnlyTurn( + text, + messages, + options.storageRoot, + options.sessionId, + options.noTools, + turn, + options.maxTurns, + nudgeCount, + ); + return result; + } + + // At this point, effectiveToolCalls is guaranteed to be non-null and non-empty + const turnCount = await handleToolCallTurn( + response.content ?? "", + effectiveToolCalls as LlmToolCall[], + messages, + options.storageRoot, + options.sessionId, + options.toolCtx, + ); + + return { + shouldBreak: false, + finalText: "", + turnCount, + nudgeCount, + turnAdjustment: 0, + }; +} + /** Agent run loop: LLM ↔ tools until no tool_calls or maxTurns. */ export async function runBuiltinLoop( options: RunBuiltinLoopOptions, @@ -99,95 +278,25 @@ export async function runBuiltinLoop( log("8K2M4N7P", `builtin loop turn ${turn + 1}/${options.maxTurns}`); // Warn agent when approaching turn limit - const turnsRemaining = options.maxTurns - turn; - if (!options.noTools && !deadlineWarned && turnsRemaining <= DEADLINE_WARNING_TURNS) { + if (shouldInjectDeadlineWarning(turn, options.maxTurns, deadlineWarned, options.noTools)) { deadlineWarned = true; - log("4NRXW6KT", `${turnsRemaining} turns remaining, injecting deadline warning`); - messages.push({ - role: "user", - content: - `⚠️ You have ${turnsRemaining} turns remaining. ` + - "Wrap up your work and output the YAML frontmatter starting with `---`. " + - "If you cannot finish in time, output frontmatter with `status: failed` and describe what remains.", - }); + const turnsRemaining = options.maxTurns - turn; + injectDeadlineWarning(messages, turnsRemaining); } - const response = await chatCompletionWithTools( - options.provider, - messages, - openAiTools.length > 0 ? openAiTools : null, - ); + const result = await processLoopIteration(options, messages, openAiTools, turn, nudgeCount); + turnCount += result.turnCount; + nudgeCount = result.nudgeCount; + turn += result.turnAdjustment; - // When noTools is set, ignore any tool_calls the LLM might still return - const effectiveToolCalls = options.noTools ? null : (response.toolCalls ?? null); - - const assistantMessage: ChatMessage = { - role: "assistant", - content: response.content, - tool_calls: effectiveToolCalls, - }; - messages.push(assistantMessage); - - if (effectiveToolCalls === null || effectiveToolCalls.length === 0) { - const text = response.content ?? ""; - await appendTurn(options.storageRoot, options.sessionId, { - role: "assistant", - content: text, - toolCalls: null, - reasoning: null, - }); - turnCount += 1; - - if (shouldNudge({ noTools: options.noTools, text, turn, maxTurns: options.maxTurns })) { - nudgeCount += 1; - log("7FXQM2KN", `text-only turn without frontmatter, nudge ${nudgeCount}/${MAX_NUDGES}`); - const nudge = - "You stopped calling tools but your response does not start with the required `---` YAML frontmatter. " + - "Either continue using tools to complete your work, or output your final response starting with `---`."; - messages.push({ role: "user", content: nudge }); - // Nudge doesn't consume turn budget (up to MAX_NUDGES) - if (nudgeCount <= MAX_NUDGES) { - turn -= 1; - } - continue; - } - - finalText = text; + if (result.shouldBreak) { + finalText = result.finalText; break; } - - // Assistant turn with tool calls - await appendTurn(options.storageRoot, options.sessionId, { - role: "assistant", - content: response.content ?? "", - toolCalls: mapToolCallsForPayload(effectiveToolCalls), - reasoning: null, - }); - turnCount += 1; - - // Execute tools - turnCount += await executeTurnTools( - effectiveToolCalls, - options.toolCtx, - messages, - options.storageRoot, - options.sessionId, - ); } - if (finalText === "" && messages.length > 0) { - for (let i = messages.length - 1; i >= 0; i--) { - const msg = messages[i]; - if ( - msg !== undefined && - msg.role === "assistant" && - msg.content !== null && - msg.content.trim() !== "" - ) { - finalText = msg.content; - break; - } - } + if (finalText === "") { + finalText = extractFinalText(messages); } return { finalText, messages, turnCount };