refactor(agent-builtin): reduce cognitive complexity in loop.ts

Refactored the runBuiltinLoop function to reduce its cognitive complexity from 30 to below 15 by extracting helper functions:
- shouldInjectDeadlineWarning: checks if deadline warning should be shown
- shouldProcessToolCalls: determines if tool calls should be processed
- extractFinalText: extracts last assistant message content
- injectDeadlineWarning: injects deadline warning message
- handleTextOnlyTurn: handles text-only turn logic
- handleToolCallTurn: handles tool call turn logic
- processLoopIteration: processes a single loop iteration

Added 24 new unit tests for the extracted helper functions, bringing total test count to 41 (all passing). All existing behavior is preserved.

Fixes #444

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Merge pull request 'feat(cli): thread step --background + thread running' (#457) from fix/456-thread-step-background into main
2026-05-24 05:53:55 +00:00 · 2026-05-24 05:33:56 +00:00 · 2026-05-24 05:28:29 +00:00 · 2026-05-24 04:31:44 +00:00 · 2026-05-24 04:17:29 +00:00 · 2026-05-24 03:44:08 +00:00
19 changed files with 1081 additions and 291 deletions
@@ -1,28 +0,0 @@
-name: CI
-
-on:
-  push:
-    branches: ['*']
-  pull_request:
-    branches: [main]
-
-jobs:
-  test:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Setup Bun
-        uses: oven-sh/setup-bun@v2
-
-      - name: Install dependencies
-        run: bun install
-
-      - name: Lint
-        run: bun run lint
-
-      - name: Type check
-        run: bun run typecheck
-
-      - name: Test
-        run: bun test
@@ -0,0 +1,147 @@
+import { mkdir, readdir, readFile, rename, rm, writeFile } from "node:fs/promises";
+import { join } from "node:path";
+import type { RunningThreadItem, ThreadId } from "@uncaged/workflow-protocol";
+
+import type { RunningMarker } from "./types.js";
+
+/**
+ * Resolve the directory that holds the per-thread running markers.
+ *
+ * @param storageRoot - Root directory of the workflow storage.
+ * @returns The `running` sub-directory of `storageRoot`.
+ */
+export function getRunningDir(storageRoot: string): string {
+  const runningDir = join(storageRoot, "running");
+  return runningDir;
+}
+
+/**
+ * Resolve the marker file path for one thread.
+ *
+ * @param storageRoot - Root directory of the workflow storage.
+ * @param threadId - Thread whose marker path is wanted.
+ * @returns `<running dir>/<threadId>.json`.
+ */
+export function getMarkerPath(storageRoot: string, threadId: ThreadId): string {
+  const fileName = `${threadId}.json`;
+  return join(getRunningDir(storageRoot), fileName);
+}
+
+/**
+ * Check whether a process with the given PID currently exists.
+ *
+ * Uses `process.kill(pid, 0)`, which performs the existence/permission check
+ * without delivering a signal.
+ *
+ * @param pid - Process ID to probe.
+ * @returns `true` if the process exists (including when it exists but may not
+ *   be signalled by this user), `false` if it does not exist or `pid` is not a
+ *   positive integer.
+ */
+export function isPidAlive(pid: number): boolean {
+  // PID 0 and negative PIDs address process groups, not a single process.
+  // A corrupt marker must never make us probe (or later signal) a whole group.
+  if (!Number.isInteger(pid) || pid <= 0) {
+    return false;
+  }
+
+  try {
+    process.kill(pid, 0);
+    return true;
+  } catch (err: unknown) {
+    // EPERM: the process exists but we lack permission to signal it -> alive.
+    // ESRCH (no such process) and anything else -> treat as not running.
+    return typeof err === "object" && err !== null && "code" in err && err.code === "EPERM";
+  }
+}
+
+/**
+ * Create (or overwrite) the marker file for a running thread.
+ *
+ * The marker is written to a temp file in the same directory and then renamed
+ * onto the final path, so readers never observe a partially written marker.
+ * If writing or renaming fails, the temp file is removed before the error is
+ * rethrown.
+ *
+ * @param storageRoot - Root directory of the workflow storage.
+ * @param marker - Marker contents; `marker.thread` determines the file name.
+ * @throws Whatever `mkdir`, `writeFile` or `rename` rejects with.
+ */
+export async function createMarker(storageRoot: string, marker: RunningMarker): Promise<void> {
+  const runningDir = getRunningDir(storageRoot);
+  await mkdir(runningDir, { recursive: true });
+
+  const markerPath = getMarkerPath(storageRoot, marker.thread);
+  // Include our PID so concurrent writers for the same thread don't share a temp file.
+  const tempPath = join(runningDir, `.${marker.thread}-${process.pid}.tmp`);
+
+  try {
+    await writeFile(tempPath, JSON.stringify(marker, null, 2), "utf8");
+    await rename(tempPath, markerPath);
+  } catch (err: unknown) {
+    // Best-effort cleanup: never let a cleanup failure mask the original error.
+    try {
+      await rm(tempPath, { force: true });
+    } catch {
+      // Nothing more we can do about the temp file.
+    }
+    throw err;
+  }
+}
+
+/**
+ * Remove a thread's marker file on a best-effort basis.
+ *
+ * Never rejects: a missing file, or any other removal failure, is ignored.
+ *
+ * @param storageRoot - Root directory of the workflow storage.
+ * @param threadId - Thread whose marker should be removed.
+ */
+export async function deleteMarker(storageRoot: string, threadId: ThreadId): Promise<void> {
+  try {
+    await rm(getMarkerPath(storageRoot, threadId), { force: true });
+  } catch {
+    // Best-effort removal: failures are deliberately ignored
+  }
+}
+
+/**
+ * Read and validate a thread's marker file.
+ *
+ * @param storageRoot - Root directory of the workflow storage.
+ * @param threadId - Thread whose marker should be read.
+ * @returns The marker, or `null` if the file is missing, unreadable, not valid
+ *   JSON, or does not have the expected shape (string `thread`, a `workflow`
+ *   value, positive-integer `pid`, finite numeric `startedAt`).
+ */
+export async function readMarker(
+  storageRoot: string,
+  threadId: ThreadId,
+): Promise<RunningMarker | null> {
+  const markerPath = getMarkerPath(storageRoot, threadId);
+
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(await readFile(markerPath, "utf8"));
+  } catch {
+    return null;
+  }
+
+  if (typeof parsed !== "object" || parsed === null) {
+    return null;
+  }
+
+  const { thread, workflow, pid, startedAt } = parsed as Record<string, unknown>;
+
+  // The PID is later handed to process.kill(), so it must be a real single-process
+  // ID: 0 and negative values would address a process group instead.
+  const pidOk = typeof pid === "number" && Number.isInteger(pid) && pid > 0;
+  const startedAtOk = typeof startedAt === "number" && Number.isFinite(startedAt);
+  // NOTE(review): only presence of `workflow` is checked because CasRef's runtime
+  // representation is not visible from this module — tighten if it is a string.
+  const workflowOk = workflow !== undefined && workflow !== null;
+
+  if (typeof thread !== "string" || !workflowOk || !pidOk || !startedAtOk) {
+    return null;
+  }
+
+  // Shape verified at runtime above; the compiler cannot follow it through `unknown`.
+  return parsed as RunningMarker;
+}
+
+/**
+ * Enumerate threads that have a marker file backed by a live process.
+ *
+ * Markers whose PID is no longer alive are deleted as a side effect; files that
+ * are not `*.json` or cannot be read as a marker are skipped.
+ *
+ * @param storageRoot - Root directory of the workflow storage.
+ * @returns One item per live marker, or `[]` if the running directory cannot be read.
+ */
+export async function listRunningThreads(storageRoot: string): Promise<RunningThreadItem[]> {
+  const markerSuffix = ".json";
+
+  let entries: string[];
+  try {
+    entries = await readdir(getRunningDir(storageRoot));
+  } catch {
+    // Running directory is missing or unreadable: nothing is running
+    return [];
+  }
+
+  const alive: RunningThreadItem[] = [];
+  const markerFiles = entries.filter((name) => name.endsWith(markerSuffix));
+
+  for (const name of markerFiles) {
+    // The thread ID is the file name without its ".json" suffix
+    const threadId = name.slice(0, -markerSuffix.length) as ThreadId;
+    const marker = await readMarker(storageRoot, threadId);
+    if (marker === null) {
+      continue;
+    }
+
+    if (isPidAlive(marker.pid)) {
+      const { thread, workflow, pid, startedAt } = marker;
+      alive.push({ thread, workflow, pid, startedAt });
+    } else {
+      // Owning process is gone: drop the stale marker
+      await deleteMarker(storageRoot, threadId);
+    }
+  }
+
+  return alive;
+}
+
+/**
+ * Look up whether a thread currently has a live background process.
+ *
+ * A marker whose PID is no longer alive is deleted as a side effect.
+ *
+ * @param storageRoot - Root directory of the workflow storage.
+ * @param threadId - Thread to check.
+ * @returns The thread's marker when its process is alive, otherwise `null`.
+ */
+export async function isThreadRunning(
+  storageRoot: string,
+  threadId: ThreadId,
+): Promise<RunningMarker | null> {
+  const marker = await readMarker(storageRoot, threadId);
+
+  if (marker !== null && isPidAlive(marker.pid)) {
+    return marker;
+  }
+
+  if (marker !== null) {
+    // Marker exists but its process is gone: clean it up
+    await deleteMarker(storageRoot, threadId);
+  }
+  return null;
+}
@@ -0,0 +1,11 @@
+export {
+  createMarker,
+  deleteMarker,
+  getMarkerPath,
+  getRunningDir,
+  isPidAlive,
+  isThreadRunning,
+  listRunningThreads,
+  readMarker,
+} from "./background.js";
+export type { RunningMarker } from "./types.js";
@@ -0,0 +1,9 @@
+import type { CasRef, ThreadId } from "@uncaged/workflow-protocol";
+
+/** Marker file stored at ~/.uncaged/workflow/running/<thread-id>.json */
+export type RunningMarker = {
+  /** Thread the marker belongs to. */
+  thread: ThreadId;
+  /** Workflow reference recorded for the thread. */
+  workflow: CasRef;
+  /** PID of the process executing the thread. */
+  pid: number;
+  /** Start time — presumably epoch milliseconds (e.g. `Date.now()`); confirm against the writer. */
+  startedAt: number;
+};
@@ -22,6 +22,7 @@ import {
  cmdThreadKill,
  cmdThreadList,
  cmdThreadRead,
+  cmdThreadRunning,
  cmdThreadShow,
  cmdThreadStart,
  cmdThreadStep,
@@ -114,19 +115,41 @@ thread
  .argument("<thread-id>", "Thread ULID")
  .option("--agent <cmd>", "Override agent command")
  .option("-c, --count <number>", "Number of steps to run (default: 1)")
-  .action((threadId: string, opts: { agent: string | undefined; count: string | undefined }) => {
-    const storageRoot = resolveStorageRoot();
-    runAction(async () => {
-      const agentOverride = opts.agent ?? null;
-      const count = opts.count !== undefined ? Number(opts.count) : 1;
-      const results = await cmdThreadStep(storageRoot, threadId, agentOverride, count);
-      if (results.length === 1) {
-        writeOutput(results[0]);
-      } else {
-        writeOutput(results);
-      }
-    });
-  });
+  .option("--background", "Run in background and return immediately")
+  .option("--_background-worker", "Internal flag for background worker process", false)
+  .action(
+    (
+      threadId: string,
+      opts: {
+        agent: string | undefined;
+        count: string | undefined;
+        background: boolean;
+        _backgroundWorker: boolean;
+      },
+    ) => {
+      const storageRoot = resolveStorageRoot();
+      runAction(async () => {
+        const agentOverride = opts.agent ?? null;
+        const count = opts.count !== undefined ? Number(opts.count) : 1;
+        const background = opts.background ?? false;
+        const backgroundWorker = opts._backgroundWorker ?? false;
+
+        const results = await cmdThreadStep(
+          storageRoot,
+          threadId,
+          agentOverride,
+          count,
+          background,
+          backgroundWorker,
+        );
+        if (results.length === 1) {
+          writeOutput(results[0]);
+        } else {
+          writeOutput(results);
+        }
+      });
+    },
+  );

 thread
  .command("show")
@@ -152,6 +175,17 @@ thread
    });
  });

+// `thread running`: resolve the storage root, then print whatever cmdThreadRunning returns.
+thread
+  .command("running")
+  .description("List threads currently executing in the background")
+  .action(() => {
+    const storageRoot = resolveStorageRoot();
+    runAction(async () => {
+      const result = await cmdThreadRunning(storageRoot);
+      writeOutput(result);
+    });
+  });
+
 thread
  .command("kill")
  .description("Terminate and archive a thread")
@@ -1,4 +1,4 @@
-import { execFileSync } from "node:child_process";
+import { execFileSync, spawn } from "node:child_process";
 import { access, readFile } from "node:fs/promises";
 import { dirname, isAbsolute, resolve as resolvePath } from "node:path";
 import type { Store as CasStore, JSONSchema } from "@uncaged/json-cas";
@@ -10,6 +10,7 @@ import type {
  AgentConfig,
  CasRef,
  ModeratorContext,
+  RunningThreadsOutput,
  StartEntry,
  StartNodePayload,
  StartOutput,
@@ -27,7 +28,12 @@ import type {
 import { createProcessLogger, generateUlid, type ProcessLogger } from "@uncaged/workflow-util";
 import { config as loadDotenv } from "dotenv";
 import { parse, stringify } from "yaml";
-
+import {
+  createMarker,
+  deleteMarker,
+  isThreadRunning,
+  listRunningThreads,
+} from "../background/index.js";
 import {
  appendThreadHistory,
  createUwfStore,
@@ -52,6 +58,7 @@ const PL_AGENT_SPAWN = "R5J2W8N4";
 const PL_AGENT_DONE = "C6P9E3H7";
 const PL_THREAD_ARCHIVED = "F4D8Q2K5";
 const PL_STEP_ERROR = "B8T5N1V6";
+const PL_BACKGROUND_START = "X7Q4W9M2";

 function failStep(plog: ProcessLogger, message: string): never {
  plog.log(PL_STEP_ERROR, message, null);
@@ -321,6 +328,7 @@ export async function cmdThreadShow(storageRoot: string, threadId: ThreadId): Pr
      thread: threadId,
      head: activeHead,
      done: false,
+      background: null,
    };
  }

@@ -331,6 +339,7 @@ export async function cmdThreadShow(storageRoot: string, threadId: ThreadId): Pr
      thread: threadId,
      head: hist.head,
      done: true,
+      background: null,
    };
  }

@@ -804,13 +813,11 @@ function spawnAgent(
  role: string,
  edgePrompt: string,
 ): CasRef {
-  const argv = [...agent.args, threadId, role];
-  const env = { ...process.env, UWF_EDGE_PROMPT: edgePrompt };
+  const argv = [...agent.args, "--thread", threadId, "--role", role, "--prompt", edgePrompt];
  let stdout: string;
  try {
    stdout = execFileSync(agent.command, argv, {
      encoding: "utf8",
-      env,
      stdio: ["ignore", "pipe", "pipe"],
      maxBuffer: 50 * 1024 * 1024, // 50 MB — stream-json output can be large
    });
@@ -855,26 +862,60 @@ export async function cmdThreadStep(
  threadId: ThreadId,
  agentOverride: string | null,
  count: number,
+  background: boolean,
+  backgroundWorker: boolean,
 ): Promise<StepOutput[]> {
  if (count < 1 || !Number.isInteger(count)) {
    fail(`--count must be a positive integer, got: ${count}`);
  }

+  // Check if thread is already running in background (unless we ARE the background worker)
+  if (!backgroundWorker) {
+    const runningMarker = await isThreadRunning(storageRoot, threadId);
+    if (runningMarker !== null) {
+      fail(`thread already executing in background (PID: ${runningMarker.pid})`);
+    }
+  }
+
  const workflowHash = await resolveActiveThreadWorkflowHash(storageRoot, threadId);
  const plog = createProcessLogger({
    storageRoot,
    context: { thread: threadId, workflow: workflowHash },
  });

-  const results: StepOutput[] = [];
-  for (let i = 0; i < count; i++) {
-    const result = await cmdThreadStepOnce(storageRoot, threadId, agentOverride, plog);
-    results.push(result);
-    if (result.done) {
-      break;
+  if (background && !backgroundWorker) {
+    // Spawn background process
+    return cmdThreadStepBackground(storageRoot, threadId, agentOverride, count, plog, workflowHash);
+  }
+
+  // If we're the background worker, create marker before execution
+  let markerCreated = false;
+  if (backgroundWorker) {
+    await createMarker(storageRoot, {
+      thread: threadId,
+      workflow: workflowHash,
+      pid: process.pid,
+      startedAt: Date.now(),
+    });
+    markerCreated = true;
+  }
+
+  try {
+    const results: StepOutput[] = [];
+    for (let i = 0; i < count; i++) {
+      const result = await cmdThreadStepOnce(storageRoot, threadId, agentOverride, plog);
+      results.push(result);
+      if (result.done) {
+        break;
+      }
+    }
+    return results;
+  } finally {
+    // Cleanup marker if we created one
+    if (markerCreated) {
+      await deleteMarker(storageRoot, threadId);
    }
  }
-  return results;
 }

 async function resolveActiveThreadWorkflowHash(
@@ -891,6 +932,57 @@ async function resolveActiveThreadWorkflowHash(
  return chain.start.workflow;
 }

+/**
+ * Launch `thread step` for this thread in a detached worker process and return
+ * immediately with the thread's current head.
+ *
+ * The worker is started as `<runtime> [runtime flags] <script> thread step ...`
+ * with the internal `--_background-worker` flag appended. The call only reports
+ * success once the child has actually been spawned; a launch failure is
+ * reported through `failStep` instead of being silently lost.
+ *
+ * @param storageRoot - Root directory of the workflow storage.
+ * @param threadId - Thread to step.
+ * @param agentOverride - Agent command forwarded via `--agent`, or `null` for none.
+ * @param count - Number of steps forwarded via `--count`.
+ * @param plog - Process logger used for the start event and for failures.
+ * @param workflowHash - Workflow reference echoed back in the result.
+ * @returns A single-element array describing the pre-step state with `background: true`.
+ */
+async function cmdThreadStepBackground(
+  storageRoot: string,
+  threadId: ThreadId,
+  agentOverride: string | null,
+  count: number,
+  plog: ProcessLogger,
+  workflowHash: CasRef,
+): Promise<StepOutput[]> {
+  // Get current head to return to caller
+  const index = await loadThreadsIndex(storageRoot);
+  const headHash = index[threadId];
+  if (headHash === undefined) {
+    failStep(plog, `thread not active: ${threadId}`);
+  }
+
+  const scriptPath = process.argv[1];
+  if (scriptPath === undefined) {
+    failStep(plog, "unable to determine script path for background execution");
+  }
+
+  // Re-run this CLI through the same runtime binary (and runtime flags) that is
+  // running us. argv[1] is the script, which is not necessarily executable itself.
+  const args = [
+    ...process.execArgv,
+    scriptPath,
+    "thread",
+    "step",
+    threadId,
+    "--count",
+    String(count),
+  ];
+
+  if (agentOverride !== null) {
+    args.push("--agent", agentOverride);
+  }
+
+  // Internal flag to signal the background worker to create/cleanup markers
+  args.push("--_background-worker");
+
+  plog.log(PL_BACKGROUND_START, `spawning background process count=${count}`, null);
+
+  const child = spawn(process.execPath, args, {
+    detached: true,
+    stdio: "ignore",
+  });
+
+  // Launch failures (ENOENT, EACCES, ...) arrive asynchronously as an "error"
+  // event. Wait for either outcome so we never claim success for a worker that
+  // never started; the listener also prevents an unhandled "error" event.
+  try {
+    await new Promise<void>((resolve, reject) => {
+      child.once("spawn", () => resolve());
+      child.once("error", reject);
+    });
+  } catch (err: unknown) {
+    const reason = err instanceof Error ? err.message : String(err);
+    failStep(plog, `failed to spawn background process: ${reason}`);
+  }
+
+  // Let this process exit without waiting for the worker
+  child.unref();
+
+  // Return immediately with current state and background flag
+  return [
+    {
+      workflow: workflowHash,
+      thread: threadId,
+      head: headHash,
+      done: false,
+      background: true,
+    },
+  ];
+}
+
 async function cmdThreadStepOnce(
  storageRoot: string,
  threadId: ThreadId,
@@ -928,6 +1020,7 @@ async function cmdThreadStepOnce(
      thread: threadId,
      head: headHash,
      done: true,
+      background: null,
    };
  }

@@ -975,6 +1068,7 @@ async function cmdThreadStepOnce(
    thread: threadId,
    head: newHead,
    done,
+    background: null,
  };
 }

@@ -1111,6 +1205,17 @@ export async function cmdThreadKill(storageRoot: string, threadId: ThreadId): Pr
    fail(`thread not active: ${threadId}`);
  }

+  // Check if thread is running in background and terminate it
+  const runningMarker = await isThreadRunning(storageRoot, threadId);
+  if (runningMarker !== null) {
+    try {
+      process.kill(runningMarker.pid, "SIGTERM");
+    } catch {
+      // Process may have already exited, ignore error
+    }
+    await deleteMarker(storageRoot, threadId);
+  }
+
  const uwf = await createUwfStore(storageRoot);
  const workflow = resolveWorkflowFromHead(uwf, head);
  if (workflow === null) {
@@ -1130,3 +1235,8 @@ export async function cmdThreadKill(storageRoot: string, threadId: ThreadId): Pr

  return { thread: threadId, archived: true };
 }
+
+/** `thread running`: wrap the result of `listRunningThreads` in the command's output shape. */
+export async function cmdThreadRunning(storageRoot: string): Promise<RunningThreadsOutput> {
+  return { threads: await listRunningThreads(storageRoot) };
+}
@@ -19,7 +19,14 @@ mock.module("../src/tools/index.js", () => ({
  getBuiltinTools: () => [],
 }));

-import { executeTurnTools, runBuiltinLoop, shouldNudge } from "../src/loop.js";
+import {
+  executeTurnTools,
+  extractFinalText,
+  runBuiltinLoop,
+  shouldInjectDeadlineWarning,
+  shouldNudge,
+  shouldProcessToolCalls,
+} from "../src/loop.js";

 const fakeProvider = {} as any;
 const fakeToolCtx = {} as any;
@@ -154,3 +161,96 @@ describe("runBuiltinLoop integration", () => {
    expect(original.length).toBe(1);
  });
 });
+
+// These cases use maxTurns = 10 and rely on the warning window being the last 3 turns.
+describe("shouldInjectDeadlineWarning", () => {
+  test("5.1 returns true when turn count reaches warning threshold and not yet warned", () => {
+    expect(shouldInjectDeadlineWarning(7, 10, false, false)).toBe(true);
+  });
+  test("5.2 returns false when already warned", () => {
+    expect(shouldInjectDeadlineWarning(7, 10, true, false)).toBe(false);
+  });
+  test("5.3 returns false when noTools is true", () => {
+    expect(shouldInjectDeadlineWarning(7, 10, false, true)).toBe(false);
+  });
+  test("5.4 returns false when turns remaining > DEADLINE_WARNING_TURNS", () => {
+    expect(shouldInjectDeadlineWarning(5, 10, false, false)).toBe(false);
+  });
+  test("5.5 returns true when exactly at warning threshold", () => {
+    // Pin both sides of the boundary: 4 turns left is outside the window, 3 is inside.
+    expect(shouldInjectDeadlineWarning(6, 10, false, false)).toBe(false);
+    expect(shouldInjectDeadlineWarning(7, 10, false, false)).toBe(true);
+  });
+  test("5.6 returns false when turns remaining is 0", () => {
+    expect(shouldInjectDeadlineWarning(10, 10, false, false)).toBe(false);
+  });
+  test("5.7 returns true on the last turn (1 turn remaining)", () => {
+    expect(shouldInjectDeadlineWarning(9, 10, false, false)).toBe(true);
+  });
+});
+
+describe("shouldProcessToolCalls", () => {
+  // Build a minimal tool call fixture with empty JSON arguments.
+  const call = (id: string, name: string) => ({ id, name, arguments: "{}" });
+
+  test("6.1 returns true when toolCalls present and noTools=false", () => {
+    const outcome = shouldProcessToolCalls([call("x", "read")], false);
+    expect(outcome).toBe(true);
+  });
+  test("6.2 returns false when toolCalls is null", () => {
+    const outcome = shouldProcessToolCalls(null, false);
+    expect(outcome).toBe(false);
+  });
+  test("6.3 returns false when toolCalls is empty array", () => {
+    const outcome = shouldProcessToolCalls([], false);
+    expect(outcome).toBe(false);
+  });
+  test("6.4 returns false when noTools=true", () => {
+    const outcome = shouldProcessToolCalls([call("x", "read")], true);
+    expect(outcome).toBe(false);
+  });
+  test("6.5 returns true when multiple tool calls present", () => {
+    const calls = [call("x1", "read"), call("x2", "write")];
+    expect(shouldProcessToolCalls(calls, false)).toBe(true);
+  });
+});
+
+describe("extractFinalText", () => {
+  // Message fixtures; every plain message carries tool_calls: null.
+  const system = (content: string) => ({ role: "system" as const, content, tool_calls: null });
+  const user = (content: string) => ({ role: "user" as const, content, tool_calls: null });
+  const assistant = (content: string) => ({
+    role: "assistant" as const,
+    content,
+    tool_calls: null,
+  });
+  // Assistant turn that only carries a tool call and no text.
+  const assistantToolOnly = () => ({
+    role: "assistant" as const,
+    content: null,
+    tool_calls: [{ id: "x", name: "t", arguments: "{}" }],
+  });
+
+  test("7.1 returns last assistant message content", () => {
+    const messages = [system("sys"), assistant("first"), assistant("last")];
+    expect(extractFinalText(messages)).toBe("last");
+  });
+  test("7.2 returns empty string when no assistant messages", () => {
+    expect(extractFinalText([system("sys")])).toBe("");
+  });
+  test("7.3 skips assistant messages with null content", () => {
+    const messages = [assistant("first"), assistantToolOnly(), assistant("second")];
+    expect(extractFinalText(messages)).toBe("second");
+  });
+  test("7.4 skips assistant messages with empty content", () => {
+    const messages = [assistant("first"), assistant(""), user("nudge")];
+    expect(extractFinalText(messages)).toBe("first");
+  });
+  test("7.5 handles empty messages array", () => {
+    expect(extractFinalText([])).toBe("");
+  });
+  test("7.6 handles messages with only user and system roles", () => {
+    const messages = [system("sys"), user("query")];
+    expect(extractFinalText(messages)).toBe("");
+  });
+});
@@ -1,7 +1,12 @@
 import type { ResolvedLlmProvider } from "@uncaged/workflow-agent-kit";
 import { createLogger } from "@uncaged/workflow-util";

-import { type ChatMessage, chatCompletionWithTools, type LlmToolCall } from "./llm/index.js";
+import {
+  type ChatMessage,
+  chatCompletionWithTools,
+  type LlmToolCall,
+  type OpenAiToolDefinition,
+} from "./llm/index.js";
 import { appendSessionTurn } from "./session.js";
 import {
  builtinToolsToOpenAi,
@@ -80,10 +85,184 @@ export type ShouldNudgeOptions = {
 const MAX_NUDGES = 3;
 const DEADLINE_WARNING_TURNS = 3;

+/**
+ * Decide whether the deadline warning should be injected before this turn.
+ *
+ * True only when tools are enabled, no warning has been sent yet, and between 1
+ * and DEADLINE_WARNING_TURNS turns (inclusive) remain.
+ */
+export function shouldInjectDeadlineWarning(
+  turn: number,
+  maxTurns: number,
+  alreadyWarned: boolean,
+  noTools: boolean,
+): boolean {
+  if (noTools || alreadyWarned) {
+    return false;
+  }
+  const remaining = maxTurns - turn;
+  return remaining > 0 && remaining <= DEADLINE_WARNING_TURNS;
+}
+
+/** True when tools are enabled and the response carries at least one tool call. */
+export function shouldProcessToolCalls(toolCalls: LlmToolCall[] | null, noTools: boolean): boolean {
+  if (noTools || toolCalls === null) {
+    return false;
+  }
+  return toolCalls.length > 0;
+}
+
+/**
+ * Return the content of the most recent assistant message that has
+ * non-blank text, or "" when there is none.
+ */
+export function extractFinalText(messages: ChatMessage[]): string {
+  let idx = messages.length;
+  while (idx-- > 0) {
+    const candidate = messages[idx];
+    if (candidate === undefined || candidate.role !== "assistant") {
+      continue;
+    }
+    const body = candidate.content;
+    if (body !== null && body.trim() !== "") {
+      return body;
+    }
+  }
+  return "";
+}
+
+/** Log the approaching turn limit and append a user message telling the agent to wrap up. */
+function injectDeadlineWarning(messages: ChatMessage[], turnsRemaining: number): void {
+  log("4NRXW6KT", `${turnsRemaining} turns remaining, injecting deadline warning`);
+
+  const warning = [
+    `⚠️ You have ${turnsRemaining} turns remaining. `,
+    "Wrap up your work and output the YAML frontmatter starting with `---`. ",
+    "If you cannot finish in time, output frontmatter with `status: failed` and describe what remains.",
+  ].join("");
+
+  messages.push({ role: "user", content: warning });
+}
+
+/** Outcome of handling an assistant turn that contained no tool calls. */
+type HandleTextOnlyTurnResult = {
+  /** True when the text is accepted as the final answer and the loop should stop. */
+  shouldBreak: boolean;
+  /** The accepted final text; "" when the agent was nudged instead. */
+  finalText: string;
+  /** Number of session turns recorded while handling this turn. */
+  turnCount: number;
+  /** Nudge counter after this turn (incremented when a nudge was sent). */
+  nudgeCount: number;
+  /** Delta the caller applies to its turn index: -1 when a nudge should not consume budget, else 0. */
+  turnAdjustment: number;
+};
+
+/**
+ * Handle an assistant turn without tool calls.
+ *
+ * Records the turn in the session, then either accepts the text as final or,
+ * when `shouldNudge` says so, appends a nudge message asking the agent to keep
+ * using tools or emit the `---` frontmatter. The first MAX_NUDGES nudges are
+ * reported with `turnAdjustment: -1` so they do not consume turn budget.
+ */
+async function handleTextOnlyTurn(
+  text: string,
+  messages: ChatMessage[],
+  storageRoot: string,
+  sessionId: string,
+  noTools: boolean,
+  turn: number,
+  maxTurns: number,
+  currentNudgeCount: number,
+): Promise<HandleTextOnlyTurnResult> {
+  await appendTurn(storageRoot, sessionId, {
+    role: "assistant",
+    content: text,
+    toolCalls: null,
+    reasoning: null,
+  });
+
+  const needsNudge = shouldNudge({ noTools, text, turn, maxTurns });
+  if (!needsNudge) {
+    // Text is acceptable as the final answer
+    return {
+      shouldBreak: true,
+      finalText: text,
+      turnCount: 1,
+      nudgeCount: currentNudgeCount,
+      turnAdjustment: 0,
+    };
+  }
+
+  const nudgeCount = currentNudgeCount + 1;
+  log("7FXQM2KN", `text-only turn without frontmatter, nudge ${nudgeCount}/${MAX_NUDGES}`);
+  messages.push({
+    role: "user",
+    content:
+      "You stopped calling tools but your response does not start with the required `---` YAML frontmatter. " +
+      "Either continue using tools to complete your work, or output your final response starting with `---`.",
+  });
+
+  return {
+    shouldBreak: false,
+    finalText: "",
+    turnCount: 1,
+    nudgeCount,
+    // Nudges are free until MAX_NUDGES is exceeded
+    turnAdjustment: nudgeCount <= MAX_NUDGES ? -1 : 0,
+  };
+}
+
+/**
+ * Handle an assistant turn that requested tool calls: record the assistant
+ * turn, then run the tools.
+ *
+ * @returns 1 for the assistant turn plus whatever `executeTurnTools` reports.
+ */
+async function handleToolCallTurn(
+  content: string,
+  toolCalls: LlmToolCall[],
+  messages: ChatMessage[],
+  storageRoot: string,
+  sessionId: string,
+  toolCtx: ToolContext,
+): Promise<number> {
+  const assistantTurn = {
+    role: "assistant" as const,
+    content,
+    toolCalls: mapToolCallsForPayload(toolCalls),
+    reasoning: null,
+  };
+  await appendTurn(storageRoot, sessionId, assistantTurn);
+
+  const toolTurns = await executeTurnTools(toolCalls, toolCtx, messages, storageRoot, sessionId);
+  return 1 + toolTurns;
+}
+
 export function shouldNudge({ noTools, text, turn, maxTurns }: ShouldNudgeOptions): boolean {
  return !noTools && !text.trimStart().startsWith("---") && turn < maxTurns - 1;
 }

+/** Outcome of one LLM round-trip inside the builtin loop. */
+type ProcessLoopIterationResult = {
+  /** True when the loop should stop with `finalText` as the answer. */
+  shouldBreak: boolean;
+  /** Final text when `shouldBreak` is true; "" otherwise. */
+  finalText: string;
+  /** Number of session turns recorded during this iteration. */
+  turnCount: number;
+  /** Nudge counter after this iteration. */
+  nudgeCount: number;
+  /** Delta the caller applies to its turn index (-1 for a budget-free nudge, else 0). */
+  turnAdjustment: number;
+};
+
+/**
+ * Run one loop iteration: ask the LLM, append its reply to `messages`, then
+ * dispatch to the text-only or tool-call handler.
+ *
+ * @param options - Loop options (provider, session, tool context, limits).
+ * @param messages - Conversation so far; mutated in place.
+ * @param openAiTools - Tool definitions offered to the LLM (none are sent when empty).
+ * @param turn - Current turn index of the caller's loop.
+ * @param nudgeCount - Nudges sent so far.
+ * @returns What the caller should do next and how to update its counters.
+ */
+async function processLoopIteration(
+  options: RunBuiltinLoopOptions,
+  messages: ChatMessage[],
+  openAiTools: OpenAiToolDefinition[],
+  turn: number,
+  nudgeCount: number,
+): Promise<ProcessLoopIterationResult> {
+  const response = await chatCompletionWithTools(
+    options.provider,
+    messages,
+    openAiTools.length > 0 ? openAiTools : null,
+  );
+
+  // When noTools is set, ignore any tool_calls the LLM might still return
+  const effectiveToolCalls = options.noTools ? null : (response.toolCalls ?? null);
+
+  const assistantMessage: ChatMessage = {
+    role: "assistant",
+    content: response.content,
+    tool_calls: effectiveToolCalls,
+  };
+  messages.push(assistantMessage);
+
+  // The explicit null check lets the compiler narrow effectiveToolCalls for the
+  // tool-call branch below, so no type assertion is needed there.
+  if (
+    effectiveToolCalls === null ||
+    !shouldProcessToolCalls(effectiveToolCalls, options.noTools)
+  ) {
+    return handleTextOnlyTurn(
+      response.content ?? "",
+      messages,
+      options.storageRoot,
+      options.sessionId,
+      options.noTools,
+      turn,
+      options.maxTurns,
+      nudgeCount,
+    );
+  }
+
+  const turnCount = await handleToolCallTurn(
+    response.content ?? "",
+    effectiveToolCalls,
+    messages,
+    options.storageRoot,
+    options.sessionId,
+    options.toolCtx,
+  );
+
+  // Tool-call turns never end the loop and never adjust the turn index
+  return {
+    shouldBreak: false,
+    finalText: "",
+    turnCount,
+    nudgeCount,
+    turnAdjustment: 0,
+  };
+}
+
 /** Agent run loop: LLM ↔ tools until no tool_calls or maxTurns. */
 export async function runBuiltinLoop(
  options: RunBuiltinLoopOptions,
@@ -99,95 +278,25 @@ export async function runBuiltinLoop(
    log("8K2M4N7P", `builtin loop turn ${turn + 1}/${options.maxTurns}`);

    // Warn agent when approaching turn limit
-    const turnsRemaining = options.maxTurns - turn;
-    if (!options.noTools && !deadlineWarned && turnsRemaining <= DEADLINE_WARNING_TURNS) {
+    if (shouldInjectDeadlineWarning(turn, options.maxTurns, deadlineWarned, options.noTools)) {
      deadlineWarned = true;
-      log("4NRXW6KT", `${turnsRemaining} turns remaining, injecting deadline warning`);
-      messages.push({
-        role: "user",
-        content:
-          `⚠️ You have ${turnsRemaining} turns remaining. ` +
-          "Wrap up your work and output the YAML frontmatter starting with `---`. " +
-          "If you cannot finish in time, output frontmatter with `status: failed` and describe what remains.",
-      });
+      const turnsRemaining = options.maxTurns - turn;
+      injectDeadlineWarning(messages, turnsRemaining);
    }

-    const response = await chatCompletionWithTools(
-      options.provider,
-      messages,
-      openAiTools.length > 0 ? openAiTools : null,
-    );
+    const result = await processLoopIteration(options, messages, openAiTools, turn, nudgeCount);
+    turnCount += result.turnCount;
+    nudgeCount = result.nudgeCount;
+    turn += result.turnAdjustment;

-    // When noTools is set, ignore any tool_calls the LLM might still return
-    const effectiveToolCalls = options.noTools ? null : (response.toolCalls ?? null);
-
-    const assistantMessage: ChatMessage = {
-      role: "assistant",
-      content: response.content,
-      tool_calls: effectiveToolCalls,
-    };
-    messages.push(assistantMessage);
-
-    if (effectiveToolCalls === null || effectiveToolCalls.length === 0) {
-      const text = response.content ?? "";
-      await appendTurn(options.storageRoot, options.sessionId, {
-        role: "assistant",
-        content: text,
-        toolCalls: null,
-        reasoning: null,
-      });
-      turnCount += 1;
-
-      if (shouldNudge({ noTools: options.noTools, text, turn, maxTurns: options.maxTurns })) {
-        nudgeCount += 1;
-        log("7FXQM2KN", `text-only turn without frontmatter, nudge ${nudgeCount}/${MAX_NUDGES}`);
-        const nudge =
-          "You stopped calling tools but your response does not start with the required `---` YAML frontmatter. " +
-          "Either continue using tools to complete your work, or output your final response starting with `---`.";
-        messages.push({ role: "user", content: nudge });
-        // Nudge doesn't consume turn budget (up to MAX_NUDGES)
-        if (nudgeCount <= MAX_NUDGES) {
-          turn -= 1;
-        }
-        continue;
-      }
-
-      finalText = text;
+    if (result.shouldBreak) {
+      finalText = result.finalText;
      break;
    }
-
-    // Assistant turn with tool calls
-    await appendTurn(options.storageRoot, options.sessionId, {
-      role: "assistant",
-      content: response.content ?? "",
-      toolCalls: mapToolCallsForPayload(effectiveToolCalls),
-      reasoning: null,
-    });
-    turnCount += 1;
-
-    // Execute tools
-    turnCount += await executeTurnTools(
-      effectiveToolCalls,
-      options.toolCtx,
-      messages,
-      options.storageRoot,
-      options.sessionId,
-    );
  }

-  if (finalText === "" && messages.length > 0) {
-    for (let i = messages.length - 1; i >= 0; i--) {
-      const msg = messages[i];
-      if (
-        msg !== undefined &&
-        msg.role === "assistant" &&
-        msg.content !== null &&
-        msg.content.trim() !== ""
-      ) {
-        finalText = msg.content;
-        break;
-      }
-    }
+  if (finalText === "") {
+    finalText = extractFinalText(messages);
  }

  return { finalText, messages, turnCount };
@@ -154,6 +154,99 @@ describe("parseClaudeCodeStreamOutput", () => {
  });
 });

+describe("parseClaudeCodeStreamOutput — helper extraction", () => {
+  // Terminal "result" line shared by every case; only num_turns varies.
+  const resultLine = (numTurns: number): string =>
+    JSON.stringify({
+      type: "result",
+      subtype: "success",
+      result: "ok",
+      session_id: "s1",
+      num_turns: numTurns,
+      total_cost_usd: 0,
+      duration_ms: 0,
+      stop_reason: "end_turn",
+    });
+
+  // Serialise the records as NDJSON, append the result line, and parse the stream.
+  const parseStream = (records: unknown[], numTurns = 0) => {
+    const ndjson = [...records.map((record) => JSON.stringify(record)), resultLine(numTurns)];
+    return parseClaudeCodeStreamOutput(ndjson.join("\n"));
+  };
+
+  test("processSystemLine sets model from system message", () => {
+    const parsed = parseStream([{ type: "system", model: "claude-opus-4" }]);
+    expect(parsed).not.toBeNull();
+    expect(parsed?.model).toBe("claude-opus-4");
+  });
+
+  test("processAssistantLine skips empty content", () => {
+    const parsed = parseStream([
+      { type: "assistant", message: { role: "assistant", content: [] } },
+    ]);
+    expect(parsed).not.toBeNull();
+    expect(parsed?.turns).toHaveLength(0);
+  });
+
+  test("processUserLine skips when no tool_result items", () => {
+    const parsed = parseStream([
+      { type: "user", message: { role: "user", content: [{ type: "text", text: "hi" }] } },
+    ]);
+    expect(parsed).not.toBeNull();
+    expect(parsed?.turns).toHaveLength(0);
+  });
+
+  test("turn indices are sequential across mixed assistant and user lines", () => {
+    const assistantText = (text: string) => ({
+      type: "assistant",
+      message: { role: "assistant", content: [{ type: "text", text }] },
+    });
+    const toolResult = {
+      type: "user",
+      message: { role: "user", content: [{ type: "tool_result", content: "R" }] },
+    };
+
+    const parsed = parseStream([assistantText("A"), toolResult, assistantText("B")], 3);
+    expect(parsed).not.toBeNull();
+    expect(parsed?.turns).toHaveLength(3);
+    expect(parsed?.turns.map((t) => t.index)).toEqual([0, 1, 2]);
+  });
+});
+
 describe("storeClaudeCodeDetail", () => {
  const baseParsed: ClaudeCodeParsedResult = {
    type: "result",
@@ -34,7 +34,7 @@ export const CLAUDE_CODE_DETAIL_SCHEMA: JSONSchema = {
    },
    turns: {
      type: "array",
-      items: { type: "string" },
+      items: { type: "string", format: "cas_ref" },
    },
  },
  additionalProperties: false,
@@ -67,101 +67,105 @@ function extractToolResultContent(content: unknown[]): string {
  return results.join("\n");
 }

-/**
- * Parse Claude Code stream-json (NDJSON) output.
- * Each line is a JSON object with type: "system" | "assistant" | "user" | "result".
- */
-export function parseClaudeCodeStreamOutput(stdout: string): ClaudeCodeParsedResult | null {
-  const lines = stdout.trim().split("\n");
-  const turns: ClaudeCodeTurnPayload[] = [];
-  let resultLine: Record<string, unknown> | null = null;
-  let model = "";
-  let turnIndex = 0;
+/** Mutable accumulator shared by the per-line handlers while a stream is parsed. */
+type ParseState = {
+  /** Turns recorded so far, in the order their lines were processed. */
+  turns: ClaudeCodeTurnPayload[];
+  /** The latest line whose type was "result"; null until one is seen. */
+  resultLine: Record<string, unknown> | null;
+  /** Model name copied from a "system" line that carries a string `model`. */
+  model: string;
+  /** Index that will be assigned to the next recorded turn. */
+  turnIndex: number;
+};

-  for (const line of lines) {
-    let parsed: unknown;
-    try {
-      parsed = JSON.parse(line);
-    } catch {
-      continue;
-    }
-    if (!isRecord(parsed)) continue;
-
-    const type = parsed.type;
-
-    if (type === "system" && typeof parsed.model === "string") {
-      model = parsed.model;
-    }
-
-    if (type === "assistant" && isRecord(parsed.message)) {
-      const msg = parsed.message;
-      const content = Array.isArray(msg.content) ? msg.content : [];
-      const textContent = extractTextContent(content as unknown[]);
-      const toolCalls = extractToolCalls(content as unknown[]);
-
-      // Only record turns that have actual content
-      if (textContent !== "" || toolCalls.length > 0) {
-        turns.push({
-          index: turnIndex++,
-          role: "assistant",
-          content: textContent,
-          toolCalls: toolCalls.length > 0 ? toolCalls : null,
-        });
-      }
-    }
-
-    if (type === "user" && isRecord(parsed.message)) {
-      const msg = parsed.message;
-      const content = Array.isArray(msg.content) ? msg.content : [];
-      const resultContent = extractToolResultContent(content as unknown[]);
-
-      if (resultContent !== "") {
-        turns.push({
-          index: turnIndex++,
-          role: "tool_result",
-          content: resultContent,
-          toolCalls: null,
-        });
-      }
-    }
-
-    if (type === "result") {
-      resultLine = parsed;
-    }
+/** Copies the model name from a "system" line into the state when `model` is a string. */
+function processSystemLine(parsed: Record<string, unknown>, state: ParseState): void {
+  if (typeof parsed.model === "string") {
+    state.model = parsed.model;
   }
+}

-  if (resultLine === null) return null;
+/**
+ * Handle an "assistant" line: append an assistant turn when the message carries
+ * text or tool calls. Lines whose `message` is not a record are ignored.
+ */
+function processAssistantLine(parsed: Record<string, unknown>, state: ParseState): void {
+  const message = parsed.message;
+  if (!isRecord(message)) return;
+
+  const blocks: unknown[] = Array.isArray(message.content) ? message.content : [];
+  const text = extractTextContent(blocks);
+  const calls = extractToolCalls(blocks);
+  const hasCalls = calls.length > 0;
+
+  // Messages with neither text nor tool calls are dropped and do not consume an index.
+  if (text === "" && !hasCalls) return;
+
+  state.turns.push({
+    index: state.turnIndex++,
+    role: "assistant",
+    content: text,
+    toolCalls: hasCalls ? calls : null,
+  });
+}

-  const sessionId = resultLine.session_id;
-  const result = resultLine.result;
-  const subtype = resultLine.subtype;
+/**
+ * Handle a "user" line: append a tool_result turn when the message yields
+ * non-empty tool result content. Lines whose `message` is not a record are ignored.
+ */
+function processUserLine(parsed: Record<string, unknown>, state: ParseState): void {
+  const message = parsed.message;
+  if (!isRecord(message)) return;
+
+  const blocks: unknown[] = Array.isArray(message.content) ? message.content : [];
+  const toolOutput = extractToolResultContent(blocks);
+
+  // User lines without any tool result text are dropped and do not consume an index.
+  if (toolOutput === "") return;
+
+  state.turns.push({
+    index: state.turnIndex++,
+    role: "tool_result",
+    content: toolOutput,
+    toolCalls: null,
+  });
+}

+/**
+ * Parse one stream line and dispatch it by its `type` field.
+ * Lines that are not valid JSON, or whose JSON is not a record, are skipped.
+ */
+function processLine(line: string, state: ParseState): void {
+  let record: unknown;
+  try {
+    record = JSON.parse(line);
+  } catch {
+    // Not JSON: ignore the line rather than failing the whole parse.
+    return;
+  }
+  if (!isRecord(record)) return;
+
+  switch (record.type) {
+    case "system":
+      processSystemLine(record, state);
+      break;
+    case "assistant":
+      processAssistantLine(record, state);
+      break;
+    case "user":
+      processUserLine(record, state);
+      break;
+    case "result":
+      // A later "result" line replaces an earlier one.
+      state.resultLine = record;
+      break;
+    default:
+      break;
+  }
+}
+
+/**
+ * Build the parsed result from the accumulated state.
+ * Returns null when no "result" line was seen, or when that line's
+ * session_id, result, or subtype field is not a string.
+ */
+function assembleResult(state: ParseState): ClaudeCodeParsedResult | null {
+  if (state.resultLine === null) return null;
+  const sessionId = state.resultLine.session_id;
+  const result = state.resultLine.result;
+  const subtype = state.resultLine.subtype;
  if (typeof sessionId !== "string" || typeof result !== "string" || typeof subtype !== "string") {
    return null;
  }
-
-  const usage = isRecord(resultLine.usage) ? resultLine.usage : {};
-
+  const usage = isRecord(state.resultLine.usage) ? state.resultLine.usage : {};
  return {
-    type: safeString(resultLine.type, "result"),
+    type: safeString(state.resultLine.type, "result"),
    subtype: subtype as ClaudeCodeParsedResult["subtype"],
    result,
    sessionId,
-    numTurns: safeNumber(resultLine.num_turns),
-    totalCostUsd: safeNumber(resultLine.total_cost_usd),
-    durationMs: safeNumber(resultLine.duration_ms),
-    model,
-    stopReason: safeString(resultLine.stop_reason),
+    numTurns: safeNumber(state.resultLine.num_turns),
+    totalCostUsd: safeNumber(state.resultLine.total_cost_usd),
+    durationMs: safeNumber(state.resultLine.duration_ms),
+    model: state.model,
+    stopReason: safeString(state.resultLine.stop_reason),
    usage: {
      inputTokens: safeNumber(usage.input_tokens),
      outputTokens: safeNumber(usage.output_tokens),
      cacheReadInputTokens: safeNumber(usage.cache_read_input_tokens),
      cacheCreationInputTokens: safeNumber(usage.cache_creation_input_tokens),
    },
-    turns,
+    turns: state.turns,
  };
 }

+/**
+ * Parse the NDJSON emitted by Claude Code in stream-json mode.
+ * Each line is expected to be a JSON object whose type is one of
+ * "system" | "assistant" | "user" | "result". Returns null when the
+ * stream does not yield a usable result line.
+ */
+export function parseClaudeCodeStreamOutput(stdout: string): ClaudeCodeParsedResult | null {
+  const state: ParseState = { turns: [], resultLine: null, model: "", turnIndex: 0 };
+  stdout
+    .trim()
+    .split("\n")
+    .forEach((line) => processLine(line, state));
+  return assembleResult(state);
+}
+
 /**
 * Legacy: parse Claude Code plain JSON output (non-streaming).
 * Falls back when stream-json is not available.
@@ -4,6 +4,96 @@ import { HermesAcpClient } from "../src/acp-client.js";

 const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;

+// Exercises the private handleSessionUpdate dispatcher directly (via `as any`)
+// and inspects the client's internal buffers after each kind of update.
+describe("handleSessionUpdate — helper extraction", () => {
+  let client: HermesAcpClient;
+
+  beforeEach(() => {
+    client = new HermesAcpClient();
+  });
+
+  afterEach(async () => {
+    await client.close();
+  });
+
+  it("agent_message_chunk accumulates text in messageChunks", () => {
+    (client as any).handleSessionUpdate({
+      sessionUpdate: "agent_message_chunk",
+      content: { type: "text", text: "hello" },
+    });
+    (client as any).handleSessionUpdate({
+      sessionUpdate: "agent_message_chunk",
+      content: { type: "text", text: " world" },
+    });
+    expect((client as any).messageChunks).toEqual(["hello", " world"]);
+  });
+
+  it("agent_thought_chunk accumulates reasoning in reasoningChunks", () => {
+    (client as any).handleSessionUpdate({
+      sessionUpdate: "agent_thought_chunk",
+      content: { type: "text", text: "thinking" },
+    });
+    expect((client as any).reasoningChunks).toEqual(["thinking"]);
+  });
+
+  it("tool_call registers a pending tool and flushes message chunks", () => {
+    // Seed buffered assistant text so the flush triggered by tool_call is observable.
+    (client as any).messageChunks = ["pre-tool text"];
+    (client as any).handleSessionUpdate({
+      sessionUpdate: "tool_call",
+      title: "Bash",
+      rawInput: { command: "ls" },
+      toolCallId: "tc-1",
+    });
+    expect((client as any).pendingTools.get("tc-1")).toEqual({
+      name: "Bash",
+      args: JSON.stringify({ command: "ls" }),
+    });
+    expect((client as any).messageChunks).toEqual([]);
+    expect((client as any).messages).toHaveLength(1);
+    expect((client as any).messages[0].role).toBe("assistant");
+  });
+
+  it("tool_call_update completed pushes tool_call and tool messages", () => {
+    // Register the pending tool by hand so the update can resolve its name and args.
+    (client as any).pendingTools.set("tc-2", { name: "Read", args: '{"path":"/foo"}' });
+    (client as any).handleSessionUpdate({
+      sessionUpdate: "tool_call_update",
+      status: "completed",
+      toolCallId: "tc-2",
+      rawOutput: "file contents",
+    });
+    const msgs = (client as any).messages as Array<{
+      role: string;
+      tool_calls: unknown;
+      content: string | null;
+    }>;
+    expect(msgs).toHaveLength(2);
+    expect(msgs[0].role).toBe("assistant");
+    expect(msgs[0].tool_calls).toEqual([
+      { function: { name: "Read", arguments: '{"path":"/foo"}' } },
+    ]);
+    expect(msgs[1].role).toBe("tool");
+    expect(msgs[1].content).toBe("file contents");
+    // The pending entry is removed once the tool call has finished.
+    expect((client as any).pendingTools.has("tc-2")).toBe(false);
+  });
+
+  it("tool_call_update with non-string rawOutput JSON-stringifies it", () => {
+    (client as any).pendingTools.set("tc-3", { name: "Fetch", args: "" });
+    (client as any).handleSessionUpdate({
+      sessionUpdate: "tool_call_update",
+      status: "completed",
+      toolCallId: "tc-3",
+      rawOutput: { html: "<p>page</p>" },
+    });
+    const msgs = (client as any).messages as Array<{ role: string; content: string | null }>;
+    expect(msgs[1].content).toBe(JSON.stringify({ html: "<p>page</p>" }));
+  });
+
+  it("unknown updateType is a no-op", () => {
+    (client as any).handleSessionUpdate({ sessionUpdate: "unknown_type", data: {} });
+    expect((client as any).messages).toHaveLength(0);
+    expect((client as any).messageChunks).toHaveLength(0);
+  });
+});
+
 describe("HermesAcpClient", () => {
  let client: HermesAcpClient;

@@ -245,72 +245,75 @@ export class HermesAcpClient {
  // ---- Session update → structured messages ----

+  /** Route a session update to the handler for its `sessionUpdate` kind; other kinds are ignored. */
  private handleSessionUpdate(update: Record<string, unknown>): void {
-    const updateType = update.sessionUpdate as string;
-
-    switch (updateType) {
-      case "agent_message_chunk": {
-        const content = update.content as { type?: string; text?: string } | undefined;
-        if (content?.type === "text" && typeof content.text === "string") {
-          this.messageChunks.push(content.text);
-        }
+    switch (update.sessionUpdate as string) {
+      case "agent_message_chunk":
+        this.handleAgentMessageChunk(update);
        break;
-      }
-
-      case "agent_thought_chunk": {
-        const content = update.content as { type?: string; text?: string } | undefined;
-        if (content?.type === "text" && typeof content.text === "string") {
-          this.reasoningChunks.push(content.text);
-        }
+      case "agent_thought_chunk":
+        this.handleAgentThoughtChunk(update);
        break;
-      }
-
-      case "tool_call": {
-        const title = (update.title as string) ?? "";
-        const rawInput = update.rawInput;
-        const args = rawInput !== undefined && rawInput !== null ? JSON.stringify(rawInput) : "";
-        const toolCallId = update.toolCallId as string;
-        this.pendingTools.set(toolCallId, { name: title, args });
-
-        // Flush accumulated assistant text before tool call
-        this.flushAssistantMessage();
+      case "tool_call":
+        this.handleToolCall(update);
        break;
-      }
-
-      case "tool_call_update": {
-        const status = update.status as string | undefined;
-        if (status === "completed" || status === "failed") {
-          const toolCallId = update.toolCallId as string;
-          const pending = this.pendingTools.get(toolCallId);
-          const toolName = pending?.name ?? toolCallId;
-          const rawOutput = update.rawOutput;
-          const outputStr =
-            rawOutput !== undefined && rawOutput !== null
-              ? typeof rawOutput === "string"
-                ? rawOutput
-                : JSON.stringify(rawOutput)
-              : "";
-          this.messages.push({
-            role: "assistant",
-            content: null,
-            reasoning: null,
-            tool_calls: [{ function: { name: toolName, arguments: pending?.args ?? "" } }],
-          });
-          this.messages.push({
-            role: "tool",
-            content: outputStr,
-            reasoning: null,
-            tool_calls: null,
-          });
-          this.pendingTools.delete(toolCallId);
-        }
+      case "tool_call_update":
+        this.handleToolCallUpdate(update);
        break;
-      }
-
      default:
        break;
    }
  }

+  /** Buffer the chunk's text for the pending assistant message when it is a text chunk. */
+  private handleAgentMessageChunk(update: Record<string, unknown>): void {
+    const chunk = update.content as { type?: string; text?: string } | undefined;
+    if (chunk?.type !== "text") return;
+    if (typeof chunk.text !== "string") return;
+    this.messageChunks.push(chunk.text);
+  }
+
+  /** Buffer the chunk's text as reasoning when it is a text chunk. */
+  private handleAgentThoughtChunk(update: Record<string, unknown>): void {
+    const chunk = update.content as { type?: string; text?: string } | undefined;
+    if (chunk?.type !== "text") return;
+    if (typeof chunk.text !== "string") return;
+    this.reasoningChunks.push(chunk.text);
+  }
+
+  /**
+   * Remember a tool call as pending (name plus serialized input), then flush
+   * any buffered assistant text so it is recorded ahead of the tool call.
+   */
+  private handleToolCall(update: Record<string, unknown>): void {
+    const { title, rawInput, toolCallId } = update;
+    const name = (title as string) ?? "";
+    const hasInput = rawInput !== undefined && rawInput !== null;
+    const args = hasInput ? JSON.stringify(rawInput) : "";
+    this.pendingTools.set(toolCallId as string, { name, args });
+    this.flushAssistantMessage();
+  }
+
+  /**
+   * On a "completed" or "failed" update, record the tool call as an assistant
+   * message followed by a tool message holding its output, then drop the
+   * pending entry. Updates with any other status are ignored.
+   */
+  private handleToolCallUpdate(update: Record<string, unknown>): void {
+    const status = update.status as string | undefined;
+    const isFinished = status === "completed" || status === "failed";
+    if (!isFinished) return;
+
+    const id = update.toolCallId as string;
+    const pending = this.pendingTools.get(id);
+    const { rawOutput } = update;
+
+    // Missing output becomes "", strings pass through, anything else is JSON-encoded.
+    let output: string;
+    if (rawOutput === undefined || rawOutput === null) {
+      output = "";
+    } else if (typeof rawOutput === "string") {
+      output = rawOutput;
+    } else {
+      output = JSON.stringify(rawOutput);
+    }
+
+    this.messages.push(
+      {
+        role: "assistant",
+        content: null,
+        reasoning: null,
+        // Fall back to the call id as the name when the call was never registered.
+        tool_calls: [{ function: { name: pending?.name ?? id, arguments: pending?.args ?? "" } }],
+      },
+      {
+        role: "tool",
+        content: output,
+        reasoning: null,
+        tool_calls: null,
+      },
+    );
+    this.pendingTools.delete(id);
+  }
+
  /** Flush any accumulated text/reasoning into an assistant message. */
  private flushAssistantMessage(): void {
    const text = this.messageChunks.join("");
@@ -21,14 +21,6 @@ function fail(message: string): never {
  throw new Error(message);
 }

-function readEdgePrompt(): string {
-  const value = process.env.UWF_EDGE_PROMPT;
-  if (value === undefined || value === "") {
-    fail("UWF_EDGE_PROMPT environment variable is required");
-  }
-  return value;
-}
-
 function walkChain(store: Store, schemas: AgentStore["schemas"], headHash: CasRef): ChainState {
  const headNode = store.get(headHash);
  if (headNode === null) {
@@ -123,7 +115,11 @@ async function loadWorkflow(store: Store, schemas: AgentStore["schemas"], workfl
 * Build agent execution context from thread head in threads.yaml.
 * Walks the CAS chain from head to StartNode and expands step outputs.
 */
-export async function buildContext(threadId: ThreadId, role: string): Promise<AgentContext> {
+export async function buildContext(
+  threadId: ThreadId,
+  role: string,
+  edgePrompt: string,
+): Promise<AgentContext> {
  const storageRoot = resolveStorageRoot();
  const agentStore = await createAgentStore(storageRoot);
  const { store, schemas } = agentStore;
@@ -142,7 +138,6 @@ export async function buildContext(threadId: ThreadId, role: string): Promise<Ag
  }

  const steps = await buildHistory(store, chain.stepsNewestFirst);
-  const edgePrompt = readEdgePrompt();
  const isFirstVisit = !steps.some((s) => s.role === role);

  return {
@@ -172,6 +167,7 @@ export type BuildContextMeta = {
 export async function buildContextWithMeta(
  threadId: ThreadId,
  role: string,
+  edgePrompt: string,
 ): Promise<AgentContext & { meta: BuildContextMeta }> {
  const storageRoot = resolveStorageRoot();
  const agentStore = await createAgentStore(storageRoot);
@@ -191,7 +187,6 @@ export async function buildContextWithMeta(
  }

  const steps = await buildHistory(store, chain.stepsNewestFirst);
-  const edgePrompt = readEdgePrompt();
  const isFirstVisit = !steps.some((s) => s.role === role);

  return {
@@ -22,16 +22,24 @@ function agentLabel(name: string): string {
  return `uwf-${name}`;
 }

-function parseArgv(argv: string[]): { threadId: ThreadId; role: string } {
-  const threadId = argv[2];
-  const role = argv[3];
-  if (threadId === undefined || threadId === "") {
-    fail("usage: <agent-cli> <thread-id> <role>");
+const USAGE = "usage: <agent-cli> --thread <id> --role <role> --prompt <text>";
+
+/**
+ * Return the argument that follows the first occurrence of `name` in `argv`.
+ * Returns "" when `name` is absent or is the last element (no value follows).
+ */
+function getNamedArg(argv: string[], name: string): string {
+  const idx = argv.indexOf(name);
+  if (idx === -1 || idx + 1 >= argv.length) {
+    return "";
  }
-  if (role === undefined || role === "") {
-    fail("usage: <agent-cli> <thread-id> <role>");
-  }
-  return { threadId: threadId as ThreadId, role };
+  return argv[idx + 1];
+}
+
+/**
+ * Read --thread, --role and --prompt from argv.
+ * Fails with the usage message when any of the three is missing or empty.
+ */
+function parseArgv(argv: string[]): { threadId: ThreadId; role: string; prompt: string } {
+  const threadId = getNamedArg(argv, "--thread");
+  const role = getNamedArg(argv, "--role");
+  const prompt = getNamedArg(argv, "--prompt");
+  const anyMissing = [threadId, role, prompt].some((value) => value === "");
+  if (anyMissing) fail(USAGE);
+  return { threadId: threadId as ThreadId, role, prompt };
 }

 function runWithMessage<T>(label: string, fn: () => Promise<T>): Promise<T> {
@@ -103,11 +111,11 @@ async function persistStep(options: {

 export function createAgent(options: AgentOptions): () => Promise<void> {
  return async function main(): Promise<void> {
-    const { threadId, role } = parseArgv(process.argv);
+    const { threadId, role, prompt } = parseArgv(process.argv);
    const storageRoot = resolveStorageRoot();
    loadDotenv({ path: getEnvPath(storageRoot) });

-    const ctx = await runWithMessage("context", () => buildContextWithMeta(threadId, role));
+    const ctx = await runWithMessage("context", () => buildContextWithMeta(threadId, role, prompt));

    const roleDef = ctx.workflow.roles[role];
    if (roleDef === undefined) {
@@ -13,7 +13,7 @@ export type AgentContext = ModeratorContext & {
   */
  outputFormatInstruction: string;
  /**
-   * Edge prompt from the graph transition that led to this role (UWF_EDGE_PROMPT).
+   * Edge prompt from the graph transition that led to this role (--prompt CLI arg).
   * Always the real moderator instruction for this step.
   */
  edgePrompt: string;
@@ -15,6 +15,8 @@ export type {
  ProviderConfig,
  RoleDefinition,
  RoleName,
+  RunningThreadItem,
+  RunningThreadsOutput,
  Scenario,
  StartEntry,
  StartNodePayload,
@@ -84,6 +84,7 @@ export type StepOutput = {
  thread: ThreadId;
  head: CasRef;
  done: boolean;
+  background: boolean | null;
 };

 /** uwf thread steps — single step entry */
@@ -126,6 +127,19 @@ export type ThreadListItem = {
  head: CasRef;
 };

+/** uwf thread running — single running thread entry */
+export type RunningThreadItem = {
+  /** Identifier of the running thread. */
+  thread: ThreadId;
+  /** CAS reference of the thread's workflow. */
+  workflow: CasRef;
+  /** Process id — presumably of the background worker; confirm against the marker writer. */
+  pid: number;
+  /** Start time. NOTE(review): the unit (likely epoch milliseconds) is not visible here — confirm. */
+  startedAt: number;
+};
+
+/** uwf thread running output */
+export type RunningThreadsOutput = {
+  /** One entry per running thread. */
+  threads: RunningThreadItem[];
+};
+
 // ── 4.6 配置 ────────────────────────────────────────────────────────

 /** Alias types for config references */
@@ -0,0 +1,89 @@
+#!/usr/bin/env bash
+# batch-solve.sh — solve multiple Gitea issues via solve-issue workflow
+#
+# Usage:
+#   ./scripts/batch-solve.sh [--agent CMD] [--repo OWNER/REPO] [--count N] ISSUE_NUM...
+#
+# Examples:
+#   ./scripts/batch-solve.sh 448 449
+#   ./scripts/batch-solve.sh --agent "bun run $(pwd)/packages/workflow-agent-claude-code/src/cli.ts" 448 449
+#   ./scripts/batch-solve.sh --repo uncaged/workflow --count 15 448 449
+#
+# For each issue a thread is started and stepped up to COUNT times. A failure on
+# one issue is recorded in the summary and the batch moves on to the next issue.
+
+set -euo pipefail
+
+AGENT=""
+REPO="uncaged/workflow"
+COUNT=10
+ISSUES=()
+
+# Print the usage line to stderr and exit with status 1.
+usage() {
+  echo "Usage: $0 [--agent CMD] [--repo OWNER/REPO] [--count N] ISSUE_NUM..." >&2
+  exit 1
+}
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --agent|--repo|--count)
+      # With `set -u`, reading a missing "$2" aborts with "unbound variable"; show usage instead.
+      [[ $# -ge 2 ]] || usage
+      case "$1" in
+        --agent) AGENT="$2" ;;
+        --repo)  REPO="$2" ;;
+        --count) COUNT="$2" ;;
+      esac
+      shift 2
+      ;;
+    *) ISSUES+=("$1"); shift ;;
+  esac
+done
+
+if [[ ${#ISSUES[@]} -eq 0 ]]; then
+  usage
+fi
+
+# Hold the agent option in an array so a command containing spaces
+# (e.g. "bun run /path/cli.ts") reaches uwf as ONE argument instead of being word-split.
+AGENT_ARGS=()
+if [[ -n "$AGENT" ]]; then
+  AGENT_ARGS=(--agent "$AGENT")
+fi
+
+TOTAL=${#ISSUES[@]}
+PASSED=0
+FAILED=0
+RESULTS=()
+
+echo "━━━ Batch solve: ${TOTAL} issues ━━━"
+echo ""
+
+for i in "${!ISSUES[@]}"; do
+  ISSUE="${ISSUES[$i]}"
+  NUM=$((i + 1))
+  echo "┌─── [$NUM/$TOTAL] Issue #${ISSUE} ───"
+
+  # Read issue title
+  TITLE=$(tea issues "$ISSUE" -r "$REPO" 2>/dev/null | head -1 | sed 's/^# #[0-9]* //' | sed 's/ (.*//' || echo "unknown")
+  echo "│ Title: $TITLE"
+
+  # Start thread. Failures here are handled inside `if` conditions so that
+  # `set -e` does not abort the remaining issues.
+  PROMPT="Fix issue #${ISSUE} in ${REPO}. Read the issue first with 'tea issues ${ISSUE} -r ${REPO}' for full spec."
+  if ! THREAD_JSON=$(uwf thread start solve-issue -p "$PROMPT" 2>&1); then
+    echo "│ ❌ Failed to start thread"
+    FAILED=$((FAILED + 1))
+    RESULTS+=("❌ #${ISSUE} — ${TITLE} (error)")
+  elif ! THREAD_ID=$(echo "$THREAD_JSON" | python3 -c "import json,sys; print(json.load(sys.stdin)['thread'])" 2>/dev/null); then
+    echo "│ ❌ Could not read thread id from uwf output"
+    FAILED=$((FAILED + 1))
+    RESULTS+=("❌ #${ISSUE} — ${TITLE} (error)")
+  else
+    echo "│ Thread: $THREAD_ID"
+
+    # Run steps
+    echo "│ Running (max $COUNT steps)..."
+    # The ${arr[@]+...} form keeps an empty array from tripping `set -u` on older bash.
+    if STEP_OUTPUT=$(uwf thread step "$THREAD_ID" ${AGENT_ARGS[@]+"${AGENT_ARGS[@]}"} -c "$COUNT" 2>&1); then
+      # Check if done; unparsable output counts as "not done" rather than aborting the batch.
+      LAST_DONE=$(echo "$STEP_OUTPUT" | python3 -c "import json,sys; lines=sys.stdin.read().strip(); data=json.loads(lines); print(data[-1].get('done', False))" 2>/dev/null) || LAST_DONE="False"
+      if [[ "$LAST_DONE" == "True" ]]; then
+        echo "│ ✅ Done!"
+        PASSED=$((PASSED + 1))
+        RESULTS+=("✅ #${ISSUE} — ${TITLE}")
+      else
+        echo "│ ⚠️  Ran out of steps (not done)"
+        FAILED=$((FAILED + 1))
+        RESULTS+=("⚠️  #${ISSUE} — ${TITLE} (incomplete)")
+      fi
+    else
+      echo "│ ❌ Failed"
+      FAILED=$((FAILED + 1))
+      RESULTS+=("❌ #${ISSUE} — ${TITLE} (error)")
+    fi
+  fi
+
+  echo "└───"
+  echo ""
+done
+
+echo "━━━ Results: ${PASSED}/${TOTAL} passed, ${FAILED} failed ━━━"
+for R in "${RESULTS[@]}"; do
+  echo "  $R"
+done
Author	SHA1	Message	Date
xiaoju	f96d6eb7c4	refactor(agent-builtin): reduce cognitive complexity in loop.ts Refactored runBuiltinLoop function to reduce cognitive complexity from 30 to below 15 by extracting helper functions: - shouldInjectDeadlineWarning: checks if deadline warning should be shown - shouldProcessToolCalls: determines if tool calls should be processed - extractFinalText: extracts last assistant message content - injectDeadlineWarning: injects deadline warning message - handleTextOnlyTurn: handles text-only turn logic - handleToolCallTurn: handles tool call turn logic - processLoopIteration: processes a single loop iteration Added 24 new unit tests for the extracted helper functions, bringing total test count to 41 (all passing). All existing behavior is preserved. Fixes #444 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-05-24 05:53:55 +00:00
xiaomo	95102941f1	Merge pull request 'feat(cli): thread step --background + thread running' (#457) from fix/456-thread-step-background into main	2026-05-24 05:33:56 +00:00
xiaoju	521d908719	feat(cli): add background thread execution and running threads query This commit implements issue #456, adding two related capabilities to the uwf CLI: 1. Background execution mode for `uwf thread step` (via `--background` flag) - Spawns agent execution in a detached background process - Returns immediately with thread ID and background status - Maintains marker files to track running processes - Supports `--count` option to run multiple steps in background - Prevents concurrent execution of the same thread 2. Running threads query command (`uwf thread running`) - Lists all threads currently executing in background - Returns thread ID, workflow, current role, PID, and start time - Automatically filters out stale markers (dead processes) - Empty list when no threads are running Key changes: - workflow-protocol: Added `RunningThreadItem`, `RunningThreadsOutput` types Updated `StepOutput` to include `background: boolean \| null` field - cli-workflow/background: New module for process management - Marker file creation/deletion (atomic operations) - PID liveness checking - Stale marker cleanup - Running threads query - cli-workflow/commands/thread: - Updated `cmdThreadStep` to support `--background` and `--_background-worker` flags - Added `cmdThreadStepBackground` for spawning detached processes - Added `cmdThreadRunning` to list running threads - Updated `cmdThreadKill` to terminate background processes - cli-workflow/cli: Added CLI routing for new commands and flags Integration: - `uwf thread kill` now terminates background processes before archiving - Foreground execution checks for existing background process and fails if found - Background worker creates/cleans up marker files automatically - Marker files stored in `~/.uncaged/workflow/running/*.json` Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-05-24 05:28:29 +00:00
xiaoju	02a2c00175	refactor: replace UWF_EDGE_PROMPT env var with named CLI args Agent adapters now use named parameters: uwf-<agent> --thread <id> --role <role> --prompt <text> Instead of positional args + env var: UWF_EDGE_PROMPT=... uwf-<agent> <thread-id> <role> Changes: - workflow-agent-kit/src/run.ts: parseArgv uses named --thread/--role/--prompt - workflow-agent-kit/src/context.ts: edgePrompt passed as parameter, not read from env - cli-workflow/src/commands/thread.ts: spawnAgent passes named args 小橘 <xiaoju@shazhou.work>	2026-05-24 04:31:44 +00:00
xiaoju	8ca7708a12	fix: add cas_ref format to claude-code-detail turns schema The turns array items in CLAUDE_CODE_DETAIL_SCHEMA were missing format: 'cas_ref', so expandDeep in step-details couldn't resolve turn hashes to their payloads. Hermes schema already had this. 小橘 <xiaoju@shazhou.work>	2026-05-24 04:17:29 +00:00
xiaomo	0fdc0fdec3	Merge pull request 'refactor(workflow-dashboard): reduce cyclomatic complexity in editor' (#455 ) from fix/449-reduce-dashboard-complexity into main	2026-05-24 03:44:08 +00:00
xiaomo	d6eaf3fdc7	Merge pull request 'refactor: reduce cognitive complexity in session-detail and acp-client' (#454 ) from fix/448-reduce-complexity into main	2026-05-24 03:44:06 +00:00
xingyue	221919448e	refactor: reduce cognitive complexity in session-detail and acp-client Extract helper functions to bring parseClaudeCodeStreamOutput (37→≤15) and handleSessionUpdate (24→≤15) within complexity limits. Add tests. Fixes #448	2026-05-24 00:41:39 +08:00