Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| f96d6eb7c4 | |||
| 95102941f1 | |||
| 521d908719 | |||
| 02a2c00175 | |||
| 8ca7708a12 | |||
| 0fdc0fdec3 | |||
| d6eaf3fdc7 | |||
| 221919448e |
@@ -1,28 +0,0 @@
|
||||
name: CI
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: ['*']
|
||||
pull_request:
|
||||
branches: [main]
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Setup Bun
|
||||
uses: oven-sh/setup-bun@v2
|
||||
|
||||
- name: Install dependencies
|
||||
run: bun install
|
||||
|
||||
- name: Lint
|
||||
run: bun run lint
|
||||
|
||||
- name: Type check
|
||||
run: bun run typecheck
|
||||
|
||||
- name: Test
|
||||
run: bun test
|
||||
@@ -0,0 +1,147 @@
|
||||
import { mkdir, readdir, readFile, rename, rm, writeFile } from "node:fs/promises";
|
||||
import { join } from "node:path";
|
||||
import type { RunningThreadItem, ThreadId } from "@uncaged/workflow-protocol";
|
||||
|
||||
import type { RunningMarker } from "./types.js";
|
||||
|
||||
/**
|
||||
* Get the path to the running markers directory.
|
||||
*/
|
||||
export function getRunningDir(storageRoot: string): string {
|
||||
return join(storageRoot, "running");
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the path to a specific thread's marker file.
|
||||
*/
|
||||
export function getMarkerPath(storageRoot: string, threadId: ThreadId): string {
|
||||
return join(getRunningDir(storageRoot), `${threadId}.json`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a PID is still running.
|
||||
* Returns true if the process exists, false otherwise.
|
||||
*/
|
||||
export function isPidAlive(pid: number): boolean {
|
||||
try {
|
||||
// process.kill with signal 0 checks existence without killing
|
||||
process.kill(pid, 0);
|
||||
return true;
|
||||
} catch {
|
||||
// ESRCH means process doesn't exist
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a marker file for a running thread.
|
||||
* Writes to a temp file in the same directory, then atomically renames.
|
||||
*/
|
||||
export async function createMarker(storageRoot: string, marker: RunningMarker): Promise<void> {
|
||||
const runningDir = getRunningDir(storageRoot);
|
||||
await mkdir(runningDir, { recursive: true });
|
||||
|
||||
const markerPath = getMarkerPath(storageRoot, marker.thread);
|
||||
const tempPath = join(runningDir, `.${marker.thread}-${process.pid}.tmp`);
|
||||
|
||||
const content = JSON.stringify(marker, null, 2);
|
||||
await writeFile(tempPath, content, "utf8");
|
||||
await rename(tempPath, markerPath);
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete a marker file for a thread.
|
||||
*/
|
||||
export async function deleteMarker(storageRoot: string, threadId: ThreadId): Promise<void> {
|
||||
const markerPath = getMarkerPath(storageRoot, threadId);
|
||||
try {
|
||||
await rm(markerPath);
|
||||
} catch {
|
||||
// Ignore errors if file doesn't exist
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Read a marker file. Returns null if file doesn't exist or is invalid.
|
||||
*/
|
||||
export async function readMarker(
|
||||
storageRoot: string,
|
||||
threadId: ThreadId,
|
||||
): Promise<RunningMarker | null> {
|
||||
const markerPath = getMarkerPath(storageRoot, threadId);
|
||||
try {
|
||||
const content = await readFile(markerPath, "utf8");
|
||||
const marker = JSON.parse(content) as RunningMarker;
|
||||
return marker;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* List all running threads, filtering out stale markers.
|
||||
*/
|
||||
export async function listRunningThreads(storageRoot: string): Promise<RunningThreadItem[]> {
|
||||
const runningDir = getRunningDir(storageRoot);
|
||||
|
||||
let files: string[];
|
||||
try {
|
||||
files = await readdir(runningDir);
|
||||
} catch {
|
||||
// Directory doesn't exist or can't be read
|
||||
return [];
|
||||
}
|
||||
|
||||
const results: RunningThreadItem[] = [];
|
||||
|
||||
for (const filename of files) {
|
||||
if (!filename.endsWith(".json")) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const threadId = filename.slice(0, -5) as ThreadId;
|
||||
const marker = await readMarker(storageRoot, threadId);
|
||||
|
||||
if (marker === null) {
|
||||
// Invalid marker file
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!isPidAlive(marker.pid)) {
|
||||
// Stale marker - process no longer exists
|
||||
await deleteMarker(storageRoot, threadId);
|
||||
continue;
|
||||
}
|
||||
|
||||
results.push({
|
||||
thread: marker.thread,
|
||||
workflow: marker.workflow,
|
||||
pid: marker.pid,
|
||||
startedAt: marker.startedAt,
|
||||
});
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a thread is currently executing in the background.
|
||||
* Returns the marker if running, null otherwise.
|
||||
*/
|
||||
export async function isThreadRunning(
|
||||
storageRoot: string,
|
||||
threadId: ThreadId,
|
||||
): Promise<RunningMarker | null> {
|
||||
const marker = await readMarker(storageRoot, threadId);
|
||||
if (marker === null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (!isPidAlive(marker.pid)) {
|
||||
// Stale marker
|
||||
await deleteMarker(storageRoot, threadId);
|
||||
return null;
|
||||
}
|
||||
|
||||
return marker;
|
||||
}
|
||||
@@ -0,0 +1,11 @@
|
||||
export {
|
||||
createMarker,
|
||||
deleteMarker,
|
||||
getMarkerPath,
|
||||
getRunningDir,
|
||||
isPidAlive,
|
||||
isThreadRunning,
|
||||
listRunningThreads,
|
||||
readMarker,
|
||||
} from "./background.js";
|
||||
export type { RunningMarker } from "./types.js";
|
||||
@@ -0,0 +1,9 @@
|
||||
import type { CasRef, ThreadId } from "@uncaged/workflow-protocol";
|
||||
|
||||
/** Marker file stored at ~/.uncaged/workflow/running/<thread-id>.json */
|
||||
export type RunningMarker = {
|
||||
thread: ThreadId;
|
||||
workflow: CasRef;
|
||||
pid: number;
|
||||
startedAt: number;
|
||||
};
|
||||
@@ -22,6 +22,7 @@ import {
|
||||
cmdThreadKill,
|
||||
cmdThreadList,
|
||||
cmdThreadRead,
|
||||
cmdThreadRunning,
|
||||
cmdThreadShow,
|
||||
cmdThreadStart,
|
||||
cmdThreadStep,
|
||||
@@ -114,19 +115,41 @@ thread
|
||||
.argument("<thread-id>", "Thread ULID")
|
||||
.option("--agent <cmd>", "Override agent command")
|
||||
.option("-c, --count <number>", "Number of steps to run (default: 1)")
|
||||
.action((threadId: string, opts: { agent: string | undefined; count: string | undefined }) => {
|
||||
const storageRoot = resolveStorageRoot();
|
||||
runAction(async () => {
|
||||
const agentOverride = opts.agent ?? null;
|
||||
const count = opts.count !== undefined ? Number(opts.count) : 1;
|
||||
const results = await cmdThreadStep(storageRoot, threadId, agentOverride, count);
|
||||
if (results.length === 1) {
|
||||
writeOutput(results[0]);
|
||||
} else {
|
||||
writeOutput(results);
|
||||
}
|
||||
});
|
||||
});
|
||||
.option("--background", "Run in background and return immediately")
|
||||
.option("--_background-worker", "Internal flag for background worker process", false)
|
||||
.action(
|
||||
(
|
||||
threadId: string,
|
||||
opts: {
|
||||
agent: string | undefined;
|
||||
count: string | undefined;
|
||||
background: boolean;
|
||||
_backgroundWorker: boolean;
|
||||
},
|
||||
) => {
|
||||
const storageRoot = resolveStorageRoot();
|
||||
runAction(async () => {
|
||||
const agentOverride = opts.agent ?? null;
|
||||
const count = opts.count !== undefined ? Number(opts.count) : 1;
|
||||
const background = opts.background ?? false;
|
||||
const backgroundWorker = opts._backgroundWorker ?? false;
|
||||
|
||||
const results = await cmdThreadStep(
|
||||
storageRoot,
|
||||
threadId,
|
||||
agentOverride,
|
||||
count,
|
||||
background,
|
||||
backgroundWorker,
|
||||
);
|
||||
if (results.length === 1) {
|
||||
writeOutput(results[0]);
|
||||
} else {
|
||||
writeOutput(results);
|
||||
}
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
thread
|
||||
.command("show")
|
||||
@@ -152,6 +175,17 @@ thread
|
||||
});
|
||||
});
|
||||
|
||||
thread
|
||||
.command("running")
|
||||
.description("List threads currently executing in the background")
|
||||
.action(() => {
|
||||
const storageRoot = resolveStorageRoot();
|
||||
runAction(async () => {
|
||||
const result = await cmdThreadRunning(storageRoot);
|
||||
writeOutput(result);
|
||||
});
|
||||
});
|
||||
|
||||
thread
|
||||
.command("kill")
|
||||
.description("Terminate and archive a thread")
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { execFileSync } from "node:child_process";
|
||||
import { execFileSync, spawn } from "node:child_process";
|
||||
import { access, readFile } from "node:fs/promises";
|
||||
import { dirname, isAbsolute, resolve as resolvePath } from "node:path";
|
||||
import type { Store as CasStore, JSONSchema } from "@uncaged/json-cas";
|
||||
@@ -10,6 +10,7 @@ import type {
|
||||
AgentConfig,
|
||||
CasRef,
|
||||
ModeratorContext,
|
||||
RunningThreadsOutput,
|
||||
StartEntry,
|
||||
StartNodePayload,
|
||||
StartOutput,
|
||||
@@ -27,7 +28,12 @@ import type {
|
||||
import { createProcessLogger, generateUlid, type ProcessLogger } from "@uncaged/workflow-util";
|
||||
import { config as loadDotenv } from "dotenv";
|
||||
import { parse, stringify } from "yaml";
|
||||
|
||||
import {
|
||||
createMarker,
|
||||
deleteMarker,
|
||||
isThreadRunning,
|
||||
listRunningThreads,
|
||||
} from "../background/index.js";
|
||||
import {
|
||||
appendThreadHistory,
|
||||
createUwfStore,
|
||||
@@ -52,6 +58,7 @@ const PL_AGENT_SPAWN = "R5J2W8N4";
|
||||
const PL_AGENT_DONE = "C6P9E3H7";
|
||||
const PL_THREAD_ARCHIVED = "F4D8Q2K5";
|
||||
const PL_STEP_ERROR = "B8T5N1V6";
|
||||
const PL_BACKGROUND_START = "X7Q4W9M2";
|
||||
|
||||
function failStep(plog: ProcessLogger, message: string): never {
|
||||
plog.log(PL_STEP_ERROR, message, null);
|
||||
@@ -321,6 +328,7 @@ export async function cmdThreadShow(storageRoot: string, threadId: ThreadId): Pr
|
||||
thread: threadId,
|
||||
head: activeHead,
|
||||
done: false,
|
||||
background: null,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -331,6 +339,7 @@ export async function cmdThreadShow(storageRoot: string, threadId: ThreadId): Pr
|
||||
thread: threadId,
|
||||
head: hist.head,
|
||||
done: true,
|
||||
background: null,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -804,13 +813,11 @@ function spawnAgent(
|
||||
role: string,
|
||||
edgePrompt: string,
|
||||
): CasRef {
|
||||
const argv = [...agent.args, threadId, role];
|
||||
const env = { ...process.env, UWF_EDGE_PROMPT: edgePrompt };
|
||||
const argv = [...agent.args, "--thread", threadId, "--role", role, "--prompt", edgePrompt];
|
||||
let stdout: string;
|
||||
try {
|
||||
stdout = execFileSync(agent.command, argv, {
|
||||
encoding: "utf8",
|
||||
env,
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
maxBuffer: 50 * 1024 * 1024, // 50 MB — stream-json output can be large
|
||||
});
|
||||
@@ -855,26 +862,60 @@ export async function cmdThreadStep(
|
||||
threadId: ThreadId,
|
||||
agentOverride: string | null,
|
||||
count: number,
|
||||
background: boolean,
|
||||
backgroundWorker: boolean,
|
||||
): Promise<StepOutput[]> {
|
||||
if (count < 1 || !Number.isInteger(count)) {
|
||||
fail(`--count must be a positive integer, got: ${count}`);
|
||||
}
|
||||
|
||||
// Check if thread is already running in background (unless we ARE the background worker)
|
||||
if (!backgroundWorker) {
|
||||
const runningMarker = await isThreadRunning(storageRoot, threadId);
|
||||
if (runningMarker !== null) {
|
||||
fail(`thread already executing in background (PID: ${runningMarker.pid})`);
|
||||
}
|
||||
}
|
||||
|
||||
const workflowHash = await resolveActiveThreadWorkflowHash(storageRoot, threadId);
|
||||
const plog = createProcessLogger({
|
||||
storageRoot,
|
||||
context: { thread: threadId, workflow: workflowHash },
|
||||
});
|
||||
|
||||
const results: StepOutput[] = [];
|
||||
for (let i = 0; i < count; i++) {
|
||||
const result = await cmdThreadStepOnce(storageRoot, threadId, agentOverride, plog);
|
||||
results.push(result);
|
||||
if (result.done) {
|
||||
break;
|
||||
if (background && !backgroundWorker) {
|
||||
// Spawn background process
|
||||
return cmdThreadStepBackground(storageRoot, threadId, agentOverride, count, plog, workflowHash);
|
||||
}
|
||||
|
||||
// If we're the background worker, create marker before execution
|
||||
let markerCreated = false;
|
||||
if (backgroundWorker) {
|
||||
await createMarker(storageRoot, {
|
||||
thread: threadId,
|
||||
workflow: workflowHash,
|
||||
pid: process.pid,
|
||||
startedAt: Date.now(),
|
||||
});
|
||||
markerCreated = true;
|
||||
}
|
||||
|
||||
try {
|
||||
const results: StepOutput[] = [];
|
||||
for (let i = 0; i < count; i++) {
|
||||
const result = await cmdThreadStepOnce(storageRoot, threadId, agentOverride, plog);
|
||||
results.push(result);
|
||||
if (result.done) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return results;
|
||||
} finally {
|
||||
// Cleanup marker if we created one
|
||||
if (markerCreated) {
|
||||
await deleteMarker(storageRoot, threadId);
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
async function resolveActiveThreadWorkflowHash(
|
||||
@@ -891,6 +932,57 @@ async function resolveActiveThreadWorkflowHash(
|
||||
return chain.start.workflow;
|
||||
}
|
||||
|
||||
async function cmdThreadStepBackground(
|
||||
storageRoot: string,
|
||||
threadId: ThreadId,
|
||||
agentOverride: string | null,
|
||||
count: number,
|
||||
plog: ProcessLogger,
|
||||
workflowHash: CasRef,
|
||||
): Promise<StepOutput[]> {
|
||||
// Get current head to return to caller
|
||||
const index = await loadThreadsIndex(storageRoot);
|
||||
const headHash = index[threadId];
|
||||
if (headHash === undefined) {
|
||||
failStep(plog, `thread not active: ${threadId}`);
|
||||
}
|
||||
|
||||
// Spawn detached background process
|
||||
const scriptPath = process.argv[1];
|
||||
if (scriptPath === undefined) {
|
||||
failStep(plog, "unable to determine script path for background execution");
|
||||
}
|
||||
|
||||
const args = ["thread", "step", threadId, "--count", String(count)];
|
||||
|
||||
if (agentOverride !== null) {
|
||||
args.push("--agent", agentOverride);
|
||||
}
|
||||
|
||||
// Internal flag to signal the background worker to create/cleanup markers
|
||||
args.push("--_background-worker");
|
||||
|
||||
plog.log(PL_BACKGROUND_START, `spawning background process count=${count}`, null);
|
||||
|
||||
const child = spawn(scriptPath, args, {
|
||||
detached: true,
|
||||
stdio: "ignore",
|
||||
});
|
||||
|
||||
child.unref();
|
||||
|
||||
// Return immediately with current state and background flag
|
||||
return [
|
||||
{
|
||||
workflow: workflowHash,
|
||||
thread: threadId,
|
||||
head: headHash,
|
||||
done: false,
|
||||
background: true,
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
async function cmdThreadStepOnce(
|
||||
storageRoot: string,
|
||||
threadId: ThreadId,
|
||||
@@ -928,6 +1020,7 @@ async function cmdThreadStepOnce(
|
||||
thread: threadId,
|
||||
head: headHash,
|
||||
done: true,
|
||||
background: null,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -975,6 +1068,7 @@ async function cmdThreadStepOnce(
|
||||
thread: threadId,
|
||||
head: newHead,
|
||||
done,
|
||||
background: null,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1111,6 +1205,17 @@ export async function cmdThreadKill(storageRoot: string, threadId: ThreadId): Pr
|
||||
fail(`thread not active: ${threadId}`);
|
||||
}
|
||||
|
||||
// Check if thread is running in background and terminate it
|
||||
const runningMarker = await isThreadRunning(storageRoot, threadId);
|
||||
if (runningMarker !== null) {
|
||||
try {
|
||||
process.kill(runningMarker.pid, "SIGTERM");
|
||||
} catch {
|
||||
// Process may have already exited, ignore error
|
||||
}
|
||||
await deleteMarker(storageRoot, threadId);
|
||||
}
|
||||
|
||||
const uwf = await createUwfStore(storageRoot);
|
||||
const workflow = resolveWorkflowFromHead(uwf, head);
|
||||
if (workflow === null) {
|
||||
@@ -1130,3 +1235,8 @@ export async function cmdThreadKill(storageRoot: string, threadId: ThreadId): Pr
|
||||
|
||||
return { thread: threadId, archived: true };
|
||||
}
|
||||
|
||||
export async function cmdThreadRunning(storageRoot: string): Promise<RunningThreadsOutput> {
|
||||
const threads = await listRunningThreads(storageRoot);
|
||||
return { threads };
|
||||
}
|
||||
|
||||
@@ -19,7 +19,14 @@ mock.module("../src/tools/index.js", () => ({
|
||||
getBuiltinTools: () => [],
|
||||
}));
|
||||
|
||||
import { executeTurnTools, runBuiltinLoop, shouldNudge } from "../src/loop.js";
|
||||
import {
|
||||
executeTurnTools,
|
||||
extractFinalText,
|
||||
runBuiltinLoop,
|
||||
shouldInjectDeadlineWarning,
|
||||
shouldNudge,
|
||||
shouldProcessToolCalls,
|
||||
} from "../src/loop.js";
|
||||
|
||||
const fakeProvider = {} as any;
|
||||
const fakeToolCtx = {} as any;
|
||||
@@ -154,3 +161,96 @@ describe("runBuiltinLoop integration", () => {
|
||||
expect(original.length).toBe(1);
|
||||
});
|
||||
});
|
||||
|
||||
describe("shouldInjectDeadlineWarning", () => {
|
||||
test("5.1 returns true when turn count reaches warning threshold and not yet warned", () => {
|
||||
expect(shouldInjectDeadlineWarning(7, 10, false, false)).toBe(true);
|
||||
});
|
||||
test("5.2 returns false when already warned", () => {
|
||||
expect(shouldInjectDeadlineWarning(7, 10, true, false)).toBe(false);
|
||||
});
|
||||
test("5.3 returns false when noTools is true", () => {
|
||||
expect(shouldInjectDeadlineWarning(7, 10, false, true)).toBe(false);
|
||||
});
|
||||
test("5.4 returns false when turns remaining > DEADLINE_WARNING_TURNS", () => {
|
||||
expect(shouldInjectDeadlineWarning(5, 10, false, false)).toBe(false);
|
||||
});
|
||||
test("5.5 returns true when exactly at warning threshold", () => {
|
||||
expect(shouldInjectDeadlineWarning(7, 10, false, false)).toBe(true);
|
||||
});
|
||||
test("5.6 returns false when turns remaining is 0", () => {
|
||||
expect(shouldInjectDeadlineWarning(10, 10, false, false)).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("shouldProcessToolCalls", () => {
|
||||
test("6.1 returns true when toolCalls present and noTools=false", () => {
|
||||
expect(shouldProcessToolCalls([{ id: "x", name: "read", arguments: "{}" }], false)).toBe(true);
|
||||
});
|
||||
test("6.2 returns false when toolCalls is null", () => {
|
||||
expect(shouldProcessToolCalls(null, false)).toBe(false);
|
||||
});
|
||||
test("6.3 returns false when toolCalls is empty array", () => {
|
||||
expect(shouldProcessToolCalls([], false)).toBe(false);
|
||||
});
|
||||
test("6.4 returns false when noTools=true", () => {
|
||||
expect(shouldProcessToolCalls([{ id: "x", name: "read", arguments: "{}" }], true)).toBe(false);
|
||||
});
|
||||
test("6.5 returns true when multiple tool calls present", () => {
|
||||
expect(
|
||||
shouldProcessToolCalls(
|
||||
[
|
||||
{ id: "x1", name: "read", arguments: "{}" },
|
||||
{ id: "x2", name: "write", arguments: "{}" },
|
||||
],
|
||||
false,
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe("extractFinalText", () => {
|
||||
test("7.1 returns last assistant message content", () => {
|
||||
const messages = [
|
||||
{ role: "system" as const, content: "sys", tool_calls: null },
|
||||
{ role: "assistant" as const, content: "first", tool_calls: null },
|
||||
{ role: "assistant" as const, content: "last", tool_calls: null },
|
||||
];
|
||||
expect(extractFinalText(messages)).toBe("last");
|
||||
});
|
||||
test("7.2 returns empty string when no assistant messages", () => {
|
||||
expect(extractFinalText([{ role: "system" as const, content: "sys", tool_calls: null }])).toBe(
|
||||
"",
|
||||
);
|
||||
});
|
||||
test("7.3 skips assistant messages with null content", () => {
|
||||
const messages = [
|
||||
{ role: "assistant" as const, content: "first", tool_calls: null },
|
||||
{
|
||||
role: "assistant" as const,
|
||||
content: null,
|
||||
tool_calls: [{ id: "x", name: "t", arguments: "{}" }],
|
||||
},
|
||||
{ role: "assistant" as const, content: "second", tool_calls: null },
|
||||
];
|
||||
expect(extractFinalText(messages)).toBe("second");
|
||||
});
|
||||
test("7.4 skips assistant messages with empty content", () => {
|
||||
const messages = [
|
||||
{ role: "assistant" as const, content: "first", tool_calls: null },
|
||||
{ role: "assistant" as const, content: "", tool_calls: null },
|
||||
{ role: "user" as const, content: "nudge", tool_calls: null },
|
||||
];
|
||||
expect(extractFinalText(messages)).toBe("first");
|
||||
});
|
||||
test("7.5 handles empty messages array", () => {
|
||||
expect(extractFinalText([])).toBe("");
|
||||
});
|
||||
test("7.6 handles messages with only user and system roles", () => {
|
||||
const messages = [
|
||||
{ role: "system" as const, content: "sys", tool_calls: null },
|
||||
{ role: "user" as const, content: "query", tool_calls: null },
|
||||
];
|
||||
expect(extractFinalText(messages)).toBe("");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,7 +1,12 @@
|
||||
import type { ResolvedLlmProvider } from "@uncaged/workflow-agent-kit";
|
||||
import { createLogger } from "@uncaged/workflow-util";
|
||||
|
||||
import { type ChatMessage, chatCompletionWithTools, type LlmToolCall } from "./llm/index.js";
|
||||
import {
|
||||
type ChatMessage,
|
||||
chatCompletionWithTools,
|
||||
type LlmToolCall,
|
||||
type OpenAiToolDefinition,
|
||||
} from "./llm/index.js";
|
||||
import { appendSessionTurn } from "./session.js";
|
||||
import {
|
||||
builtinToolsToOpenAi,
|
||||
@@ -80,10 +85,184 @@ export type ShouldNudgeOptions = {
|
||||
const MAX_NUDGES = 3;
|
||||
const DEADLINE_WARNING_TURNS = 3;
|
||||
|
||||
export function shouldInjectDeadlineWarning(
|
||||
turn: number,
|
||||
maxTurns: number,
|
||||
alreadyWarned: boolean,
|
||||
noTools: boolean,
|
||||
): boolean {
|
||||
const turnsRemaining = maxTurns - turn;
|
||||
return (
|
||||
!noTools && !alreadyWarned && turnsRemaining > 0 && turnsRemaining <= DEADLINE_WARNING_TURNS
|
||||
);
|
||||
}
|
||||
|
||||
export function shouldProcessToolCalls(toolCalls: LlmToolCall[] | null, noTools: boolean): boolean {
|
||||
return !noTools && toolCalls !== null && toolCalls.length > 0;
|
||||
}
|
||||
|
||||
export function extractFinalText(messages: ChatMessage[]): string {
|
||||
for (let i = messages.length - 1; i >= 0; i--) {
|
||||
const msg = messages[i];
|
||||
if (
|
||||
msg !== undefined &&
|
||||
msg.role === "assistant" &&
|
||||
msg.content !== null &&
|
||||
msg.content.trim() !== ""
|
||||
) {
|
||||
return msg.content;
|
||||
}
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
function injectDeadlineWarning(messages: ChatMessage[], turnsRemaining: number): void {
|
||||
log("4NRXW6KT", `${turnsRemaining} turns remaining, injecting deadline warning`);
|
||||
messages.push({
|
||||
role: "user",
|
||||
content:
|
||||
`⚠️ You have ${turnsRemaining} turns remaining. ` +
|
||||
"Wrap up your work and output the YAML frontmatter starting with `---`. " +
|
||||
"If you cannot finish in time, output frontmatter with `status: failed` and describe what remains.",
|
||||
});
|
||||
}
|
||||
|
||||
type HandleTextOnlyTurnResult = {
|
||||
shouldBreak: boolean;
|
||||
finalText: string;
|
||||
turnCount: number;
|
||||
nudgeCount: number;
|
||||
turnAdjustment: number;
|
||||
};
|
||||
|
||||
async function handleTextOnlyTurn(
|
||||
text: string,
|
||||
messages: ChatMessage[],
|
||||
storageRoot: string,
|
||||
sessionId: string,
|
||||
noTools: boolean,
|
||||
turn: number,
|
||||
maxTurns: number,
|
||||
currentNudgeCount: number,
|
||||
): Promise<HandleTextOnlyTurnResult> {
|
||||
await appendTurn(storageRoot, sessionId, {
|
||||
role: "assistant",
|
||||
content: text,
|
||||
toolCalls: null,
|
||||
reasoning: null,
|
||||
});
|
||||
const turnCount = 1;
|
||||
let nudgeCount = currentNudgeCount;
|
||||
let turnAdjustment = 0;
|
||||
|
||||
if (shouldNudge({ noTools, text, turn, maxTurns })) {
|
||||
nudgeCount += 1;
|
||||
log("7FXQM2KN", `text-only turn without frontmatter, nudge ${nudgeCount}/${MAX_NUDGES}`);
|
||||
const nudge =
|
||||
"You stopped calling tools but your response does not start with the required `---` YAML frontmatter. " +
|
||||
"Either continue using tools to complete your work, or output your final response starting with `---`.";
|
||||
messages.push({ role: "user", content: nudge });
|
||||
// Nudge doesn't consume turn budget (up to MAX_NUDGES)
|
||||
if (nudgeCount <= MAX_NUDGES) {
|
||||
turnAdjustment = -1;
|
||||
}
|
||||
return { shouldBreak: false, finalText: "", turnCount, nudgeCount, turnAdjustment };
|
||||
}
|
||||
|
||||
return { shouldBreak: true, finalText: text, turnCount, nudgeCount, turnAdjustment };
|
||||
}
|
||||
|
||||
async function handleToolCallTurn(
|
||||
content: string,
|
||||
toolCalls: LlmToolCall[],
|
||||
messages: ChatMessage[],
|
||||
storageRoot: string,
|
||||
sessionId: string,
|
||||
toolCtx: ToolContext,
|
||||
): Promise<number> {
|
||||
await appendTurn(storageRoot, sessionId, {
|
||||
role: "assistant",
|
||||
content,
|
||||
toolCalls: mapToolCallsForPayload(toolCalls),
|
||||
reasoning: null,
|
||||
});
|
||||
let turnCount = 1;
|
||||
|
||||
// Execute tools
|
||||
turnCount += await executeTurnTools(toolCalls, toolCtx, messages, storageRoot, sessionId);
|
||||
|
||||
return turnCount;
|
||||
}
|
||||
|
||||
export function shouldNudge({ noTools, text, turn, maxTurns }: ShouldNudgeOptions): boolean {
|
||||
return !noTools && !text.trimStart().startsWith("---") && turn < maxTurns - 1;
|
||||
}
|
||||
|
||||
type ProcessLoopIterationResult = {
|
||||
shouldBreak: boolean;
|
||||
finalText: string;
|
||||
turnCount: number;
|
||||
nudgeCount: number;
|
||||
turnAdjustment: number;
|
||||
};
|
||||
|
||||
async function processLoopIteration(
|
||||
options: RunBuiltinLoopOptions,
|
||||
messages: ChatMessage[],
|
||||
openAiTools: OpenAiToolDefinition[],
|
||||
turn: number,
|
||||
nudgeCount: number,
|
||||
): Promise<ProcessLoopIterationResult> {
|
||||
const response = await chatCompletionWithTools(
|
||||
options.provider,
|
||||
messages,
|
||||
openAiTools.length > 0 ? openAiTools : null,
|
||||
);
|
||||
|
||||
// When noTools is set, ignore any tool_calls the LLM might still return
|
||||
const effectiveToolCalls = options.noTools ? null : (response.toolCalls ?? null);
|
||||
|
||||
const assistantMessage: ChatMessage = {
|
||||
role: "assistant",
|
||||
content: response.content,
|
||||
tool_calls: effectiveToolCalls,
|
||||
};
|
||||
messages.push(assistantMessage);
|
||||
|
||||
if (!shouldProcessToolCalls(effectiveToolCalls, options.noTools)) {
|
||||
const text = response.content ?? "";
|
||||
const result = await handleTextOnlyTurn(
|
||||
text,
|
||||
messages,
|
||||
options.storageRoot,
|
||||
options.sessionId,
|
||||
options.noTools,
|
||||
turn,
|
||||
options.maxTurns,
|
||||
nudgeCount,
|
||||
);
|
||||
return result;
|
||||
}
|
||||
|
||||
// At this point, effectiveToolCalls is guaranteed to be non-null and non-empty
|
||||
const turnCount = await handleToolCallTurn(
|
||||
response.content ?? "",
|
||||
effectiveToolCalls as LlmToolCall[],
|
||||
messages,
|
||||
options.storageRoot,
|
||||
options.sessionId,
|
||||
options.toolCtx,
|
||||
);
|
||||
|
||||
return {
|
||||
shouldBreak: false,
|
||||
finalText: "",
|
||||
turnCount,
|
||||
nudgeCount,
|
||||
turnAdjustment: 0,
|
||||
};
|
||||
}
|
||||
|
||||
/** Agent run loop: LLM ↔ tools until no tool_calls or maxTurns. */
|
||||
export async function runBuiltinLoop(
|
||||
options: RunBuiltinLoopOptions,
|
||||
@@ -99,95 +278,25 @@ export async function runBuiltinLoop(
|
||||
log("8K2M4N7P", `builtin loop turn ${turn + 1}/${options.maxTurns}`);
|
||||
|
||||
// Warn agent when approaching turn limit
|
||||
const turnsRemaining = options.maxTurns - turn;
|
||||
if (!options.noTools && !deadlineWarned && turnsRemaining <= DEADLINE_WARNING_TURNS) {
|
||||
if (shouldInjectDeadlineWarning(turn, options.maxTurns, deadlineWarned, options.noTools)) {
|
||||
deadlineWarned = true;
|
||||
log("4NRXW6KT", `${turnsRemaining} turns remaining, injecting deadline warning`);
|
||||
messages.push({
|
||||
role: "user",
|
||||
content:
|
||||
`⚠️ You have ${turnsRemaining} turns remaining. ` +
|
||||
"Wrap up your work and output the YAML frontmatter starting with `---`. " +
|
||||
"If you cannot finish in time, output frontmatter with `status: failed` and describe what remains.",
|
||||
});
|
||||
const turnsRemaining = options.maxTurns - turn;
|
||||
injectDeadlineWarning(messages, turnsRemaining);
|
||||
}
|
||||
|
||||
const response = await chatCompletionWithTools(
|
||||
options.provider,
|
||||
messages,
|
||||
openAiTools.length > 0 ? openAiTools : null,
|
||||
);
|
||||
const result = await processLoopIteration(options, messages, openAiTools, turn, nudgeCount);
|
||||
turnCount += result.turnCount;
|
||||
nudgeCount = result.nudgeCount;
|
||||
turn += result.turnAdjustment;
|
||||
|
||||
// When noTools is set, ignore any tool_calls the LLM might still return
|
||||
const effectiveToolCalls = options.noTools ? null : (response.toolCalls ?? null);
|
||||
|
||||
const assistantMessage: ChatMessage = {
|
||||
role: "assistant",
|
||||
content: response.content,
|
||||
tool_calls: effectiveToolCalls,
|
||||
};
|
||||
messages.push(assistantMessage);
|
||||
|
||||
if (effectiveToolCalls === null || effectiveToolCalls.length === 0) {
|
||||
const text = response.content ?? "";
|
||||
await appendTurn(options.storageRoot, options.sessionId, {
|
||||
role: "assistant",
|
||||
content: text,
|
||||
toolCalls: null,
|
||||
reasoning: null,
|
||||
});
|
||||
turnCount += 1;
|
||||
|
||||
if (shouldNudge({ noTools: options.noTools, text, turn, maxTurns: options.maxTurns })) {
|
||||
nudgeCount += 1;
|
||||
log("7FXQM2KN", `text-only turn without frontmatter, nudge ${nudgeCount}/${MAX_NUDGES}`);
|
||||
const nudge =
|
||||
"You stopped calling tools but your response does not start with the required `---` YAML frontmatter. " +
|
||||
"Either continue using tools to complete your work, or output your final response starting with `---`.";
|
||||
messages.push({ role: "user", content: nudge });
|
||||
// Nudge doesn't consume turn budget (up to MAX_NUDGES)
|
||||
if (nudgeCount <= MAX_NUDGES) {
|
||||
turn -= 1;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
finalText = text;
|
||||
if (result.shouldBreak) {
|
||||
finalText = result.finalText;
|
||||
break;
|
||||
}
|
||||
|
||||
// Assistant turn with tool calls
|
||||
await appendTurn(options.storageRoot, options.sessionId, {
|
||||
role: "assistant",
|
||||
content: response.content ?? "",
|
||||
toolCalls: mapToolCallsForPayload(effectiveToolCalls),
|
||||
reasoning: null,
|
||||
});
|
||||
turnCount += 1;
|
||||
|
||||
// Execute tools
|
||||
turnCount += await executeTurnTools(
|
||||
effectiveToolCalls,
|
||||
options.toolCtx,
|
||||
messages,
|
||||
options.storageRoot,
|
||||
options.sessionId,
|
||||
);
|
||||
}
|
||||
|
||||
if (finalText === "" && messages.length > 0) {
|
||||
for (let i = messages.length - 1; i >= 0; i--) {
|
||||
const msg = messages[i];
|
||||
if (
|
||||
msg !== undefined &&
|
||||
msg.role === "assistant" &&
|
||||
msg.content !== null &&
|
||||
msg.content.trim() !== ""
|
||||
) {
|
||||
finalText = msg.content;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (finalText === "") {
|
||||
finalText = extractFinalText(messages);
|
||||
}
|
||||
|
||||
return { finalText, messages, turnCount };
|
||||
|
||||
@@ -154,6 +154,99 @@ describe("parseClaudeCodeStreamOutput", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("parseClaudeCodeStreamOutput — helper extraction", () => {
|
||||
test("processSystemLine sets model from system message", () => {
|
||||
const lines = [
|
||||
JSON.stringify({ type: "system", model: "claude-opus-4" }),
|
||||
JSON.stringify({
|
||||
type: "result",
|
||||
subtype: "success",
|
||||
result: "ok",
|
||||
session_id: "s1",
|
||||
num_turns: 0,
|
||||
total_cost_usd: 0,
|
||||
duration_ms: 0,
|
||||
stop_reason: "end_turn",
|
||||
}),
|
||||
];
|
||||
const parsed = parseClaudeCodeStreamOutput(lines.join("\n"));
|
||||
expect(parsed).not.toBeNull();
|
||||
expect(parsed!.model).toBe("claude-opus-4");
|
||||
});
|
||||
|
||||
test("processAssistantLine skips empty content", () => {
|
||||
const lines = [
|
||||
JSON.stringify({ type: "assistant", message: { role: "assistant", content: [] } }),
|
||||
JSON.stringify({
|
||||
type: "result",
|
||||
subtype: "success",
|
||||
result: "ok",
|
||||
session_id: "s1",
|
||||
num_turns: 0,
|
||||
total_cost_usd: 0,
|
||||
duration_ms: 0,
|
||||
stop_reason: "end_turn",
|
||||
}),
|
||||
];
|
||||
const parsed = parseClaudeCodeStreamOutput(lines.join("\n"));
|
||||
expect(parsed).not.toBeNull();
|
||||
expect(parsed!.turns).toHaveLength(0);
|
||||
});
|
||||
|
||||
test("processUserLine skips when no tool_result items", () => {
|
||||
const lines = [
|
||||
JSON.stringify({
|
||||
type: "user",
|
||||
message: { role: "user", content: [{ type: "text", text: "hi" }] },
|
||||
}),
|
||||
JSON.stringify({
|
||||
type: "result",
|
||||
subtype: "success",
|
||||
result: "ok",
|
||||
session_id: "s1",
|
||||
num_turns: 0,
|
||||
total_cost_usd: 0,
|
||||
duration_ms: 0,
|
||||
stop_reason: "end_turn",
|
||||
}),
|
||||
];
|
||||
const parsed = parseClaudeCodeStreamOutput(lines.join("\n"));
|
||||
expect(parsed).not.toBeNull();
|
||||
expect(parsed!.turns).toHaveLength(0);
|
||||
});
|
||||
|
||||
test("turn indices are sequential across mixed assistant and user lines", () => {
|
||||
const lines = [
|
||||
JSON.stringify({
|
||||
type: "assistant",
|
||||
message: { role: "assistant", content: [{ type: "text", text: "A" }] },
|
||||
}),
|
||||
JSON.stringify({
|
||||
type: "user",
|
||||
message: { role: "user", content: [{ type: "tool_result", content: "R" }] },
|
||||
}),
|
||||
JSON.stringify({
|
||||
type: "assistant",
|
||||
message: { role: "assistant", content: [{ type: "text", text: "B" }] },
|
||||
}),
|
||||
JSON.stringify({
|
||||
type: "result",
|
||||
subtype: "success",
|
||||
result: "ok",
|
||||
session_id: "s1",
|
||||
num_turns: 3,
|
||||
total_cost_usd: 0,
|
||||
duration_ms: 0,
|
||||
stop_reason: "end_turn",
|
||||
}),
|
||||
];
|
||||
const parsed = parseClaudeCodeStreamOutput(lines.join("\n"));
|
||||
expect(parsed).not.toBeNull();
|
||||
expect(parsed!.turns).toHaveLength(3);
|
||||
expect(parsed!.turns.map((t) => t.index)).toEqual([0, 1, 2]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("storeClaudeCodeDetail", () => {
|
||||
const baseParsed: ClaudeCodeParsedResult = {
|
||||
type: "result",
|
||||
|
||||
@@ -34,7 +34,7 @@ export const CLAUDE_CODE_DETAIL_SCHEMA: JSONSchema = {
|
||||
},
|
||||
turns: {
|
||||
type: "array",
|
||||
items: { type: "string" },
|
||||
items: { type: "string", format: "cas_ref" },
|
||||
},
|
||||
},
|
||||
additionalProperties: false,
|
||||
|
||||
@@ -67,101 +67,105 @@ function extractToolResultContent(content: unknown[]): string {
|
||||
return results.join("\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse Claude Code stream-json (NDJSON) output.
|
||||
* Each line is a JSON object with type: "system" | "assistant" | "user" | "result".
|
||||
*/
|
||||
export function parseClaudeCodeStreamOutput(stdout: string): ClaudeCodeParsedResult | null {
|
||||
const lines = stdout.trim().split("\n");
|
||||
const turns: ClaudeCodeTurnPayload[] = [];
|
||||
let resultLine: Record<string, unknown> | null = null;
|
||||
let model = "";
|
||||
let turnIndex = 0;
|
||||
type ParseState = {
|
||||
turns: ClaudeCodeTurnPayload[];
|
||||
resultLine: Record<string, unknown> | null;
|
||||
model: string;
|
||||
turnIndex: number;
|
||||
};
|
||||
|
||||
for (const line of lines) {
|
||||
let parsed: unknown;
|
||||
try {
|
||||
parsed = JSON.parse(line);
|
||||
} catch {
|
||||
continue;
|
||||
}
|
||||
if (!isRecord(parsed)) continue;
|
||||
|
||||
const type = parsed.type;
|
||||
|
||||
if (type === "system" && typeof parsed.model === "string") {
|
||||
model = parsed.model;
|
||||
}
|
||||
|
||||
if (type === "assistant" && isRecord(parsed.message)) {
|
||||
const msg = parsed.message;
|
||||
const content = Array.isArray(msg.content) ? msg.content : [];
|
||||
const textContent = extractTextContent(content as unknown[]);
|
||||
const toolCalls = extractToolCalls(content as unknown[]);
|
||||
|
||||
// Only record turns that have actual content
|
||||
if (textContent !== "" || toolCalls.length > 0) {
|
||||
turns.push({
|
||||
index: turnIndex++,
|
||||
role: "assistant",
|
||||
content: textContent,
|
||||
toolCalls: toolCalls.length > 0 ? toolCalls : null,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (type === "user" && isRecord(parsed.message)) {
|
||||
const msg = parsed.message;
|
||||
const content = Array.isArray(msg.content) ? msg.content : [];
|
||||
const resultContent = extractToolResultContent(content as unknown[]);
|
||||
|
||||
if (resultContent !== "") {
|
||||
turns.push({
|
||||
index: turnIndex++,
|
||||
role: "tool_result",
|
||||
content: resultContent,
|
||||
toolCalls: null,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (type === "result") {
|
||||
resultLine = parsed;
|
||||
}
|
||||
function processSystemLine(parsed: Record<string, unknown>, state: ParseState): void {
|
||||
if (typeof parsed.model === "string") {
|
||||
state.model = parsed.model;
|
||||
}
|
||||
}
|
||||
|
||||
if (resultLine === null) return null;
|
||||
function processAssistantLine(parsed: Record<string, unknown>, state: ParseState): void {
|
||||
if (!isRecord(parsed.message)) return;
|
||||
const content = Array.isArray(parsed.message.content) ? parsed.message.content : [];
|
||||
const textContent = extractTextContent(content as unknown[]);
|
||||
const toolCalls = extractToolCalls(content as unknown[]);
|
||||
if (textContent !== "" || toolCalls.length > 0) {
|
||||
state.turns.push({
|
||||
index: state.turnIndex++,
|
||||
role: "assistant",
|
||||
content: textContent,
|
||||
toolCalls: toolCalls.length > 0 ? toolCalls : null,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
const sessionId = resultLine.session_id;
|
||||
const result = resultLine.result;
|
||||
const subtype = resultLine.subtype;
|
||||
function processUserLine(parsed: Record<string, unknown>, state: ParseState): void {
|
||||
if (!isRecord(parsed.message)) return;
|
||||
const content = Array.isArray(parsed.message.content) ? parsed.message.content : [];
|
||||
const resultContent = extractToolResultContent(content as unknown[]);
|
||||
if (resultContent !== "") {
|
||||
state.turns.push({
|
||||
index: state.turnIndex++,
|
||||
role: "tool_result",
|
||||
content: resultContent,
|
||||
toolCalls: null,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
function processLine(line: string, state: ParseState): void {
|
||||
let parsed: unknown;
|
||||
try {
|
||||
parsed = JSON.parse(line);
|
||||
} catch {
|
||||
return;
|
||||
}
|
||||
if (!isRecord(parsed)) return;
|
||||
const type = parsed.type;
|
||||
if (type === "system") processSystemLine(parsed, state);
|
||||
else if (type === "assistant") processAssistantLine(parsed, state);
|
||||
else if (type === "user") processUserLine(parsed, state);
|
||||
else if (type === "result") state.resultLine = parsed;
|
||||
}
|
||||
|
||||
function assembleResult(state: ParseState): ClaudeCodeParsedResult | null {
|
||||
if (state.resultLine === null) return null;
|
||||
const sessionId = state.resultLine.session_id;
|
||||
const result = state.resultLine.result;
|
||||
const subtype = state.resultLine.subtype;
|
||||
if (typeof sessionId !== "string" || typeof result !== "string" || typeof subtype !== "string") {
|
||||
return null;
|
||||
}
|
||||
|
||||
const usage = isRecord(resultLine.usage) ? resultLine.usage : {};
|
||||
|
||||
const usage = isRecord(state.resultLine.usage) ? state.resultLine.usage : {};
|
||||
return {
|
||||
type: safeString(resultLine.type, "result"),
|
||||
type: safeString(state.resultLine.type, "result"),
|
||||
subtype: subtype as ClaudeCodeParsedResult["subtype"],
|
||||
result,
|
||||
sessionId,
|
||||
numTurns: safeNumber(resultLine.num_turns),
|
||||
totalCostUsd: safeNumber(resultLine.total_cost_usd),
|
||||
durationMs: safeNumber(resultLine.duration_ms),
|
||||
model,
|
||||
stopReason: safeString(resultLine.stop_reason),
|
||||
numTurns: safeNumber(state.resultLine.num_turns),
|
||||
totalCostUsd: safeNumber(state.resultLine.total_cost_usd),
|
||||
durationMs: safeNumber(state.resultLine.duration_ms),
|
||||
model: state.model,
|
||||
stopReason: safeString(state.resultLine.stop_reason),
|
||||
usage: {
|
||||
inputTokens: safeNumber(usage.input_tokens),
|
||||
outputTokens: safeNumber(usage.output_tokens),
|
||||
cacheReadInputTokens: safeNumber(usage.cache_read_input_tokens),
|
||||
cacheCreationInputTokens: safeNumber(usage.cache_creation_input_tokens),
|
||||
},
|
||||
turns,
|
||||
turns: state.turns,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse Claude Code stream-json (NDJSON) output.
|
||||
* Each line is a JSON object with type: "system" | "assistant" | "user" | "result".
|
||||
*/
|
||||
export function parseClaudeCodeStreamOutput(stdout: string): ClaudeCodeParsedResult | null {
|
||||
const lines = stdout.trim().split("\n");
|
||||
const state: ParseState = { turns: [], resultLine: null, model: "", turnIndex: 0 };
|
||||
for (const line of lines) {
|
||||
processLine(line, state);
|
||||
}
|
||||
return assembleResult(state);
|
||||
}
|
||||
|
||||
/**
|
||||
* Legacy: parse Claude Code plain JSON output (non-streaming).
|
||||
* Falls back when stream-json is not available.
|
||||
|
||||
@@ -4,6 +4,96 @@ import { HermesAcpClient } from "../src/acp-client.js";
|
||||
|
||||
const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
|
||||
|
||||
describe("handleSessionUpdate — helper extraction", () => {
|
||||
let client: HermesAcpClient;
|
||||
|
||||
beforeEach(() => {
|
||||
client = new HermesAcpClient();
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await client.close();
|
||||
});
|
||||
|
||||
it("agent_message_chunk accumulates text in messageChunks", () => {
|
||||
(client as any).handleSessionUpdate({
|
||||
sessionUpdate: "agent_message_chunk",
|
||||
content: { type: "text", text: "hello" },
|
||||
});
|
||||
(client as any).handleSessionUpdate({
|
||||
sessionUpdate: "agent_message_chunk",
|
||||
content: { type: "text", text: " world" },
|
||||
});
|
||||
expect((client as any).messageChunks).toEqual(["hello", " world"]);
|
||||
});
|
||||
|
||||
it("agent_thought_chunk accumulates reasoning in reasoningChunks", () => {
|
||||
(client as any).handleSessionUpdate({
|
||||
sessionUpdate: "agent_thought_chunk",
|
||||
content: { type: "text", text: "thinking" },
|
||||
});
|
||||
expect((client as any).reasoningChunks).toEqual(["thinking"]);
|
||||
});
|
||||
|
||||
it("tool_call registers a pending tool and flushes message chunks", () => {
|
||||
(client as any).messageChunks = ["pre-tool text"];
|
||||
(client as any).handleSessionUpdate({
|
||||
sessionUpdate: "tool_call",
|
||||
title: "Bash",
|
||||
rawInput: { command: "ls" },
|
||||
toolCallId: "tc-1",
|
||||
});
|
||||
expect((client as any).pendingTools.get("tc-1")).toEqual({
|
||||
name: "Bash",
|
||||
args: JSON.stringify({ command: "ls" }),
|
||||
});
|
||||
expect((client as any).messageChunks).toEqual([]);
|
||||
expect((client as any).messages).toHaveLength(1);
|
||||
expect((client as any).messages[0].role).toBe("assistant");
|
||||
});
|
||||
|
||||
it("tool_call_update completed pushes tool_call and tool messages", () => {
|
||||
(client as any).pendingTools.set("tc-2", { name: "Read", args: '{"path":"/foo"}' });
|
||||
(client as any).handleSessionUpdate({
|
||||
sessionUpdate: "tool_call_update",
|
||||
status: "completed",
|
||||
toolCallId: "tc-2",
|
||||
rawOutput: "file contents",
|
||||
});
|
||||
const msgs = (client as any).messages as Array<{
|
||||
role: string;
|
||||
tool_calls: unknown;
|
||||
content: string | null;
|
||||
}>;
|
||||
expect(msgs).toHaveLength(2);
|
||||
expect(msgs[0].role).toBe("assistant");
|
||||
expect(msgs[0].tool_calls).toEqual([
|
||||
{ function: { name: "Read", arguments: '{"path":"/foo"}' } },
|
||||
]);
|
||||
expect(msgs[1].role).toBe("tool");
|
||||
expect(msgs[1].content).toBe("file contents");
|
||||
expect((client as any).pendingTools.has("tc-2")).toBe(false);
|
||||
});
|
||||
|
||||
it("tool_call_update with non-string rawOutput JSON-stringifies it", () => {
|
||||
(client as any).pendingTools.set("tc-3", { name: "Fetch", args: "" });
|
||||
(client as any).handleSessionUpdate({
|
||||
sessionUpdate: "tool_call_update",
|
||||
status: "completed",
|
||||
toolCallId: "tc-3",
|
||||
rawOutput: { html: "<p>page</p>" },
|
||||
});
|
||||
const msgs = (client as any).messages as Array<{ role: string; content: string | null }>;
|
||||
expect(msgs[1].content).toBe(JSON.stringify({ html: "<p>page</p>" }));
|
||||
});
|
||||
|
||||
it("unknown updateType is a no-op", () => {
|
||||
(client as any).handleSessionUpdate({ sessionUpdate: "unknown_type", data: {} });
|
||||
expect((client as any).messages).toHaveLength(0);
|
||||
expect((client as any).messageChunks).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("HermesAcpClient", () => {
|
||||
let client: HermesAcpClient;
|
||||
|
||||
|
||||
@@ -245,72 +245,75 @@ export class HermesAcpClient {
|
||||
// ---- Session update → structured messages ----
|
||||
|
||||
private handleSessionUpdate(update: Record<string, unknown>): void {
|
||||
const updateType = update.sessionUpdate as string;
|
||||
|
||||
switch (updateType) {
|
||||
case "agent_message_chunk": {
|
||||
const content = update.content as { type?: string; text?: string } | undefined;
|
||||
if (content?.type === "text" && typeof content.text === "string") {
|
||||
this.messageChunks.push(content.text);
|
||||
}
|
||||
switch (update.sessionUpdate as string) {
|
||||
case "agent_message_chunk":
|
||||
this.handleAgentMessageChunk(update);
|
||||
break;
|
||||
}
|
||||
|
||||
case "agent_thought_chunk": {
|
||||
const content = update.content as { type?: string; text?: string } | undefined;
|
||||
if (content?.type === "text" && typeof content.text === "string") {
|
||||
this.reasoningChunks.push(content.text);
|
||||
}
|
||||
case "agent_thought_chunk":
|
||||
this.handleAgentThoughtChunk(update);
|
||||
break;
|
||||
}
|
||||
|
||||
case "tool_call": {
|
||||
const title = (update.title as string) ?? "";
|
||||
const rawInput = update.rawInput;
|
||||
const args = rawInput !== undefined && rawInput !== null ? JSON.stringify(rawInput) : "";
|
||||
const toolCallId = update.toolCallId as string;
|
||||
this.pendingTools.set(toolCallId, { name: title, args });
|
||||
|
||||
// Flush accumulated assistant text before tool call
|
||||
this.flushAssistantMessage();
|
||||
case "tool_call":
|
||||
this.handleToolCall(update);
|
||||
break;
|
||||
}
|
||||
|
||||
case "tool_call_update": {
|
||||
const status = update.status as string | undefined;
|
||||
if (status === "completed" || status === "failed") {
|
||||
const toolCallId = update.toolCallId as string;
|
||||
const pending = this.pendingTools.get(toolCallId);
|
||||
const toolName = pending?.name ?? toolCallId;
|
||||
const rawOutput = update.rawOutput;
|
||||
const outputStr =
|
||||
rawOutput !== undefined && rawOutput !== null
|
||||
? typeof rawOutput === "string"
|
||||
? rawOutput
|
||||
: JSON.stringify(rawOutput)
|
||||
: "";
|
||||
this.messages.push({
|
||||
role: "assistant",
|
||||
content: null,
|
||||
reasoning: null,
|
||||
tool_calls: [{ function: { name: toolName, arguments: pending?.args ?? "" } }],
|
||||
});
|
||||
this.messages.push({
|
||||
role: "tool",
|
||||
content: outputStr,
|
||||
reasoning: null,
|
||||
tool_calls: null,
|
||||
});
|
||||
this.pendingTools.delete(toolCallId);
|
||||
}
|
||||
case "tool_call_update":
|
||||
this.handleToolCallUpdate(update);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
private handleAgentMessageChunk(update: Record<string, unknown>): void {
|
||||
const content = update.content as { type?: string; text?: string } | undefined;
|
||||
if (content?.type === "text" && typeof content.text === "string") {
|
||||
this.messageChunks.push(content.text);
|
||||
}
|
||||
}
|
||||
|
||||
private handleAgentThoughtChunk(update: Record<string, unknown>): void {
|
||||
const content = update.content as { type?: string; text?: string } | undefined;
|
||||
if (content?.type === "text" && typeof content.text === "string") {
|
||||
this.reasoningChunks.push(content.text);
|
||||
}
|
||||
}
|
||||
|
||||
private handleToolCall(update: Record<string, unknown>): void {
|
||||
const title = (update.title as string) ?? "";
|
||||
const rawInput = update.rawInput;
|
||||
const args = rawInput !== undefined && rawInput !== null ? JSON.stringify(rawInput) : "";
|
||||
const toolCallId = update.toolCallId as string;
|
||||
this.pendingTools.set(toolCallId, { name: title, args });
|
||||
this.flushAssistantMessage();
|
||||
}
|
||||
|
||||
private handleToolCallUpdate(update: Record<string, unknown>): void {
|
||||
const status = update.status as string | undefined;
|
||||
if (status !== "completed" && status !== "failed") return;
|
||||
const toolCallId = update.toolCallId as string;
|
||||
const pending = this.pendingTools.get(toolCallId);
|
||||
const toolName = pending?.name ?? toolCallId;
|
||||
const rawOutput = update.rawOutput;
|
||||
const outputStr =
|
||||
rawOutput !== undefined && rawOutput !== null
|
||||
? typeof rawOutput === "string"
|
||||
? rawOutput
|
||||
: JSON.stringify(rawOutput)
|
||||
: "";
|
||||
this.messages.push({
|
||||
role: "assistant",
|
||||
content: null,
|
||||
reasoning: null,
|
||||
tool_calls: [{ function: { name: toolName, arguments: pending?.args ?? "" } }],
|
||||
});
|
||||
this.messages.push({
|
||||
role: "tool",
|
||||
content: outputStr,
|
||||
reasoning: null,
|
||||
tool_calls: null,
|
||||
});
|
||||
this.pendingTools.delete(toolCallId);
|
||||
}
|
||||
|
||||
/** Flush any accumulated text/reasoning into an assistant message. */
|
||||
private flushAssistantMessage(): void {
|
||||
const text = this.messageChunks.join("");
|
||||
|
||||
@@ -21,14 +21,6 @@ function fail(message: string): never {
|
||||
throw new Error(message);
|
||||
}
|
||||
|
||||
function readEdgePrompt(): string {
|
||||
const value = process.env.UWF_EDGE_PROMPT;
|
||||
if (value === undefined || value === "") {
|
||||
fail("UWF_EDGE_PROMPT environment variable is required");
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
function walkChain(store: Store, schemas: AgentStore["schemas"], headHash: CasRef): ChainState {
|
||||
const headNode = store.get(headHash);
|
||||
if (headNode === null) {
|
||||
@@ -123,7 +115,11 @@ async function loadWorkflow(store: Store, schemas: AgentStore["schemas"], workfl
|
||||
* Build agent execution context from thread head in threads.yaml.
|
||||
* Walks the CAS chain from head to StartNode and expands step outputs.
|
||||
*/
|
||||
export async function buildContext(threadId: ThreadId, role: string): Promise<AgentContext> {
|
||||
export async function buildContext(
|
||||
threadId: ThreadId,
|
||||
role: string,
|
||||
edgePrompt: string,
|
||||
): Promise<AgentContext> {
|
||||
const storageRoot = resolveStorageRoot();
|
||||
const agentStore = await createAgentStore(storageRoot);
|
||||
const { store, schemas } = agentStore;
|
||||
@@ -142,7 +138,6 @@ export async function buildContext(threadId: ThreadId, role: string): Promise<Ag
|
||||
}
|
||||
|
||||
const steps = await buildHistory(store, chain.stepsNewestFirst);
|
||||
const edgePrompt = readEdgePrompt();
|
||||
const isFirstVisit = !steps.some((s) => s.role === role);
|
||||
|
||||
return {
|
||||
@@ -172,6 +167,7 @@ export type BuildContextMeta = {
|
||||
export async function buildContextWithMeta(
|
||||
threadId: ThreadId,
|
||||
role: string,
|
||||
edgePrompt: string,
|
||||
): Promise<AgentContext & { meta: BuildContextMeta }> {
|
||||
const storageRoot = resolveStorageRoot();
|
||||
const agentStore = await createAgentStore(storageRoot);
|
||||
@@ -191,7 +187,6 @@ export async function buildContextWithMeta(
|
||||
}
|
||||
|
||||
const steps = await buildHistory(store, chain.stepsNewestFirst);
|
||||
const edgePrompt = readEdgePrompt();
|
||||
const isFirstVisit = !steps.some((s) => s.role === role);
|
||||
|
||||
return {
|
||||
|
||||
@@ -22,16 +22,24 @@ function agentLabel(name: string): string {
|
||||
return `uwf-${name}`;
|
||||
}
|
||||
|
||||
function parseArgv(argv: string[]): { threadId: ThreadId; role: string } {
|
||||
const threadId = argv[2];
|
||||
const role = argv[3];
|
||||
if (threadId === undefined || threadId === "") {
|
||||
fail("usage: <agent-cli> <thread-id> <role>");
|
||||
const USAGE = "usage: <agent-cli> --thread <id> --role <role> --prompt <text>";
|
||||
|
||||
function getNamedArg(argv: string[], name: string): string {
|
||||
const idx = argv.indexOf(name);
|
||||
if (idx === -1 || idx + 1 >= argv.length) {
|
||||
return "";
|
||||
}
|
||||
if (role === undefined || role === "") {
|
||||
fail("usage: <agent-cli> <thread-id> <role>");
|
||||
}
|
||||
return { threadId: threadId as ThreadId, role };
|
||||
return argv[idx + 1];
|
||||
}
|
||||
|
||||
function parseArgv(argv: string[]): { threadId: ThreadId; role: string; prompt: string } {
|
||||
const threadId = getNamedArg(argv, "--thread");
|
||||
const role = getNamedArg(argv, "--role");
|
||||
const prompt = getNamedArg(argv, "--prompt");
|
||||
if (threadId === "") fail(USAGE);
|
||||
if (role === "") fail(USAGE);
|
||||
if (prompt === "") fail(USAGE);
|
||||
return { threadId: threadId as ThreadId, role, prompt };
|
||||
}
|
||||
|
||||
function runWithMessage<T>(label: string, fn: () => Promise<T>): Promise<T> {
|
||||
@@ -103,11 +111,11 @@ async function persistStep(options: {
|
||||
|
||||
export function createAgent(options: AgentOptions): () => Promise<void> {
|
||||
return async function main(): Promise<void> {
|
||||
const { threadId, role } = parseArgv(process.argv);
|
||||
const { threadId, role, prompt } = parseArgv(process.argv);
|
||||
const storageRoot = resolveStorageRoot();
|
||||
loadDotenv({ path: getEnvPath(storageRoot) });
|
||||
|
||||
const ctx = await runWithMessage("context", () => buildContextWithMeta(threadId, role));
|
||||
const ctx = await runWithMessage("context", () => buildContextWithMeta(threadId, role, prompt));
|
||||
|
||||
const roleDef = ctx.workflow.roles[role];
|
||||
if (roleDef === undefined) {
|
||||
|
||||
@@ -13,7 +13,7 @@ export type AgentContext = ModeratorContext & {
|
||||
*/
|
||||
outputFormatInstruction: string;
|
||||
/**
|
||||
* Edge prompt from the graph transition that led to this role (UWF_EDGE_PROMPT).
|
||||
* Edge prompt from the graph transition that led to this role (--prompt CLI arg).
|
||||
* Always the real moderator instruction for this step.
|
||||
*/
|
||||
edgePrompt: string;
|
||||
|
||||
@@ -15,6 +15,8 @@ export type {
|
||||
ProviderConfig,
|
||||
RoleDefinition,
|
||||
RoleName,
|
||||
RunningThreadItem,
|
||||
RunningThreadsOutput,
|
||||
Scenario,
|
||||
StartEntry,
|
||||
StartNodePayload,
|
||||
|
||||
@@ -84,6 +84,7 @@ export type StepOutput = {
|
||||
thread: ThreadId;
|
||||
head: CasRef;
|
||||
done: boolean;
|
||||
background: boolean | null;
|
||||
};
|
||||
|
||||
/** uwf thread steps — single step entry */
|
||||
@@ -126,6 +127,19 @@ export type ThreadListItem = {
|
||||
head: CasRef;
|
||||
};
|
||||
|
||||
/** uwf thread running — single running thread entry */
|
||||
export type RunningThreadItem = {
|
||||
thread: ThreadId;
|
||||
workflow: CasRef;
|
||||
pid: number;
|
||||
startedAt: number;
|
||||
};
|
||||
|
||||
/** uwf thread running output */
|
||||
export type RunningThreadsOutput = {
|
||||
threads: RunningThreadItem[];
|
||||
};
|
||||
|
||||
// ── 4.6 配置 ────────────────────────────────────────────────────────
|
||||
|
||||
/** Alias types for config references */
|
||||
|
||||
Executable
+89
@@ -0,0 +1,89 @@
|
||||
#!/usr/bin/env bash
|
||||
# batch-solve.sh — solve multiple Gitea issues via solve-issue workflow
|
||||
#
|
||||
# Usage:
|
||||
# ./scripts/batch-solve.sh [--agent CMD] [--repo OWNER/REPO] [--count N] ISSUE_NUM...
|
||||
#
|
||||
# Examples:
|
||||
# ./scripts/batch-solve.sh 448 449
|
||||
# ./scripts/batch-solve.sh --agent "bun run $(pwd)/packages/workflow-agent-claude-code/src/cli.ts" 448 449
|
||||
# ./scripts/batch-solve.sh --repo uncaged/workflow --count 15 448 449
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
AGENT=""
|
||||
REPO="uncaged/workflow"
|
||||
COUNT=10
|
||||
ISSUES=()
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
--agent) AGENT="$2"; shift 2 ;;
|
||||
--repo) REPO="$2"; shift 2 ;;
|
||||
--count) COUNT="$2"; shift 2 ;;
|
||||
*) ISSUES+=("$1"); shift ;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [[ ${#ISSUES[@]} -eq 0 ]]; then
|
||||
echo "Usage: $0 [--agent CMD] [--repo OWNER/REPO] [--count N] ISSUE_NUM..." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
AGENT_FLAG=""
|
||||
if [[ -n "$AGENT" ]]; then
|
||||
AGENT_FLAG="--agent $AGENT"
|
||||
fi
|
||||
|
||||
TOTAL=${#ISSUES[@]}
|
||||
PASSED=0
|
||||
FAILED=0
|
||||
RESULTS=()
|
||||
|
||||
echo "━━━ Batch solve: ${TOTAL} issues ━━━"
|
||||
echo ""
|
||||
|
||||
for i in "${!ISSUES[@]}"; do
|
||||
ISSUE="${ISSUES[$i]}"
|
||||
NUM=$((i + 1))
|
||||
echo "┌─── [$NUM/$TOTAL] Issue #${ISSUE} ───"
|
||||
|
||||
# Read issue title
|
||||
TITLE=$(tea issues "$ISSUE" -r "$REPO" 2>/dev/null | head -1 | sed 's/^# #[0-9]* //' | sed 's/ (.*//' || echo "unknown")
|
||||
echo "│ Title: $TITLE"
|
||||
|
||||
# Start thread
|
||||
PROMPT="Fix issue #${ISSUE} in ${REPO}. Read the issue first with 'tea issues ${ISSUE} -r ${REPO}' for full spec."
|
||||
THREAD_JSON=$(uwf thread start solve-issue -p "$PROMPT" 2>&1)
|
||||
THREAD_ID=$(echo "$THREAD_JSON" | python3 -c "import json,sys; print(json.load(sys.stdin)['thread'])")
|
||||
echo "│ Thread: $THREAD_ID"
|
||||
|
||||
# Run steps
|
||||
echo "│ Running (max $COUNT steps)..."
|
||||
# shellcheck disable=SC2086
|
||||
if STEP_OUTPUT=$(uwf thread step "$THREAD_ID" $AGENT_FLAG -c "$COUNT" 2>&1); then
|
||||
# Check if done
|
||||
LAST_DONE=$(echo "$STEP_OUTPUT" | python3 -c "import json,sys; lines=sys.stdin.read().strip(); data=json.loads(lines); print(data[-1].get('done', False))")
|
||||
if [[ "$LAST_DONE" == "True" ]]; then
|
||||
echo "│ ✅ Done!"
|
||||
PASSED=$((PASSED + 1))
|
||||
RESULTS+=("✅ #${ISSUE} — ${TITLE}")
|
||||
else
|
||||
echo "│ ⚠️ Ran out of steps (not done)"
|
||||
FAILED=$((FAILED + 1))
|
||||
RESULTS+=("⚠️ #${ISSUE} — ${TITLE} (incomplete)")
|
||||
fi
|
||||
else
|
||||
echo "│ ❌ Failed"
|
||||
FAILED=$((FAILED + 1))
|
||||
RESULTS+=("❌ #${ISSUE} — ${TITLE} (error)")
|
||||
fi
|
||||
|
||||
echo "└───"
|
||||
echo ""
|
||||
done
|
||||
|
||||
echo "━━━ Results: ${PASSED}/${TOTAL} passed, ${FAILED} failed ━━━"
|
||||
for R in "${RESULTS[@]}"; do
|
||||
echo " $R"
|
||||
done
|
||||
Reference in New Issue
Block a user