Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 6481fc0cc5 | |||
| 3190e06ebe | |||
| f8ae2fe25b | |||
| 48a274685b | |||
| 5b68359dfc | |||
| c2ddfb8558 | |||
| 603018caf2 |
@@ -0,0 +1,67 @@
|
||||
# Sync README
|
||||
|
||||
When updating README.md files in this monorepo, follow these conventions.
|
||||
|
||||
## Scope
|
||||
|
||||
- Root `README.md` — project overview and navigation hub
|
||||
- Per-package `packages/*/README.md` — each package self-contained
|
||||
|
||||
## Root README Structure
|
||||
|
||||
The root README should have these sections in order:
|
||||
|
||||
1. **Title and one-liner** — stateless workflow engine driven by single-step CLI
|
||||
2. **Overview** — 2-3 paragraphs explaining what it does and key concepts
|
||||
3. **Architecture** — dependency layer diagram (text-based)
|
||||
4. **Packages** — table with ALL packages from packages/ directory, columns: Package, Description, Type (cli/lib/agent/app)
|
||||
5. **Quick Start** — install, build, register workflow, start thread, run step
|
||||
6. **CLI Reference** — brief command list, detailed usage in cli-workflow README
|
||||
7. **Development** — bun install / build / check / test
|
||||
|
||||
## Per-Package README Structure
|
||||
|
||||
Each package README should have:
|
||||
|
||||
1. **Title** — package name
|
||||
2. **One-line description** — matching package.json
|
||||
3. **Overview** — what it does, where it sits in the architecture, dependencies
|
||||
4. **Installation** — bun add (for libs) or "included as binary" (for cli/agents)
|
||||
5. **API** (lib packages) — all exports from src/index.ts with type signatures, grouped by category, minimal usage examples
|
||||
6. **CLI Usage** (cli/agent packages) — command reference with examples
|
||||
7. **Internal Structure** — brief src/ file organization
|
||||
8. **Configuration** (if applicable)
|
||||
|
||||
## Execution Steps
|
||||
|
||||
### Step 1: Gather current state
|
||||
For each package read:
|
||||
- package.json (name, version, description, dependencies, bin)
|
||||
- src/index.ts (public API exports)
|
||||
- Existing README.md (preserve hand-written content worth keeping)
|
||||
|
||||
### Step 2: Update root README
|
||||
- Ensure ALL packages in packages/ directory are listed in the table
|
||||
- Update CLI command reference from uwf --help output
|
||||
- Keep Quick Start examples valid
|
||||
|
||||
### Step 3: Write/update each package README
|
||||
- Follow the per-package structure
|
||||
- API section MUST match actual src/index.ts exports — never invent
|
||||
- For agent packages: document CLI binary name, how it is invoked
|
||||
- For lib packages: document exported types and functions
|
||||
- Internal structure: list actual files in src/
|
||||
|
||||
### Step 4: Verify
|
||||
- All relative links work
|
||||
- Package names match package.json
|
||||
- No references to removed/renamed packages
|
||||
- bun run build still passes
|
||||
|
||||
## Guidelines
|
||||
|
||||
- Only document what src/index.ts actually exports
|
||||
- Root README summarizes, package READMEs go into detail
|
||||
- Verify CLI examples against actual commands
|
||||
- Preserve existing good prose when updating
|
||||
- English for all README content
|
||||
@@ -382,10 +382,6 @@ describe("cmdThreadStepDetails", () => {
|
||||
content: "done",
|
||||
});
|
||||
});
|
||||
|
||||
test("throws when step hash does not exist", async () => {
|
||||
await expect(cmdThreadStepDetails(tmpDir, "nonexistenth0" as CasRef)).rejects.toThrow();
|
||||
});
|
||||
});
|
||||
|
||||
// ── cmdThreadRead: ### Prompt deduplication ───────────────────────────────────
|
||||
@@ -471,3 +467,181 @@ describe("cmdThreadRead ### Prompt deduplication", () => {
|
||||
expect(count).toBe(2);
|
||||
});
|
||||
});
|
||||
|
||||
// ── cmdThreadRead: showStart / before / quota ─────────────────────────────────
|
||||
|
||||
describe("cmdThreadRead start section / before / quota", () => {
|
||||
async function makeSimpleThread(
|
||||
uwf: UwfStore,
|
||||
roles: string[],
|
||||
): Promise<{ startHash: CasRef; stepHashes: CasRef[] }> {
|
||||
const uniqueRoles = [...new Set(roles)];
|
||||
const workflowHash = await uwf.store.put(uwf.schemas.workflow, {
|
||||
name: "simple-wf",
|
||||
description: "desc",
|
||||
roles: Object.fromEntries(
|
||||
uniqueRoles.map((r) => [
|
||||
r,
|
||||
{
|
||||
description: r,
|
||||
goal: `Goal for ${r}`,
|
||||
capabilities: [],
|
||||
procedure: "Do stuff.",
|
||||
output: "Output.",
|
||||
meta: "placeholder00" as CasRef,
|
||||
},
|
||||
]),
|
||||
),
|
||||
conditions: {},
|
||||
graph: {},
|
||||
});
|
||||
const startHash = (await uwf.store.put(uwf.schemas.startNode, {
|
||||
workflow: workflowHash,
|
||||
prompt: "Initial prompt",
|
||||
})) as CasRef;
|
||||
const outputHash = await uwf.store.put(uwf.schemas.workflow, {
|
||||
name: "out",
|
||||
description: "",
|
||||
roles: {},
|
||||
conditions: {},
|
||||
graph: {},
|
||||
});
|
||||
|
||||
const stepHashes: CasRef[] = [];
|
||||
let prev: CasRef | null = null;
|
||||
for (const role of roles) {
|
||||
const stepHash = (await uwf.store.put(uwf.schemas.stepNode, {
|
||||
start: startHash,
|
||||
prev,
|
||||
role,
|
||||
output: outputHash,
|
||||
detail: null,
|
||||
agent: "uwf-test",
|
||||
})) as CasRef;
|
||||
stepHashes.push(stepHash);
|
||||
prev = stepHash;
|
||||
}
|
||||
return { startHash, stepHashes };
|
||||
}
|
||||
|
||||
test("showStart=true includes # Thread header and ## Task section", async () => {
|
||||
const uwf = await makeUwfStore(tmpDir);
|
||||
const { stepHashes } = await makeSimpleThread(uwf, ["roleA"]);
|
||||
const threadId = "01JTEST0000000000000006" as ThreadId;
|
||||
await saveThreadsIndex(tmpDir, { [threadId]: stepHashes[stepHashes.length - 1]! });
|
||||
|
||||
const markdown = await cmdThreadRead(tmpDir, threadId, THREAD_READ_DEFAULT_QUOTA, null, true);
|
||||
expect(markdown).toContain("# Thread");
|
||||
expect(markdown).toContain("## Task");
|
||||
expect(markdown).toContain("Initial prompt");
|
||||
});
|
||||
|
||||
test("showStart=false with before=null still shows # Thread header (default behavior)", async () => {
|
||||
const uwf = await makeUwfStore(tmpDir);
|
||||
const { stepHashes } = await makeSimpleThread(uwf, ["roleA"]);
|
||||
const threadId = "01JTEST0000000000000007" as ThreadId;
|
||||
await saveThreadsIndex(tmpDir, { [threadId]: stepHashes[stepHashes.length - 1]! });
|
||||
|
||||
// When before=null, the start section is always shown regardless of showStart
|
||||
const markdown = await cmdThreadRead(tmpDir, threadId, THREAD_READ_DEFAULT_QUOTA, null, false);
|
||||
expect(markdown).toContain("# Thread");
|
||||
expect(markdown).toContain("## Task");
|
||||
});
|
||||
|
||||
test("before filter: only steps before the given hash appear", async () => {
|
||||
const uwf = await makeUwfStore(tmpDir);
|
||||
const { stepHashes } = await makeSimpleThread(uwf, ["roleA", "roleB", "roleC"]);
|
||||
const [_hashA, hashB, hashC] = stepHashes as [CasRef, CasRef, CasRef];
|
||||
const threadId = "01JTEST0000000000000008" as ThreadId;
|
||||
await saveThreadsIndex(tmpDir, { [threadId]: hashC });
|
||||
|
||||
const markdown = await cmdThreadRead(tmpDir, threadId, THREAD_READ_DEFAULT_QUOTA, hashB, false);
|
||||
expect(markdown).toContain("roleA");
|
||||
expect(markdown).not.toContain("roleB");
|
||||
expect(markdown).not.toContain("roleC");
|
||||
});
|
||||
|
||||
test("quota=1 limits output and includes skip hint", async () => {
|
||||
const uwf = await makeUwfStore(tmpDir);
|
||||
const { stepHashes } = await makeSimpleThread(uwf, ["roleA", "roleB", "roleC"]);
|
||||
const threadId = "01JTEST000000000000000A" as ThreadId;
|
||||
await saveThreadsIndex(tmpDir, { [threadId]: stepHashes[stepHashes.length - 1]! });
|
||||
|
||||
const markdown = await cmdThreadRead(tmpDir, threadId, 1, null, false);
|
||||
expect(markdown).toContain("earlier step");
|
||||
});
|
||||
|
||||
test("all steps fit in quota: no skip hint", async () => {
|
||||
const uwf = await makeUwfStore(tmpDir);
|
||||
const { stepHashes } = await makeSimpleThread(uwf, ["roleA"]);
|
||||
const threadId = "01JTEST000000000000000B" as ThreadId;
|
||||
await saveThreadsIndex(tmpDir, { [threadId]: stepHashes[0]! });
|
||||
|
||||
const markdown = await cmdThreadRead(tmpDir, threadId, THREAD_READ_DEFAULT_QUOTA, null, false);
|
||||
expect(markdown).not.toContain("earlier step");
|
||||
});
|
||||
});
|
||||
|
||||
// ── Tests that call process.exit must be last ─────────────────────────────────
|
||||
|
||||
describe("cmdThreadStepDetails (process.exit tests - must be last)", () => {
|
||||
test("throws when step hash does not exist", async () => {
|
||||
await expect(cmdThreadStepDetails(tmpDir, "nonexistenth0" as CasRef)).rejects.toThrow();
|
||||
});
|
||||
|
||||
test("before with unknown hash rejects", async () => {
|
||||
const _uwf = await makeUwfStore(tmpDir);
|
||||
const casDir = join(tmpDir, "cas");
|
||||
await mkdir(casDir, { recursive: true });
|
||||
const store = createFsStore(casDir);
|
||||
const schemas = await registerUwfSchemas(store);
|
||||
const uwfStore: UwfStore = { storageRoot: tmpDir, store, schemas };
|
||||
|
||||
const workflowHash = await uwfStore.store.put(uwfStore.schemas.workflow, {
|
||||
name: "wf2",
|
||||
description: "",
|
||||
roles: {
|
||||
roleA: {
|
||||
description: "r",
|
||||
goal: "g",
|
||||
capabilities: [],
|
||||
procedure: "p",
|
||||
output: "o",
|
||||
meta: "placeholder00" as CasRef,
|
||||
},
|
||||
},
|
||||
conditions: {},
|
||||
graph: {},
|
||||
});
|
||||
const startHash = await uwfStore.store.put(uwfStore.schemas.startNode, {
|
||||
workflow: workflowHash,
|
||||
prompt: "p",
|
||||
});
|
||||
const outputHash = await uwfStore.store.put(uwfStore.schemas.workflow, {
|
||||
name: "out",
|
||||
description: "",
|
||||
roles: {},
|
||||
conditions: {},
|
||||
graph: {},
|
||||
});
|
||||
const stepHash = await uwfStore.store.put(uwfStore.schemas.stepNode, {
|
||||
start: startHash,
|
||||
prev: null,
|
||||
role: "roleA",
|
||||
output: outputHash,
|
||||
detail: null,
|
||||
agent: "uwf-test",
|
||||
});
|
||||
await saveThreadsIndex(tmpDir, { ["01JTEST000000000000000C" as ThreadId]: stepHash as CasRef });
|
||||
|
||||
await expect(
|
||||
cmdThreadRead(
|
||||
tmpDir,
|
||||
"01JTEST000000000000000C" as ThreadId,
|
||||
THREAD_READ_DEFAULT_QUOTA,
|
||||
"unknownhash0" as CasRef,
|
||||
false,
|
||||
),
|
||||
).rejects.toThrow();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -462,49 +462,68 @@ function expandDeep(store: CasStore, hash: CasRef, visited?: Set<string>): unkno
|
||||
return expandValue(store, schema, node.payload, seen);
|
||||
}
|
||||
|
||||
function expandCasRefField(store: CasStore, value: unknown, visited: Set<string>): unknown {
|
||||
if (typeof value === "string") {
|
||||
return expandDeep(store, value as CasRef, visited);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
function expandAnyOfField(
|
||||
store: CasStore,
|
||||
schema: JSONSchema,
|
||||
value: unknown,
|
||||
visited: Set<string>,
|
||||
): unknown {
|
||||
if (!Array.isArray(schema.anyOf)) return value;
|
||||
for (const sub of schema.anyOf as JSONSchema[]) {
|
||||
if (sub.format === "cas_ref" && typeof value === "string") {
|
||||
return expandDeep(store, value as CasRef, visited);
|
||||
}
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
function expandArrayField(
|
||||
store: CasStore,
|
||||
schema: JSONSchema,
|
||||
value: unknown,
|
||||
visited: Set<string>,
|
||||
): unknown {
|
||||
if (!schema.items || !Array.isArray(value)) return value;
|
||||
const itemSchema = schema.items as JSONSchema;
|
||||
return (value as unknown[]).map((item) => expandValue(store, itemSchema, item, visited));
|
||||
}
|
||||
|
||||
function expandObjectField(
|
||||
store: CasStore,
|
||||
schema: JSONSchema,
|
||||
value: unknown,
|
||||
visited: Set<string>,
|
||||
): unknown {
|
||||
if (value === null || typeof value !== "object" || Array.isArray(value) || !schema.properties) {
|
||||
return value;
|
||||
}
|
||||
const props = schema.properties as Record<string, JSONSchema>;
|
||||
const obj = value as Record<string, unknown>;
|
||||
const result: Record<string, unknown> = {};
|
||||
for (const [key, val] of Object.entries(obj)) {
|
||||
const propSchema = props[key];
|
||||
result[key] = propSchema ? expandValue(store, propSchema, val, visited) : val;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
function expandValue(
|
||||
store: CasStore,
|
||||
schema: JSONSchema,
|
||||
value: unknown,
|
||||
visited: Set<string>,
|
||||
): unknown {
|
||||
// If this field is a cas_ref, expand it
|
||||
if (schema.format === "cas_ref") {
|
||||
if (typeof value === "string") {
|
||||
return expandDeep(store, value as CasRef, visited);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
// anyOf (nullable refs)
|
||||
if (Array.isArray(schema.anyOf)) {
|
||||
for (const sub of schema.anyOf as JSONSchema[]) {
|
||||
if (sub.format === "cas_ref" && typeof value === "string") {
|
||||
return expandDeep(store, value as CasRef, visited);
|
||||
}
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
// Array of cas_ref items
|
||||
if (schema.type === "array" && schema.items && Array.isArray(value)) {
|
||||
const itemSchema = schema.items as JSONSchema;
|
||||
return (value as unknown[]).map((item) => expandValue(store, itemSchema, item, visited));
|
||||
}
|
||||
|
||||
// Object with properties
|
||||
if (value !== null && typeof value === "object" && !Array.isArray(value) && schema.properties) {
|
||||
const props = schema.properties as Record<string, JSONSchema>;
|
||||
const obj = value as Record<string, unknown>;
|
||||
const result: Record<string, unknown> = {};
|
||||
for (const [key, val] of Object.entries(obj)) {
|
||||
const propSchema = props[key];
|
||||
result[key] = propSchema ? expandValue(store, propSchema, val, visited) : val;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
return value;
|
||||
if (schema.format === "cas_ref") return expandCasRefField(store, value, visited);
|
||||
if (Array.isArray(schema.anyOf)) return expandAnyOfField(store, schema, value, visited);
|
||||
if (schema.type === "array") return expandArrayField(store, schema, value, visited);
|
||||
return expandObjectField(store, schema, value, visited);
|
||||
}
|
||||
|
||||
function collectOrderedSteps(
|
||||
@@ -588,6 +607,85 @@ export function extractLastAssistantContent(uwf: UwfStore, detailRef: CasRef): s
|
||||
return null;
|
||||
}
|
||||
|
||||
function sliceBeforeHash(
|
||||
candidates: OrderedStepItem[],
|
||||
before: CasRef,
|
||||
threadId: ThreadId,
|
||||
): OrderedStepItem[] {
|
||||
const idx = candidates.findIndex((s) => s.hash === before);
|
||||
if (idx === -1) {
|
||||
fail(`step ${before} not found in thread ${threadId}`);
|
||||
}
|
||||
return candidates.slice(0, idx);
|
||||
}
|
||||
|
||||
function selectByQuota(
|
||||
candidates: OrderedStepItem[],
|
||||
uwf: UwfStore,
|
||||
quota: number,
|
||||
): { selected: OrderedStepItem[]; skippedCount: number } {
|
||||
const selected: OrderedStepItem[] = [];
|
||||
let totalChars = 0;
|
||||
for (let i = candidates.length - 1; i >= 0; i--) {
|
||||
const item = candidates[i];
|
||||
if (item === undefined) continue;
|
||||
const outputYaml = formatYaml(expandOutput(uwf, item.payload.output));
|
||||
const blockLen = formatCompactStep(i + 1, item, outputYaml).length;
|
||||
selected.unshift(item);
|
||||
totalChars += blockLen;
|
||||
if (totalChars > quota) break;
|
||||
}
|
||||
return { selected, skippedCount: candidates.length - selected.length };
|
||||
}
|
||||
|
||||
function formatStepHeader(stepNum: number, item: OrderedStepItem): string {
|
||||
const ts = new Date(item.timestamp)
|
||||
.toISOString()
|
||||
.replace("T", " ")
|
||||
.replace(/\.\d+Z$/, "");
|
||||
return [
|
||||
`## Step ${stepNum}: ${item.payload.role} \`${item.hash}\``,
|
||||
`**Agent:** ${item.payload.agent} | **Time:** ${ts}`,
|
||||
].join("\n");
|
||||
}
|
||||
|
||||
function formatStepPrompt(
|
||||
roleDef: WorkflowPayload["roles"][string] | undefined,
|
||||
role: string,
|
||||
shownPromptRoles: Set<string>,
|
||||
): string {
|
||||
if (!roleDef || shownPromptRoles.has(role)) return "";
|
||||
shownPromptRoles.add(role);
|
||||
return ["", "", "### Prompt", "", roleDef.goal].join("\n");
|
||||
}
|
||||
|
||||
function formatStepContent(uwf: UwfStore, item: OrderedStepItem): string {
|
||||
if (!item.payload.detail) return "";
|
||||
const content = extractLastAssistantContent(uwf, item.payload.detail);
|
||||
if (content === null) return "";
|
||||
return ["", "", "### Content", "", content].join("\n");
|
||||
}
|
||||
|
||||
function formatStartSection(options: {
|
||||
threadId: ThreadId;
|
||||
workflowName: string;
|
||||
workflowHash: CasRef;
|
||||
prompt: string;
|
||||
before: CasRef | null;
|
||||
showStart: boolean;
|
||||
}): string {
|
||||
if (options.before !== null && !options.showStart) return "";
|
||||
return [
|
||||
`# Thread \`${options.threadId}\``,
|
||||
"",
|
||||
`**Workflow:** ${options.workflowName} (\`${options.workflowHash}\`)`,
|
||||
"",
|
||||
"## Task",
|
||||
"",
|
||||
options.prompt,
|
||||
].join("\n");
|
||||
}
|
||||
|
||||
function formatThreadReadMarkdown(options: {
|
||||
threadId: ThreadId;
|
||||
workflowName: string;
|
||||
@@ -600,50 +698,16 @@ function formatThreadReadMarkdown(options: {
|
||||
before: CasRef | null;
|
||||
showStart: boolean;
|
||||
}): string {
|
||||
const { ordered, uwf, workflow, quota, before, showStart } = options;
|
||||
const { ordered, uwf, workflow, quota, before } = options;
|
||||
|
||||
// Determine which steps to consider
|
||||
let candidates = ordered;
|
||||
if (before !== null) {
|
||||
const idx = candidates.findIndex((s) => s.hash === before);
|
||||
if (idx === -1) {
|
||||
fail(`step ${before} not found in thread ${options.threadId}`);
|
||||
}
|
||||
candidates = candidates.slice(0, idx);
|
||||
}
|
||||
const candidates = before !== null ? sliceBeforeHash(ordered, before, options.threadId) : ordered;
|
||||
const { selected, skippedCount } = selectByQuota(candidates, uwf, quota);
|
||||
|
||||
// Walk backward from newest, accumulating chars until quota exceeded
|
||||
const selected: OrderedStepItem[] = [];
|
||||
let totalChars = 0;
|
||||
for (let i = candidates.length - 1; i >= 0; i--) {
|
||||
const item = candidates[i];
|
||||
if (item === undefined) continue;
|
||||
const outputYaml = formatYaml(expandOutput(uwf, item.payload.output));
|
||||
const blockLen = formatCompactStep(i + 1, item, outputYaml).length;
|
||||
selected.unshift(item);
|
||||
totalChars += blockLen;
|
||||
if (totalChars > quota) break;
|
||||
}
|
||||
|
||||
const skippedCount = candidates.length - selected.length;
|
||||
const parts: string[] = [];
|
||||
|
||||
// Start section
|
||||
if (before === null || showStart) {
|
||||
parts.push(
|
||||
[
|
||||
`# Thread \`${options.threadId}\``,
|
||||
"",
|
||||
`**Workflow:** ${options.workflowName} (\`${options.workflowHash}\`)`,
|
||||
"",
|
||||
"## Task",
|
||||
"",
|
||||
options.prompt,
|
||||
].join("\n"),
|
||||
);
|
||||
}
|
||||
const startSection = formatStartSection(options);
|
||||
if (startSection !== "") parts.push(startSection);
|
||||
|
||||
// Skip hint
|
||||
if (skippedCount > 0 && selected.length > 0) {
|
||||
const firstSelected = selected[0];
|
||||
if (firstSelected !== undefined) {
|
||||
@@ -653,34 +717,21 @@ function formatThreadReadMarkdown(options: {
|
||||
}
|
||||
}
|
||||
|
||||
// Step blocks
|
||||
const startIndex = candidates.length - selected.length;
|
||||
const shownPromptRoles = new Set<string>();
|
||||
for (let i = 0; i < selected.length; i++) {
|
||||
const item = selected[i];
|
||||
if (item === undefined) continue;
|
||||
const stepNum = startIndex + i + 1;
|
||||
const ts = new Date(item.timestamp)
|
||||
.toISOString()
|
||||
.replace("T", " ")
|
||||
.replace(/\.\d+Z$/, "");
|
||||
const stepLines = [
|
||||
`## Step ${stepNum}: ${item.payload.role} \`${item.hash}\``,
|
||||
`**Agent:** ${item.payload.agent} | **Time:** ${ts}`,
|
||||
];
|
||||
const roleDef = workflow.roles[item.payload.role];
|
||||
if (roleDef && !shownPromptRoles.has(item.payload.role)) {
|
||||
const prompt = roleDef.goal;
|
||||
stepLines.push("", "### Prompt", "", prompt);
|
||||
shownPromptRoles.add(item.payload.role);
|
||||
}
|
||||
if (item.payload.detail) {
|
||||
const content = extractLastAssistantContent(uwf, item.payload.detail);
|
||||
if (content !== null) {
|
||||
stepLines.push("", "### Content", "", content);
|
||||
}
|
||||
}
|
||||
parts.push(stepLines.join("\n"));
|
||||
const stepBlock = [
|
||||
formatStepHeader(stepNum, item),
|
||||
formatStepPrompt(roleDef, item.payload.role, shownPromptRoles),
|
||||
formatStepContent(uwf, item),
|
||||
]
|
||||
.filter((s) => s !== "")
|
||||
.join("");
|
||||
parts.push(stepBlock);
|
||||
}
|
||||
|
||||
return parts.join("\n\n---\n\n");
|
||||
|
||||
@@ -0,0 +1,156 @@
|
||||
import { beforeEach, describe, expect, mock, test } from "bun:test";
|
||||
|
||||
const mockChatCompletionWithTools = mock(async () => ({
|
||||
content: "---\nstatus: done\n---",
|
||||
toolCalls: [],
|
||||
}));
|
||||
const mockAppendSessionTurn = mock(async () => {});
|
||||
const mockExecuteBuiltinTool = mock(async () => "tool-result");
|
||||
|
||||
mock.module("../src/llm/index.js", () => ({
|
||||
chatCompletionWithTools: mockChatCompletionWithTools,
|
||||
}));
|
||||
mock.module("../src/session.js", () => ({
|
||||
appendSessionTurn: mockAppendSessionTurn,
|
||||
}));
|
||||
mock.module("../src/tools/index.js", () => ({
|
||||
builtinToolsToOpenAi: () => [],
|
||||
executeBuiltinTool: mockExecuteBuiltinTool,
|
||||
getBuiltinTools: () => [],
|
||||
}));
|
||||
|
||||
import { executeTurnTools, runBuiltinLoop, shouldNudge } from "../src/loop.js";
|
||||
|
||||
const fakeProvider = {} as any;
|
||||
const fakeToolCtx = {} as any;
|
||||
|
||||
function makeOptions(overrides: Partial<Parameters<typeof runBuiltinLoop>[0]> = {}) {
|
||||
return {
|
||||
provider: fakeProvider,
|
||||
messages: [{ role: "system" as const, content: "sys" }],
|
||||
toolCtx: fakeToolCtx,
|
||||
maxTurns: 5,
|
||||
storageRoot: "/tmp",
|
||||
sessionId: "sess",
|
||||
noTools: false,
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
beforeEach(() => {
|
||||
mockChatCompletionWithTools.mockReset();
|
||||
mockAppendSessionTurn.mockReset();
|
||||
mockExecuteBuiltinTool.mockReset();
|
||||
});
|
||||
|
||||
describe("shouldNudge", () => {
|
||||
test("2.1 returns true when all conditions met", () => {
|
||||
expect(shouldNudge({ noTools: false, text: "some text", turn: 0, maxTurns: 5 })).toBe(true);
|
||||
});
|
||||
test("2.2 returns false when noTools=true", () => {
|
||||
expect(shouldNudge({ noTools: true, text: "some text", turn: 0, maxTurns: 5 })).toBe(false);
|
||||
});
|
||||
test("2.3 returns false when text starts with ---", () => {
|
||||
expect(shouldNudge({ noTools: false, text: "---\nstatus: done", turn: 0, maxTurns: 5 })).toBe(
|
||||
false,
|
||||
);
|
||||
});
|
||||
test("2.4 returns false on last turn", () => {
|
||||
expect(shouldNudge({ noTools: false, text: "some text", turn: 4, maxTurns: 5 })).toBe(false);
|
||||
});
|
||||
test("2.5 returns true on second-to-last turn", () => {
|
||||
expect(shouldNudge({ noTools: false, text: "some text", turn: 3, maxTurns: 5 })).toBe(true);
|
||||
});
|
||||
test("2.6 leading whitespace before --- suppresses nudge", () => {
|
||||
expect(shouldNudge({ noTools: false, text: " ---\nstatus: done", turn: 0, maxTurns: 5 })).toBe(
|
||||
false,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe("executeTurnTools", () => {
|
||||
test("4.1 executes each tool call and pushes tool result messages", async () => {
|
||||
mockExecuteBuiltinTool.mockResolvedValue("result");
|
||||
const messages: any[] = [];
|
||||
const calls = [
|
||||
{ id: "c1", name: "tool_a", arguments: "{}" },
|
||||
{ id: "c2", name: "tool_b", arguments: "{}" },
|
||||
];
|
||||
const count = await executeTurnTools(calls, fakeToolCtx, messages, "/tmp", "sess");
|
||||
expect(messages.length).toBe(2);
|
||||
expect(messages[0].role).toBe("tool");
|
||||
expect(messages[1].role).toBe("tool");
|
||||
expect(count).toBe(2);
|
||||
});
|
||||
test("4.2 tool result content matches executeBuiltinTool return value", async () => {
|
||||
mockExecuteBuiltinTool.mockResolvedValue("result-A");
|
||||
const messages: any[] = [];
|
||||
await executeTurnTools(
|
||||
[{ id: "c1", name: "read_file", arguments: "{}" }],
|
||||
fakeToolCtx,
|
||||
messages,
|
||||
"/tmp",
|
||||
"sess",
|
||||
);
|
||||
expect(messages[0].content).toBe("result-A");
|
||||
});
|
||||
});
|
||||
|
||||
describe("runBuiltinLoop integration", () => {
|
||||
test("3.1 single text-only response returns finalText immediately", async () => {
|
||||
mockChatCompletionWithTools.mockResolvedValue({
|
||||
content: "---\nstatus: done\n---",
|
||||
toolCalls: [],
|
||||
});
|
||||
const result = await runBuiltinLoop(makeOptions());
|
||||
expect(result.finalText).toBe("---\nstatus: done\n---");
|
||||
expect(result.turnCount).toBe(1);
|
||||
});
|
||||
test("3.2 noTools=true suppresses tool calls", async () => {
|
||||
mockChatCompletionWithTools.mockResolvedValue({
|
||||
content: "ok",
|
||||
toolCalls: [{ id: "c1", name: "read_file", arguments: "{}" }],
|
||||
});
|
||||
const result = await runBuiltinLoop(makeOptions({ noTools: true }));
|
||||
expect(result.finalText).toBe("ok");
|
||||
expect(result.turnCount).toBe(1);
|
||||
});
|
||||
test("3.3 tool call followed by text response", async () => {
|
||||
mockChatCompletionWithTools
|
||||
.mockResolvedValueOnce({
|
||||
content: null,
|
||||
toolCalls: [{ id: "c1", name: "read_file", arguments: "{}" }],
|
||||
})
|
||||
.mockResolvedValueOnce({ content: "---\nstatus: done\n---", toolCalls: [] });
|
||||
mockExecuteBuiltinTool.mockResolvedValue("file contents");
|
||||
const result = await runBuiltinLoop(makeOptions());
|
||||
expect(result.finalText).toBe("---\nstatus: done\n---");
|
||||
expect(result.turnCount).toBe(3);
|
||||
});
|
||||
test("3.4 nudge cycle inserts nudge message", async () => {
|
||||
mockChatCompletionWithTools
|
||||
.mockResolvedValueOnce({ content: "I am thinking", toolCalls: [] })
|
||||
.mockResolvedValueOnce({ content: "---\nstatus: done\n---", toolCalls: [] });
|
||||
const result = await runBuiltinLoop(makeOptions());
|
||||
expect(result.finalText).toBe("---\nstatus: done\n---");
|
||||
const nudgeMsg = result.messages.find(
|
||||
(m) =>
|
||||
m.role === "user" && typeof m.content === "string" && m.content.includes("frontmatter"),
|
||||
);
|
||||
expect(nudgeMsg).toBeDefined();
|
||||
});
|
||||
test("3.5 maxTurns exhaustion falls back to last assistant content", async () => {
|
||||
mockChatCompletionWithTools.mockResolvedValue({ content: "still thinking", toolCalls: [] });
|
||||
const result = await runBuiltinLoop(makeOptions({ maxTurns: 3 }));
|
||||
expect(result.finalText).toBe("still thinking");
|
||||
});
|
||||
test("3.6 original messages array is not mutated", async () => {
|
||||
mockChatCompletionWithTools.mockResolvedValue({
|
||||
content: "---\nstatus: done\n---",
|
||||
toolCalls: [],
|
||||
});
|
||||
const original = [{ role: "system" as const, content: "sys" }];
|
||||
await runBuiltinLoop(makeOptions({ messages: original }));
|
||||
expect(original.length).toBe(1);
|
||||
});
|
||||
});
|
||||
@@ -48,7 +48,7 @@ async function appendTurn(
|
||||
await appendSessionTurn(storageRoot, sessionId, payload);
|
||||
}
|
||||
|
||||
async function executeTurnTools(
|
||||
export async function executeTurnTools(
|
||||
calls: Array<{ id: string; name: string; arguments: string }>,
|
||||
toolCtx: ToolContext,
|
||||
messages: ChatMessage[],
|
||||
@@ -70,6 +70,20 @@ async function executeTurnTools(
|
||||
return turnCount;
|
||||
}
|
||||
|
||||
export type ShouldNudgeOptions = {
|
||||
noTools: boolean;
|
||||
text: string;
|
||||
turn: number;
|
||||
maxTurns: number;
|
||||
};
|
||||
|
||||
const MAX_NUDGES = 3;
|
||||
const DEADLINE_WARNING_TURNS = 3;
|
||||
|
||||
export function shouldNudge({ noTools, text, turn, maxTurns }: ShouldNudgeOptions): boolean {
|
||||
return !noTools && !text.trimStart().startsWith("---") && turn < maxTurns - 1;
|
||||
}
|
||||
|
||||
/** Agent run loop: LLM ↔ tools until no tool_calls or maxTurns. */
|
||||
export async function runBuiltinLoop(
|
||||
options: RunBuiltinLoopOptions,
|
||||
@@ -78,23 +92,43 @@ export async function runBuiltinLoop(
|
||||
const openAiTools = options.noTools ? [] : builtinToolsToOpenAi(getBuiltinTools());
|
||||
let finalText = "";
|
||||
let turnCount = 0;
|
||||
let nudgeCount = 0;
|
||||
let deadlineWarned = false;
|
||||
|
||||
for (let turn = 0; turn < options.maxTurns; turn++) {
|
||||
log("8K2M4N7P", `builtin loop turn ${turn + 1}/${options.maxTurns}`);
|
||||
|
||||
// Warn agent when approaching turn limit
|
||||
const turnsRemaining = options.maxTurns - turn;
|
||||
if (!options.noTools && !deadlineWarned && turnsRemaining <= DEADLINE_WARNING_TURNS) {
|
||||
deadlineWarned = true;
|
||||
log("4NRXW6KT", `${turnsRemaining} turns remaining, injecting deadline warning`);
|
||||
messages.push({
|
||||
role: "user",
|
||||
content:
|
||||
`⚠️ You have ${turnsRemaining} turns remaining. ` +
|
||||
"Wrap up your work and output the YAML frontmatter starting with `---`. " +
|
||||
"If you cannot finish in time, output frontmatter with `status: failed` and describe what remains.",
|
||||
});
|
||||
}
|
||||
|
||||
const response = await chatCompletionWithTools(
|
||||
options.provider,
|
||||
messages,
|
||||
openAiTools.length > 0 ? openAiTools : null,
|
||||
);
|
||||
|
||||
// When noTools is set, ignore any tool_calls the LLM might still return
|
||||
const effectiveToolCalls = options.noTools ? null : (response.toolCalls ?? null);
|
||||
|
||||
const assistantMessage: ChatMessage = {
|
||||
role: "assistant",
|
||||
content: response.content,
|
||||
tool_calls: response.toolCalls,
|
||||
tool_calls: effectiveToolCalls,
|
||||
};
|
||||
messages.push(assistantMessage);
|
||||
|
||||
if (response.toolCalls === null || response.toolCalls.length === 0) {
|
||||
if (effectiveToolCalls === null || effectiveToolCalls.length === 0) {
|
||||
const text = response.content ?? "";
|
||||
await appendTurn(options.storageRoot, options.sessionId, {
|
||||
role: "assistant",
|
||||
@@ -104,14 +138,17 @@ export async function runBuiltinLoop(
|
||||
});
|
||||
turnCount += 1;
|
||||
|
||||
// If tools are available but LLM stopped calling them without producing
|
||||
// frontmatter, nudge it to continue working or output frontmatter.
|
||||
if (!options.noTools && !text.trimStart().startsWith("---") && turn < options.maxTurns - 1) {
|
||||
log("7FXQM2KN", "text-only turn without frontmatter, nudging LLM to continue");
|
||||
if (shouldNudge({ noTools: options.noTools, text, turn, maxTurns: options.maxTurns })) {
|
||||
nudgeCount += 1;
|
||||
log("7FXQM2KN", `text-only turn without frontmatter, nudge ${nudgeCount}/${MAX_NUDGES}`);
|
||||
const nudge =
|
||||
"You stopped calling tools but your response does not start with the required `---` YAML frontmatter. " +
|
||||
"Either continue using tools to complete your work, or output your final response starting with `---`.";
|
||||
messages.push({ role: "user", content: nudge });
|
||||
// Nudge doesn't consume turn budget (up to MAX_NUDGES)
|
||||
if (nudgeCount <= MAX_NUDGES) {
|
||||
turn -= 1;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -123,14 +160,14 @@ export async function runBuiltinLoop(
|
||||
await appendTurn(options.storageRoot, options.sessionId, {
|
||||
role: "assistant",
|
||||
content: response.content ?? "",
|
||||
toolCalls: mapToolCallsForPayload(response.toolCalls),
|
||||
toolCalls: mapToolCallsForPayload(effectiveToolCalls),
|
||||
reasoning: null,
|
||||
});
|
||||
turnCount += 1;
|
||||
|
||||
// Execute tools
|
||||
turnCount += await executeTurnTools(
|
||||
response.toolCalls,
|
||||
effectiveToolCalls,
|
||||
options.toolCtx,
|
||||
messages,
|
||||
options.storageRoot,
|
||||
|
||||
@@ -70,6 +70,7 @@ export function buildBuiltinMessages(ctx: AgentContext): ChatMessage[] {
|
||||
"Your task is described in the user message below — do NOT use uwf or workflow CLI commands to discover your task. " +
|
||||
"When you are done, output your final response with the YAML frontmatter block as specified above. " +
|
||||
"Do NOT output the frontmatter until you have completed all necessary work. " +
|
||||
"If you are running low on turns and cannot finish, output the frontmatter with `status: failed` and explain what remains in the body. " +
|
||||
"CRITICAL: Your final output MUST start with the `---` fence on the very first line — " +
|
||||
"no preamble text, no explanation before it. The parser requires `---` at position 0.",
|
||||
);
|
||||
|
||||
@@ -16,6 +16,7 @@ const log = createLogger({ sink: { kind: "stderr" } });
|
||||
|
||||
const CLAUDE_COMMAND = "claude";
|
||||
const CLAUDE_MAX_TURNS = 90;
|
||||
const CLAUDE_MODEL = process.env["CLAUDE_MODEL"] ?? null;
|
||||
|
||||
function buildHistorySummary(steps: AgentContext["steps"]): string {
|
||||
if (steps.length === 0) {
|
||||
@@ -87,7 +88,7 @@ function spawnClaude(args: string[]): Promise<{ stdout: string; stderr: string }
|
||||
}
|
||||
|
||||
function spawnClaudeRun(prompt: string): Promise<{ stdout: string; stderr: string }> {
|
||||
return spawnClaude([
|
||||
const args = [
|
||||
"-p",
|
||||
prompt,
|
||||
"--output-format",
|
||||
@@ -96,14 +97,18 @@ function spawnClaudeRun(prompt: string): Promise<{ stdout: string; stderr: strin
|
||||
"--dangerously-skip-permissions",
|
||||
"--max-turns",
|
||||
String(CLAUDE_MAX_TURNS),
|
||||
]);
|
||||
];
|
||||
if (CLAUDE_MODEL !== null) {
|
||||
args.push("--model", CLAUDE_MODEL);
|
||||
}
|
||||
return spawnClaude(args);
|
||||
}
|
||||
|
||||
function spawnClaudeResume(
|
||||
sessionId: string,
|
||||
message: string,
|
||||
): Promise<{ stdout: string; stderr: string }> {
|
||||
return spawnClaude([
|
||||
const args = [
|
||||
"-p",
|
||||
message,
|
||||
"--resume",
|
||||
@@ -114,7 +119,11 @@ function spawnClaudeResume(
|
||||
"--dangerously-skip-permissions",
|
||||
"--max-turns",
|
||||
String(CLAUDE_MAX_TURNS),
|
||||
]);
|
||||
];
|
||||
if (CLAUDE_MODEL !== null) {
|
||||
args.push("--model", CLAUDE_MODEL);
|
||||
}
|
||||
return spawnClaude(args);
|
||||
}
|
||||
|
||||
async function processClaudeOutput(stdout: string, store: Store): Promise<AgentRunResult> {
|
||||
|
||||
Reference in New Issue
Block a user