feat: LLM-Agent-LLM sandwich (agent-executor)

- createAgentExecutorRole: prepPrompt → agent.run → LLM₂ tool_choice → meta
- createCursorRunner: extracts Cursor CLI into AgentRunner interface
- Coder role: LLM₂ extracts filesChanged + testsPassed via tool_choice
- Reviewer role: LLM₂ extracts verdict via tool_choice (the old `includes('rejected')` keyword check survives only as the defaultMeta fallback)
- defaultMeta fallback when LLM₂ fails or returns no tool_call
- 5 new agent-executor tests, 24 total pass
This commit is contained in:
2026-04-17 06:27:19 +00:00
parent 5a8fe051bf
commit f191d594b3
5 changed files with 437 additions and 145 deletions
+7
View File
@@ -5,6 +5,13 @@
*/
export { createCodingWorkflow } from './coding-workflow.js';
export {
type AgentExecutorConfig,
type AgentResult,
type AgentRunner,
createAgentExecutorRole,
createCursorRunner,
} from './roles/agent-executor.js';
export { createArchitectRole } from './roles/architect-llm.js';
export { createCoderRole } from './roles/coder-cursor.js';
export { createReviewerRole } from './roles/reviewer-cursor.js';
@@ -0,0 +1,153 @@
/**
* Agent Executor tests — LLM-Agent-LLM sandwich pattern.
*
* 小橘 🍊 (NEKO Team)
*/
import { describe, expect, it } from 'bun:test';
import type { LlmClient } from '../../llm-client.js';
import type { WorkflowMessage } from '../workflow-type.js';
import {
type AgentExecutorConfig,
type AgentRunner,
createAgentExecutorRole,
} from './agent-executor.js';
/** Builds an AgentRunner stub whose `run` always reports success with the given output. */
function mockAgent(output: string): AgentRunner {
  return {
    async run() {
      // Fixed duration keeps the stub deterministic.
      return { success: true, output, durationMs: 100 };
    },
  };
}
/**
 * Builds an LlmClient stub whose `chat` always answers with a single
 * `extract` tool call carrying `toolArgs` as its raw arguments string.
 */
function mockLlm(toolArgs: string): LlmClient {
  return {
    async chat() {
      // A fresh tool-call object is built on every invocation.
      const extractCall = {
        id: 'call_1',
        function: { name: 'extract', arguments: toolArgs },
      };
      return { tool_calls: [extractCall] };
    },
  };
}
/** Builds an LlmClient stub that replies with plain content and no tool call. */
function mockLlmNoTool(): LlmClient {
  return {
    async chat() {
      return { content: 'no tool call' };
    },
  };
}
/** Builds an LlmClient stub whose `chat` always rejects with `Error('LLM down')`. */
function mockLlmError(): LlmClient {
  return {
    // The Error is created per call, so every rejection is a fresh instance.
    chat: () => Promise.reject(new Error('LLM down')),
  };
}
/** Meta shape used by every test in this file: a single review verdict. */
type TestMeta = { verdict: 'approved' | 'rejected' };
/**
 * Shared executor config for the tests below.
 * Individual tests spread it and override single members when needed.
 */
const testConfig: AgentExecutorConfig<TestMeta> = {
// Builds the agent prompt from the `__start__` message; an absent start message yields "Review: ".
prepPrompt: (chain) => {
const start = chain.find((m) => m.role === '__start__');
return { prompt: `Review: ${start?.content ?? ''}`, cwd: '/tmp' };
},
parseMeta: {
system: 'Extract verdict.',
// Tool name matches the `extract` call name produced by mockLlm.
tool: {
type: 'function',
function: {
name: 'extract',
description: 'Extract verdict',
parameters: {
type: 'object',
properties: {
verdict: { type: 'string', enum: ['approved', 'rejected'] },
},
required: ['verdict'],
},
},
},
// Raw JSON.parse with no validation; throws on malformed arguments.
parse: (args) => JSON.parse(args),
// The fallback ignores the agent output and always approves, so the fallback tests expect 'approved'.
defaultMeta: () => ({ verdict: 'approved' as const }),
},
};
/** Minimal message chain: a single `__start__` message. The timestamp is taken once, when this module is evaluated. */
const chain: WorkflowMessage[] = [
{
role: '__start__',
content: 'test task',
meta: {},
timestamp: Date.now(),
},
];
/**
 * Behavioural tests for createAgentExecutorRole.
 *
 * Every test calls the role with `null as any` as its third argument: the
 * tests never supply a real value for it.
 */
describe('createAgentExecutorRole', () => {
  it('sandwich: agent output → LLM₂ tool_choice → structured meta', async () => {
    const role = createAgentExecutorRole(
      mockAgent('All changes look good. APPROVED.'),
      mockLlm('{"verdict":"approved"}'),
      testConfig,
    );
    const result = await role(chain, 't1', null as any);
    // content is the agent's report verbatim; meta comes from the tool call.
    expect(result.content).toBe('All changes look good. APPROVED.');
    expect(result.meta).toEqual({ verdict: 'approved' });
  });

  it('LLM₂ returns rejected verdict', async () => {
    const role = createAgentExecutorRole(
      mockAgent('Code has issues. REJECTED.'),
      mockLlm('{"verdict":"rejected"}'),
      testConfig,
    );
    const result = await role(chain, 't1', null as any);
    expect(result.meta).toEqual({ verdict: 'rejected' });
  });

  it('falls back to defaultMeta when LLM₂ returns no tool_call', async () => {
    const role = createAgentExecutorRole(
      mockAgent('Some output'),
      mockLlmNoTool(),
      testConfig,
    );
    const result = await role(chain, 't1', null as any);
    expect(result.content).toBe('Some output');
    expect(result.meta).toEqual({ verdict: 'approved' }); // defaultMeta
  });

  it('falls back to defaultMeta when LLM₂ throws', async () => {
    const role = createAgentExecutorRole(
      mockAgent('Agent worked fine'),
      mockLlmError(),
      testConfig,
    );
    const result = await role(chain, 't1', null as any);
    expect(result.content).toBe('Agent worked fine');
    expect(result.meta).toEqual({ verdict: 'approved' });
  });

  it('prepPrompt receives chain and topicId', async () => {
    // Capture BOTH arguments so the assertions cover what the test title promises.
    let capturedChain: WorkflowMessage[] = [];
    let capturedTopicId = '';
    const config: AgentExecutorConfig<TestMeta> = {
      ...testConfig,
      prepPrompt: (c, topicId) => {
        capturedChain = c;
        capturedTopicId = topicId;
        return { prompt: 'test', cwd: '/tmp' };
      },
    };
    const role = createAgentExecutorRole(
      mockAgent('output'),
      mockLlm('{"verdict":"approved"}'),
      config,
    );
    await role(chain, 'my-topic', null as any);
    // The role forwards the very same chain array it was given.
    expect(capturedChain).toBe(chain);
    expect(capturedTopicId).toBe('my-topic');
  });
});
@@ -0,0 +1,152 @@
/**
* Agent Executor — LLM-Agent-LLM sandwich pattern.
*
* 1. LLM₁ (prep): prepPrompt builds the agent's prompt (a synchronous template function — no LLM call is made here)
* 2. Agent (exec): runs CLI agent, gets free-text report
* 3. LLM₂ (parse): extracts structured meta via tool_choice
*
* 小橘 🍊 (NEKO Team)
*/
import { mkdirSync, unlinkSync, writeFileSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import type { LlmClient, LlmTool } from '../../llm-client.js';
import type { Role, RoleResult, WorkflowMessage } from '../workflow-type.js';
// ── Agent runner ───────────────────────────────────────────────
/**
 * Outcome of a single agent invocation, as resolved by `AgentRunner.run`.
 */
export interface AgentResult {
/** Whether the run succeeded. The Cursor runner in this file sets it from a zero exit code. */
success: boolean;
/** Free-text report. The Cursor runner uses trimmed stdout, or trimmed stderr when stdout is empty. */
output: string;
/** Elapsed time in milliseconds. The Cursor runner measures from just before the agent process is spawned. */
durationMs: number;
}
/**
 * Something that can execute a prompt inside a working directory.
 * The executor role depends only on this interface, so a stub can replace
 * the Cursor CLI runner.
 */
export interface AgentRunner {
/**
 * Runs the agent once.
 *
 * @param prompt Prompt text for the agent.
 * @param cwd    Working directory for the run.
 * @returns The agent's result; see `AgentResult`.
 */
run(prompt: string, cwd: string): Promise<AgentResult>;
}
/**
 * Default agent runner — Cursor CLI.
 *
 * Each `run` call:
 *  1. writes the prompt to a uniquely named file under `<tmpdir>/pulse-v2-prompts`
 *     (owner-only permissions, since the directory lives in the shared tmp dir),
 *  2. looks up `CURSOR_API_KEY` through the `secret` CLI,
 *  3. spawns `agentBin --yolo -p --output-format text -f <promptFile>` in `cwd`,
 *  4. kills the process if it is still running after `timeoutMs`,
 *  5. removes the prompt file and clears the timer, even when a step above throws.
 *
 * @param opts.agentBin  Path of the agent executable to spawn.
 * @param opts.timeoutMs Milliseconds before the agent process is killed (default 300 000).
 * @returns An AgentRunner whose result has `success` set from a zero exit code and
 *          `output` set to trimmed stdout, or trimmed stderr when stdout is empty.
 */
export function createCursorRunner(opts: {
  agentBin: string;
  timeoutMs?: number;
}): AgentRunner {
  const { agentBin, timeoutMs = 300_000 } = opts;

  /** Returns the first line printed by `secret get CURSOR_API_KEY`, or '' when nothing is printed. */
  const readCursorApiKey = async (): Promise<string> => {
    const p = Bun.spawn(
      ['sh', '-c', 'secret get CURSOR_API_KEY | head -1'],
      {
        stdout: 'pipe',
      },
    );
    await p.exited;
    return (await new Response(p.stdout).text()).trim();
  };

  return {
    async run(prompt, cwd) {
      const promptDir = join(tmpdir(), 'pulse-v2-prompts');
      mkdirSync(promptDir, { recursive: true });
      const promptFile = join(
        promptDir,
        `task-${Date.now()}-${Math.random().toString(36).slice(2, 6)}.md`,
      );
      writeFileSync(promptFile, prompt, { encoding: 'utf-8', mode: 0o600 });

      try {
        const cursorApiKey = await readCursorApiKey();

        // Only override the inherited key when the lookup produced one; an
        // empty lookup must not blank out a CURSOR_API_KEY already in the env.
        const env = { ...process.env };
        if (cursorApiKey) {
          env.CURSOR_API_KEY = cursorApiKey;
        }

        const start = Date.now();
        const proc = Bun.spawn(
          [agentBin, '--yolo', '-p', '--output-format', 'text', '-f', promptFile],
          {
            cwd,
            env,
            stdout: 'pipe',
            stderr: 'pipe',
          },
        );
        const timer = setTimeout(() => proc.kill(), timeoutMs);

        try {
          // Drain both pipes while waiting for exit, so a large report cannot
          // stall the child on a full pipe before it gets the chance to exit.
          const [exitCode, stdout, stderr] = await Promise.all([
            proc.exited,
            new Response(proc.stdout).text(),
            new Response(proc.stderr).text(),
          ]);
          return {
            success: exitCode === 0,
            output: stdout.trim() || stderr.trim(),
            durationMs: Date.now() - start,
          };
        } finally {
          clearTimeout(timer);
        }
      } finally {
        try {
          unlinkSync(promptFile);
        } catch {
          // Best-effort cleanup: a missing prompt file is not an error.
        }
      }
    },
  };
}
// ── Agent Executor Role factory ────────────────────────────────
/**
 * Configuration consumed by `createAgentExecutorRole`.
 *
 * `Meta` is the structured metadata type the resulting role returns next to
 * the agent's free-text output.
 */
export interface AgentExecutorConfig<Meta> {
/** Build prompt + cwd for the agent. Called synchronously with the message chain and topic id. */
prepPrompt: (
chain: WorkflowMessage[],
topicId: string,
) => { prompt: string; cwd: string };
/** LLM₂ structured output: tool definition for meta extraction. */
parseMeta: {
/** System prompt for the meta-extraction LLM call. */
system: string;
/** Tool definition — parameters schema defines Meta shape. Sent as the only tool, with tool_choice 'required'. */
tool: LlmTool;
/** Parse tool_call arguments into Meta. Falls back to defaultMeta on failure (including when this function throws). */
parse: (args: string) => Meta;
/** Fallback when LLM₂ fails or returns no tool_call. Receives the agent's output text. */
defaultMeta: (output: string) => Meta;
};
}
/**
 * Create a Role from an agent executor config.
 * The Role runs: prepPrompt → agent → LLM₂ parse → { content, meta }.
 *
 * `content` is always the agent's output, unchanged. `meta` comes from the
 * first tool call LLM₂ returns; when LLM₂ throws, returns no tool call, or
 * `parseMeta.parse` throws, `parseMeta.defaultMeta(output)` is used instead
 * and the reason is reported through `console.warn`, so a degraded run is
 * visible rather than silent.
 *
 * @param agent  Runner that executes the prepared prompt.
 * @param llm    Client used for the meta-extraction call (LLM₂).
 * @param config Prompt builder plus meta-extraction settings.
 * @returns A Role resolving to `{ content, meta }`. Failures of `prepPrompt`
 *          or `agent.run` are not caught here and reject the returned promise.
 */
export function createAgentExecutorRole<Meta>(
  agent: AgentRunner,
  llm: LlmClient,
  config: AgentExecutorConfig<Meta>,
): Role<Meta> {
  return async (chain, topicId): Promise<RoleResult<Meta>> => {
    // 1. Prep: prepPrompt is a plain template function — no LLM call happens here.
    const { prompt, cwd } = config.prepPrompt(chain, topicId);

    // 2. Agent exec
    const result = await agent.run(prompt, cwd);

    // 3. LLM₂ parse meta
    const { parseMeta } = config;
    let meta: Meta;
    try {
      const resp = await llm.chat({
        messages: [
          { role: 'system', content: parseMeta.system },
          { role: 'user', content: result.output },
        ],
        tools: [parseMeta.tool],
        tool_choice: 'required',
      });
      const toolCall = resp.tool_calls?.[0];
      if (toolCall) {
        meta = parseMeta.parse(toolCall.function.arguments);
      } else {
        console.warn(
          `[agent-executor] topic ${topicId}: LLM₂ returned no tool_call; using defaultMeta`,
        );
        meta = parseMeta.defaultMeta(result.output);
      }
    } catch (err: unknown) {
      // Best-effort by design: never fail the role because meta extraction
      // failed, but say why the fallback was taken.
      const reason = err instanceof Error ? err.message : String(err);
      console.warn(
        `[agent-executor] topic ${topicId}: meta extraction failed (${reason}); using defaultMeta`,
      );
      meta = parseMeta.defaultMeta(result.output);
    }

    return { content: result.output, meta };
  };
}
@@ -1,26 +1,38 @@
/**
* Coder role — uses Cursor agent to implement code changes.
* Pure: returns { content, meta }, adapter writes events.
* Coder role — LLM-Agent-LLM sandwich via agent executor.
*
* 小橘 🍊 (NEKO Team)
*/
import { mkdirSync, unlinkSync, writeFileSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import type { LlmClient } from '../../llm-client.js';
import type { CoderMeta } from '../coding-workflow.js';
import type { Role, RoleResult } from '../workflow-type.js';
import type { Role } from '../workflow-type.js';
import {
type AgentRunner,
createAgentExecutorRole,
createCursorRunner,
} from './agent-executor.js';
export function createCoderRole(opts: { agentBin: string }): Role<CoderMeta> {
return async (chain): Promise<RoleResult<CoderMeta>> => {
const startMsg = chain.find((m) => m.role === '__start__');
const title = startMsg?.meta?.title ?? 'unknown';
const description = startMsg?.content ?? '';
const repoDir = (startMsg?.meta?.repoDir as string) ?? '/tmp';
export function createCoderRole(opts: {
agentBin?: string;
agent?: AgentRunner;
llm: LlmClient;
}): Role<CoderMeta> {
const agent =
opts.agent ??
createCursorRunner({
agentBin: opts.agentBin ?? `${process.env.HOME}/.local/bin/agent`,
});
const architectMsg = chain.find((m) => m.role === 'architect');
return createAgentExecutorRole<CoderMeta>(agent, opts.llm, {
prepPrompt: (chain, topicId) => {
const startMsg = chain.find((m) => m.role === '__start__');
const title = startMsg?.meta?.title ?? topicId;
const description = startMsg?.content ?? '';
const repoDir = (startMsg?.meta?.repoDir as string) ?? '/tmp';
const architectMsg = chain.find((m) => m.role === 'architect');
const prompt = `## Task: ${title}
const prompt = `## Task: ${title}
${description}
@@ -31,66 +43,45 @@ ${architectMsg?.content ?? 'None'}
${((architectMsg?.meta?.targetFiles as string[]) ?? []).join(', ') || 'Not specified'}
## Instructions
Implement the changes. Do NOT modify any existing test files. Only create or modify source files as needed.
If the task asks to create a new file, create it. If it asks to modify existing files, modify them.
Implement the changes. Do NOT modify any existing test files.
Run tests if applicable. Commit your changes.`;
const result = await runCursorAgent(opts.agentBin, prompt, repoDir);
const filesChanged = (architectMsg?.meta?.targetFiles as string[]) ?? [];
return {
content: result.output,
meta: { filesChanged, testsPassed: result.success },
};
};
}
async function runCursorAgent(
agentBin: string,
prompt: string,
repoDir: string,
): Promise<{ success: boolean; output: string; durationMs: number }> {
const promptDir = join(tmpdir(), 'pulse-v2-prompts');
mkdirSync(promptDir, { recursive: true });
const promptFile = join(
promptDir,
`task-${Date.now()}-${Math.random().toString(36).slice(2, 6)}.md`,
);
writeFileSync(promptFile, prompt, 'utf-8');
const cursorApiKey = await (async () => {
const p = Bun.spawn(['sh', '-c', 'secret get CURSOR_API_KEY | head -1'], {
stdout: 'pipe',
});
await p.exited;
return (await new Response(p.stdout).text()).trim();
})();
const start = Date.now();
const proc = Bun.spawn(
[agentBin, '--yolo', '-p', '--output-format', 'text', '-f', promptFile],
{
cwd: repoDir,
env: { ...process.env, CURSOR_API_KEY: cursorApiKey },
stdout: 'pipe',
stderr: 'pipe',
return { prompt, cwd: repoDir };
},
);
const timer = setTimeout(() => proc.kill(), 300_000);
const exitCode = await proc.exited;
clearTimeout(timer);
const stdout = await new Response(proc.stdout).text();
const stderr = await new Response(proc.stderr).text();
try {
unlinkSync(promptFile);
} catch {}
return {
success: exitCode === 0,
output: stdout.trim() || stderr.trim(),
durationMs: Date.now() - start,
};
parseMeta: {
system:
'Extract structured metadata from this coding agent report. Call the extract_coder_meta tool.',
tool: {
type: 'function',
function: {
name: 'extract_coder_meta',
description: 'Extract coder metadata from agent output',
parameters: {
type: 'object',
properties: {
filesChanged: {
type: 'array',
items: { type: 'string' },
description: 'List of files created or modified',
},
testsPassed: {
type: 'boolean',
description: 'Whether tests passed successfully',
},
},
required: ['filesChanged', 'testsPassed'],
},
},
},
parse: (args) => {
const parsed = JSON.parse(args);
return {
filesChanged: parsed.filesChanged ?? [],
testsPassed: parsed.testsPassed ?? false,
};
},
defaultMeta: () => ({ filesChanged: [], testsPassed: false }),
},
});
}
@@ -1,27 +1,37 @@
/**
* Reviewer role — uses Cursor agent in review mode.
* Pure: returns { content, meta }, adapter writes events.
* Reviewer role — LLM-Agent-LLM sandwich via agent executor.
*
* 小橘 🍊 (NEKO Team)
*/
import { mkdirSync, unlinkSync, writeFileSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import type { LlmClient } from '../../llm-client.js';
import type { ReviewerMeta } from '../coding-workflow.js';
import type { Role, RoleResult } from '../workflow-type.js';
import type { Role } from '../workflow-type.js';
import {
type AgentRunner,
createAgentExecutorRole,
createCursorRunner,
} from './agent-executor.js';
export function createReviewerRole(opts: {
agentBin: string;
agentBin?: string;
agent?: AgentRunner;
llm: LlmClient;
}): Role<ReviewerMeta> {
return async (chain): Promise<RoleResult<ReviewerMeta>> => {
const startMsg = chain.find((m) => m.role === '__start__');
const title = startMsg?.meta?.title ?? 'unknown';
const repoDir = (startMsg?.meta?.repoDir as string) ?? '/tmp';
const agent =
opts.agent ??
createCursorRunner({
agentBin: opts.agentBin ?? `${process.env.HOME}/.local/bin/agent`,
});
const coderMsg = [...chain].reverse().find((m) => m.role === 'coder');
return createAgentExecutorRole<ReviewerMeta>(agent, opts.llm, {
prepPrompt: (chain, topicId) => {
const startMsg = chain.find((m) => m.role === '__start__');
const title = startMsg?.meta?.title ?? topicId;
const repoDir = (startMsg?.meta?.repoDir as string) ?? '/tmp';
const coderMsg = [...chain].reverse().find((m) => m.role === 'coder');
const prompt = `## Code Review: ${title}
const prompt = `## Code Review: ${title}
## What was done
${coderMsg?.content ?? 'Unknown'}
@@ -34,66 +44,45 @@ Review the recent changes for correctness, security, and code quality.
Do NOT modify any files. Only output your review.
End with a clear verdict: APPROVED or REJECTED with reasons.`;
const result = await runCursorAgent(opts.agentBin, prompt, repoDir);
const output = result.output.toLowerCase();
const verdict: 'approved' | 'rejected' = output.includes('rejected')
? 'rejected'
: 'approved';
return {
content: result.output,
meta: { verdict },
};
};
}
async function runCursorAgent(
agentBin: string,
prompt: string,
repoDir: string,
): Promise<{ success: boolean; output: string; durationMs: number }> {
const promptDir = join(tmpdir(), 'pulse-v2-prompts');
mkdirSync(promptDir, { recursive: true });
const promptFile = join(
promptDir,
`task-${Date.now()}-${Math.random().toString(36).slice(2, 6)}.md`,
);
writeFileSync(promptFile, prompt, 'utf-8');
const cursorApiKey = await (async () => {
const p = Bun.spawn(['sh', '-c', 'secret get CURSOR_API_KEY | head -1'], {
stdout: 'pipe',
});
await p.exited;
return (await new Response(p.stdout).text()).trim();
})();
const start = Date.now();
const proc = Bun.spawn(
[agentBin, '--yolo', '-p', '--output-format', 'text', '-f', promptFile],
{
cwd: repoDir,
env: { ...process.env, CURSOR_API_KEY: cursorApiKey },
stdout: 'pipe',
stderr: 'pipe',
return { prompt, cwd: repoDir };
},
);
const timer = setTimeout(() => proc.kill(), 300_000);
const exitCode = await proc.exited;
clearTimeout(timer);
const stdout = await new Response(proc.stdout).text();
const stderr = await new Response(proc.stderr).text();
try {
unlinkSync(promptFile);
} catch {}
return {
success: exitCode === 0,
output: stdout.trim() || stderr.trim(),
durationMs: Date.now() - start,
};
parseMeta: {
system:
'Extract the review verdict from this code review report. Call the extract_review_verdict tool.',
tool: {
type: 'function',
function: {
name: 'extract_review_verdict',
description: 'Extract review verdict from reviewer output',
parameters: {
type: 'object',
properties: {
verdict: {
type: 'string',
enum: ['approved', 'rejected'],
description: 'Final review verdict',
},
},
required: ['verdict'],
},
},
},
parse: (args) => {
const parsed = JSON.parse(args);
return {
verdict: parsed.verdict === 'rejected' ? 'rejected' : 'approved',
};
},
defaultMeta: (output) => {
// Last resort: keyword matching on the last 200 chars
const tail = output.toLowerCase().slice(-200);
return {
verdict: tail.includes('reject')
? ('rejected' as const)
: ('approved' as const),
};
},
},
});
}