feat: LLM-Agent-LLM sandwich (agent-executor)

- createAgentExecutorRole: prepPrompt → agent.run → LLM₂ tool_choice → meta
- createCursorRunner: extracts the Cursor CLI invocation behind the AgentRunner interface
- Coder role: LLM₂ extracts filesChanged + testsPassed via tool_choice
- Reviewer role: LLM₂ extracts the verdict via tool_choice (replaces the `includes` hack)
- defaultMeta fallback when LLM₂ fails or returns no tool_call
- 5 new agent-executor tests; 24 tests pass in total
2026-04-17 06:27:19 +00:00
parent 5a8fe051bf
commit f191d594b3
5 changed files with 437 additions and 145 deletions
@@ -5,6 +5,13 @@
 */

 export { createCodingWorkflow } from './coding-workflow.js';
+export {
+  type AgentExecutorConfig,
+  type AgentResult,
+  type AgentRunner,
+  createAgentExecutorRole,
+  createCursorRunner,
+} from './roles/agent-executor.js';
 export { createArchitectRole } from './roles/architect-llm.js';
 export { createCoderRole } from './roles/coder-cursor.js';
 export { createReviewerRole } from './roles/reviewer-cursor.js';
@@ -0,0 +1,153 @@
+/**
+ * Agent Executor tests — LLM-Agent-LLM sandwich pattern.
+ *
+ * 小橘 🍊 (NEKO Team)
+ */
+
+import { describe, expect, it } from 'bun:test';
+import type { LlmClient } from '../../llm-client.js';
+import type { WorkflowMessage } from '../workflow-type.js';
+import {
+  type AgentExecutorConfig,
+  type AgentRunner,
+  createAgentExecutorRole,
+} from './agent-executor.js';
+
+/** Stub AgentRunner: every run() resolves to a successful result carrying `output`. */
+function mockAgent(output: string): AgentRunner {
+  const run: AgentRunner['run'] = async () => {
+    return { success: true, output, durationMs: 100 };
+  };
+  return { run };
+}
+
+/**
+ * Stub LlmClient: chat() always answers with a single `extract` tool call
+ * whose raw arguments string is `toolArgs`.
+ */
+function mockLlm(toolArgs: string): LlmClient {
+  const chat: LlmClient['chat'] = async () => {
+    const call = {
+      id: 'call_1',
+      function: { name: 'extract', arguments: toolArgs },
+    };
+    return { tool_calls: [call] };
+  };
+  return { chat };
+}
+
+/** Stub LlmClient: chat() replies with plain text and no tool_calls. */
+function mockLlmNoTool(): LlmClient {
+  const chat: LlmClient['chat'] = async () => {
+    return { content: 'no tool call' };
+  };
+  return { chat };
+}
+
+/** Stub LlmClient: every chat() call rejects with Error('LLM down'). */
+function mockLlmError(): LlmClient {
+  const chat: LlmClient['chat'] = () => Promise.reject(new Error('LLM down'));
+  return { chat };
+}
+
+/** Meta shape used throughout these tests: a single review verdict. */
+type TestMeta = { verdict: 'approved' | 'rejected' };
+
+/** Tool definition offered to LLM₂; its only parameter is the verdict enum. */
+const extractTool: AgentExecutorConfig<TestMeta>['parseMeta']['tool'] = {
+  type: 'function',
+  function: {
+    name: 'extract',
+    description: 'Extract verdict',
+    parameters: {
+      type: 'object',
+      properties: {
+        verdict: { type: 'string', enum: ['approved', 'rejected'] },
+      },
+      required: ['verdict'],
+    },
+  },
+};
+
+/**
+ * Shared executor config: the prompt is "Review: <start message content>",
+ * tool arguments are parsed as JSON, and the fallback verdict is `approved`.
+ */
+const testConfig: AgentExecutorConfig<TestMeta> = {
+  prepPrompt(chain) {
+    const task = chain.find((m) => m.role === '__start__')?.content ?? '';
+    return { prompt: `Review: ${task}`, cwd: '/tmp' };
+  },
+  parseMeta: {
+    system: 'Extract verdict.',
+    tool: extractTool,
+    parse: (args) => JSON.parse(args),
+    defaultMeta: () => ({ verdict: 'approved' as const }),
+  },
+};
+
+/** The `__start__` message that seeds every test run. */
+const startMessage: WorkflowMessage = {
+  role: '__start__',
+  content: 'test task',
+  meta: {},
+  timestamp: Date.now(),
+};
+
+/** Minimal message chain shared by all tests: just the start message. */
+const chain: WorkflowMessage[] = [startMessage];
+
+describe('createAgentExecutorRole', () => {
+  // Builds a role from a canned agent output and the given LLM₂ stub, then
+  // runs it once against the shared chain. The third role argument is not
+  // exercised by these tests, so a null placeholder is passed.
+  const runRole = (
+    agentOutput: string,
+    llm: LlmClient,
+    config: AgentExecutorConfig<TestMeta> = testConfig,
+    topicId = 't1',
+  ) => {
+    const role = createAgentExecutorRole(mockAgent(agentOutput), llm, config);
+    return role(chain, topicId, null as any);
+  };
+
+  it('sandwich: agent output → LLM₂ tool_choice → structured meta', async () => {
+    const { content, meta } = await runRole(
+      'All changes look good. APPROVED.',
+      mockLlm('{"verdict":"approved"}'),
+    );
+
+    expect(content).toBe('All changes look good. APPROVED.');
+    expect(meta).toEqual({ verdict: 'approved' });
+  });
+
+  it('LLM₂ returns rejected verdict', async () => {
+    const { meta } = await runRole(
+      'Code has issues. REJECTED.',
+      mockLlm('{"verdict":"rejected"}'),
+    );
+
+    expect(meta).toEqual({ verdict: 'rejected' });
+  });
+
+  it('falls back to defaultMeta when LLM₂ returns no tool_call', async () => {
+    const { content, meta } = await runRole('Some output', mockLlmNoTool());
+
+    expect(content).toBe('Some output');
+    expect(meta).toEqual({ verdict: 'approved' }); // defaultMeta
+  });
+
+  it('falls back to defaultMeta when LLM₂ throws', async () => {
+    const { content, meta } = await runRole('Agent worked fine', mockLlmError());
+
+    expect(content).toBe('Agent worked fine');
+    expect(meta).toEqual({ verdict: 'approved' });
+  });
+
+  it('prepPrompt receives chain and topicId', async () => {
+    let seenTopicId = '';
+    // Same config as the other tests, but prepPrompt records the topic id it is given.
+    const recordingConfig: AgentExecutorConfig<TestMeta> = {
+      ...testConfig,
+      prepPrompt: (_c, topicId) => {
+        seenTopicId = topicId;
+        return { prompt: 'test', cwd: '/tmp' };
+      },
+    };
+
+    await runRole(
+      'output',
+      mockLlm('{"verdict":"approved"}'),
+      recordingConfig,
+      'my-topic',
+    );
+
+    expect(seenTopicId).toBe('my-topic');
+  });
+});
@@ -0,0 +1,152 @@
+/**
+ * Agent Executor — LLM-Agent-LLM sandwich pattern.
+ *
+ * 1. LLM₁ (prep): prepPrompt builds the agent's prompt
+ * 2. Agent (exec): runs CLI agent, gets free-text report
+ * 3. LLM₂ (parse): extracts structured meta via tool_choice
+ *
+ * 小橘 🍊 (NEKO Team)
+ */
+
+import { mkdirSync, unlinkSync, writeFileSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import type { LlmClient, LlmTool } from '../../llm-client.js';
+import type { Role, RoleResult, WorkflowMessage } from '../workflow-type.js';
+
+// ── Agent runner ───────────────────────────────────────────────
+
+/** Outcome of one agent invocation, as returned by `AgentRunner.run`. */
+export interface AgentResult {
+  /** Whether the run succeeded (the Cursor runner in this file derives it from a zero exit code). */
+  success: boolean;
+  /** Free-text report from the agent (the Cursor runner uses trimmed stdout, or trimmed stderr when stdout is empty). */
+  output: string;
+  /** Elapsed time of the run, in milliseconds. */
+  durationMs: number;
+}
+
+/** Abstraction over an agent that executes a prompt in a working directory. */
+export interface AgentRunner {
+  /**
+   * Run the agent once.
+   *
+   * @param prompt Prompt text handed to the agent.
+   * @param cwd Working directory for the run.
+   * @returns The outcome of the run.
+   */
+  run(prompt: string, cwd: string): Promise<AgentResult>;
+}
+
+/**
+ * Default agent runner — Cursor CLI.
+ *
+ * Each run writes the prompt to a temporary file, looks up CURSOR_API_KEY via
+ * the `secret` command, and spawns `agentBin` against that file in `cwd`.
+ *
+ * @param opts.agentBin Path to the Cursor agent executable.
+ * @param opts.timeoutMs Kill the agent after this many milliseconds (default 300 000).
+ * @returns A runner whose result reports `success` for a zero exit code and
+ *   whose `output` is trimmed stdout, falling back to trimmed stderr.
+ */
+export function createCursorRunner(opts: {
+  agentBin: string;
+  timeoutMs?: number;
+}): AgentRunner {
+  const { agentBin, timeoutMs = 300_000 } = opts;
+
+  return {
+    async run(prompt, cwd) {
+      const promptDir = join(tmpdir(), 'pulse-v2-prompts');
+      mkdirSync(promptDir, { recursive: true });
+      const promptFile = join(
+        promptDir,
+        `task-${Date.now()}-${Math.random().toString(36).slice(2, 6)}.md`,
+      );
+      writeFileSync(promptFile, prompt, 'utf-8');
+
+      try {
+        const keyProc = Bun.spawn(
+          ['sh', '-c', 'secret get CURSOR_API_KEY | head -1'],
+          {
+            stdout: 'pipe',
+          },
+        );
+        // Drain stdout while waiting for exit so the child never stalls on a full pipe.
+        const [keyText] = await Promise.all([
+          new Response(keyProc.stdout).text(),
+          keyProc.exited,
+        ]);
+        const cursorApiKey = keyText.trim();
+
+        const start = Date.now();
+        const proc = Bun.spawn(
+          [agentBin, '--yolo', '-p', '--output-format', 'text', '-f', promptFile],
+          {
+            cwd,
+            // Only override the key when the lookup produced one; an empty
+            // lookup must not blank out a key inherited from the environment.
+            env: {
+              ...process.env,
+              ...(cursorApiKey ? { CURSOR_API_KEY: cursorApiKey } : {}),
+            },
+            stdout: 'pipe',
+            stderr: 'pipe',
+          },
+        );
+
+        const timer = setTimeout(() => proc.kill(), timeoutMs);
+        try {
+          // Read both streams concurrently with the exit wait: reading only
+          // after exit can block a chatty agent until the timeout kills it.
+          const [exitCode, stdout, stderr] = await Promise.all([
+            proc.exited,
+            new Response(proc.stdout).text(),
+            new Response(proc.stderr).text(),
+          ]);
+
+          return {
+            success: exitCode === 0,
+            output: stdout.trim() || stderr.trim(),
+            durationMs: Date.now() - start,
+          };
+        } finally {
+          clearTimeout(timer);
+        }
+      } finally {
+        // Best-effort cleanup; runs even when the lookup or the spawn throws.
+        try {
+          unlinkSync(promptFile);
+        } catch {}
+      }
+    },
+  };
+}
+
+// ── Agent Executor Role factory ────────────────────────────────
+
+/**
+ * Configuration for `createAgentExecutorRole`: how to build the agent's
+ * prompt, and how to turn the agent's free-text output into `Meta`.
+ */
+export interface AgentExecutorConfig<Meta> {
+  /** Build prompt + cwd for the agent from the message chain and topic id. */
+  prepPrompt: (
+    chain: WorkflowMessage[],
+    topicId: string,
+  ) => { prompt: string; cwd: string };
+
+  /** LLM₂ structured output: tool definition for meta extraction. */
+  parseMeta: {
+    /** System prompt for the meta-extraction LLM call. */
+    system: string;
+    /** Tool definition — parameters schema defines Meta shape. */
+    tool: LlmTool;
+    /**
+     * Parse the raw tool_call arguments string into Meta. If this throws,
+     * the executor falls back to defaultMeta.
+     */
+    parse: (args: string) => Meta;
+    /**
+     * Fallback when LLM₂ fails or returns no tool_call. Receives the agent's
+     * output text.
+     */
+    defaultMeta: (output: string) => Meta;
+  };
+}
+
+/**
+ * Turn an agent executor config into a pure Role.
+ *
+ * Pipeline per invocation: `config.prepPrompt` builds the prompt and cwd, the
+ * agent runs it, then LLM₂ is asked (tool_choice `required`) to extract
+ * structured meta from the agent's output. If LLM₂ throws, returns no
+ * tool_call, or `parse` throws, `defaultMeta(output)` is used instead.
+ *
+ * @param agent Runner that executes the prepared prompt.
+ * @param llm Client used for the meta-extraction call.
+ * @param config Prompt builder plus meta-extraction settings.
+ * @returns A Role resolving to `{ content: <agent output>, meta }`.
+ */
+export function createAgentExecutorRole<Meta>(
+  agent: AgentRunner,
+  llm: LlmClient,
+  config: AgentExecutorConfig<Meta>,
+): Role<Meta> {
+  return async (chain, topicId): Promise<RoleResult<Meta>> => {
+    // Step 1 — prompt prep is a plain template function; no LLM call here.
+    const request = config.prepPrompt(chain, topicId);
+
+    // Step 2 — hand the prompt to the agent and keep its free-text report.
+    const { output } = await agent.run(request.prompt, request.cwd);
+
+    // Step 3 — ask LLM₂ for structured meta, falling back on any failure.
+    const { parseMeta } = config;
+    const useDefault = (): Meta => parseMeta.defaultMeta(output);
+
+    let meta: Meta;
+    try {
+      const { tool_calls: toolCalls } = await llm.chat({
+        messages: [
+          { role: 'system', content: parseMeta.system },
+          { role: 'user', content: output },
+        ],
+        tools: [parseMeta.tool],
+        tool_choice: 'required',
+      });
+
+      const firstCall = toolCalls?.[0];
+      meta = firstCall
+        ? parseMeta.parse(firstCall.function.arguments)
+        : useDefault();
+    } catch {
+      meta = useDefault();
+    }
+
+    return { content: output, meta };
+  };
+}
@@ -1,26 +1,38 @@
 /**
- * Coder role — uses Cursor agent to implement code changes.
- * Pure: returns { content, meta }, adapter writes events.
+ * Coder role — LLM-Agent-LLM sandwich via agent executor.
 *
 * 小橘 🍊 (NEKO Team)
 */

-import { mkdirSync, unlinkSync, writeFileSync } from 'node:fs';
-import { tmpdir } from 'node:os';
-import { join } from 'node:path';
+import type { LlmClient } from '../../llm-client.js';
 import type { CoderMeta } from '../coding-workflow.js';
-import type { Role, RoleResult } from '../workflow-type.js';
+import type { Role } from '../workflow-type.js';
+import {
+  type AgentRunner,
+  createAgentExecutorRole,
+  createCursorRunner,
+} from './agent-executor.js';

-export function createCoderRole(opts: { agentBin: string }): Role<CoderMeta> {
-  return async (chain): Promise<RoleResult<CoderMeta>> => {
-    const startMsg = chain.find((m) => m.role === '__start__');
-    const title = startMsg?.meta?.title ?? 'unknown';
-    const description = startMsg?.content ?? '';
-    const repoDir = (startMsg?.meta?.repoDir as string) ?? '/tmp';
+export function createCoderRole(opts: {
+  agentBin?: string;
+  agent?: AgentRunner;
+  llm: LlmClient;
+}): Role<CoderMeta> {
+  const agent =
+    opts.agent ??
+    createCursorRunner({
+      agentBin: opts.agentBin ?? `${process.env.HOME}/.local/bin/agent`,
+    });

-    const architectMsg = chain.find((m) => m.role === 'architect');
+  return createAgentExecutorRole<CoderMeta>(agent, opts.llm, {
+    prepPrompt: (chain, topicId) => {
+      const startMsg = chain.find((m) => m.role === '__start__');
+      const title = startMsg?.meta?.title ?? topicId;
+      const description = startMsg?.content ?? '';
+      const repoDir = (startMsg?.meta?.repoDir as string) ?? '/tmp';
+      const architectMsg = chain.find((m) => m.role === 'architect');

-    const prompt = `## Task: ${title}
+      const prompt = `## Task: ${title}

 ${description}

@@ -31,66 +43,45 @@ ${architectMsg?.content ?? 'None'}
 ${((architectMsg?.meta?.targetFiles as string[]) ?? []).join(', ') || 'Not specified'}

 ## Instructions
-Implement the changes. Do NOT modify any existing test files. Only create or modify source files as needed.
-If the task asks to create a new file, create it. If it asks to modify existing files, modify them.
+Implement the changes. Do NOT modify any existing test files.
 Run tests if applicable. Commit your changes.`;

-    const result = await runCursorAgent(opts.agentBin, prompt, repoDir);
-    const filesChanged = (architectMsg?.meta?.targetFiles as string[]) ?? [];
-
-    return {
-      content: result.output,
-      meta: { filesChanged, testsPassed: result.success },
-    };
-  };
-}
-
-async function runCursorAgent(
-  agentBin: string,
-  prompt: string,
-  repoDir: string,
-): Promise<{ success: boolean; output: string; durationMs: number }> {
-  const promptDir = join(tmpdir(), 'pulse-v2-prompts');
-  mkdirSync(promptDir, { recursive: true });
-  const promptFile = join(
-    promptDir,
-    `task-${Date.now()}-${Math.random().toString(36).slice(2, 6)}.md`,
-  );
-  writeFileSync(promptFile, prompt, 'utf-8');
-
-  const cursorApiKey = await (async () => {
-    const p = Bun.spawn(['sh', '-c', 'secret get CURSOR_API_KEY | head -1'], {
-      stdout: 'pipe',
-    });
-    await p.exited;
-    return (await new Response(p.stdout).text()).trim();
-  })();
-
-  const start = Date.now();
-  const proc = Bun.spawn(
-    [agentBin, '--yolo', '-p', '--output-format', 'text', '-f', promptFile],
-    {
-      cwd: repoDir,
-      env: { ...process.env, CURSOR_API_KEY: cursorApiKey },
-      stdout: 'pipe',
-      stderr: 'pipe',
+      return { prompt, cwd: repoDir };
    },
-  );

-  const timer = setTimeout(() => proc.kill(), 300_000);
-  const exitCode = await proc.exited;
-  clearTimeout(timer);
-
-  const stdout = await new Response(proc.stdout).text();
-  const stderr = await new Response(proc.stderr).text();
-
-  try {
-    unlinkSync(promptFile);
-  } catch {}
-
-  return {
-    success: exitCode === 0,
-    output: stdout.trim() || stderr.trim(),
-    durationMs: Date.now() - start,
-  };
+    parseMeta: {
+      system:
+        'Extract structured metadata from this coding agent report. Call the extract_coder_meta tool.',
+      tool: {
+        type: 'function',
+        function: {
+          name: 'extract_coder_meta',
+          description: 'Extract coder metadata from agent output',
+          parameters: {
+            type: 'object',
+            properties: {
+              filesChanged: {
+                type: 'array',
+                items: { type: 'string' },
+                description: 'List of files created or modified',
+              },
+              testsPassed: {
+                type: 'boolean',
+                description: 'Whether tests passed successfully',
+              },
+            },
+            required: ['filesChanged', 'testsPassed'],
+          },
+        },
+      },
+      parse: (args) => {
+        const parsed = JSON.parse(args);
+        return {
+          filesChanged: parsed.filesChanged ?? [],
+          testsPassed: parsed.testsPassed ?? false,
+        };
+      },
+      defaultMeta: () => ({ filesChanged: [], testsPassed: false }),
+    },
+  });
 }
@@ -1,27 +1,37 @@
 /**
- * Reviewer role — uses Cursor agent in review mode.
- * Pure: returns { content, meta }, adapter writes events.
+ * Reviewer role — LLM-Agent-LLM sandwich via agent executor.
 *
 * 小橘 🍊 (NEKO Team)
 */

-import { mkdirSync, unlinkSync, writeFileSync } from 'node:fs';
-import { tmpdir } from 'node:os';
-import { join } from 'node:path';
+import type { LlmClient } from '../../llm-client.js';
 import type { ReviewerMeta } from '../coding-workflow.js';
-import type { Role, RoleResult } from '../workflow-type.js';
+import type { Role } from '../workflow-type.js';
+import {
+  type AgentRunner,
+  createAgentExecutorRole,
+  createCursorRunner,
+} from './agent-executor.js';

 export function createReviewerRole(opts: {
-  agentBin: string;
+  agentBin?: string;
+  agent?: AgentRunner;
+  llm: LlmClient;
 }): Role<ReviewerMeta> {
-  return async (chain): Promise<RoleResult<ReviewerMeta>> => {
-    const startMsg = chain.find((m) => m.role === '__start__');
-    const title = startMsg?.meta?.title ?? 'unknown';
-    const repoDir = (startMsg?.meta?.repoDir as string) ?? '/tmp';
+  const agent =
+    opts.agent ??
+    createCursorRunner({
+      agentBin: opts.agentBin ?? `${process.env.HOME}/.local/bin/agent`,
+    });

-    const coderMsg = [...chain].reverse().find((m) => m.role === 'coder');
+  return createAgentExecutorRole<ReviewerMeta>(agent, opts.llm, {
+    prepPrompt: (chain, topicId) => {
+      const startMsg = chain.find((m) => m.role === '__start__');
+      const title = startMsg?.meta?.title ?? topicId;
+      const repoDir = (startMsg?.meta?.repoDir as string) ?? '/tmp';
+      const coderMsg = [...chain].reverse().find((m) => m.role === 'coder');

-    const prompt = `## Code Review: ${title}
+      const prompt = `## Code Review: ${title}

 ## What was done
 ${coderMsg?.content ?? 'Unknown'}
@@ -34,66 +44,45 @@ Review the recent changes for correctness, security, and code quality.
 Do NOT modify any files. Only output your review.
 End with a clear verdict: APPROVED or REJECTED with reasons.`;

-    const result = await runCursorAgent(opts.agentBin, prompt, repoDir);
-
-    const output = result.output.toLowerCase();
-    const verdict: 'approved' | 'rejected' = output.includes('rejected')
-      ? 'rejected'
-      : 'approved';
-
-    return {
-      content: result.output,
-      meta: { verdict },
-    };
-  };
-}
-
-async function runCursorAgent(
-  agentBin: string,
-  prompt: string,
-  repoDir: string,
-): Promise<{ success: boolean; output: string; durationMs: number }> {
-  const promptDir = join(tmpdir(), 'pulse-v2-prompts');
-  mkdirSync(promptDir, { recursive: true });
-  const promptFile = join(
-    promptDir,
-    `task-${Date.now()}-${Math.random().toString(36).slice(2, 6)}.md`,
-  );
-  writeFileSync(promptFile, prompt, 'utf-8');
-
-  const cursorApiKey = await (async () => {
-    const p = Bun.spawn(['sh', '-c', 'secret get CURSOR_API_KEY | head -1'], {
-      stdout: 'pipe',
-    });
-    await p.exited;
-    return (await new Response(p.stdout).text()).trim();
-  })();
-
-  const start = Date.now();
-  const proc = Bun.spawn(
-    [agentBin, '--yolo', '-p', '--output-format', 'text', '-f', promptFile],
-    {
-      cwd: repoDir,
-      env: { ...process.env, CURSOR_API_KEY: cursorApiKey },
-      stdout: 'pipe',
-      stderr: 'pipe',
+      return { prompt, cwd: repoDir };
    },
-  );

-  const timer = setTimeout(() => proc.kill(), 300_000);
-  const exitCode = await proc.exited;
-  clearTimeout(timer);
-
-  const stdout = await new Response(proc.stdout).text();
-  const stderr = await new Response(proc.stderr).text();
-
-  try {
-    unlinkSync(promptFile);
-  } catch {}
-
-  return {
-    success: exitCode === 0,
-    output: stdout.trim() || stderr.trim(),
-    durationMs: Date.now() - start,
-  };
+    parseMeta: {
+      system:
+        'Extract the review verdict from this code review report. Call the extract_review_verdict tool.',
+      tool: {
+        type: 'function',
+        function: {
+          name: 'extract_review_verdict',
+          description: 'Extract review verdict from reviewer output',
+          parameters: {
+            type: 'object',
+            properties: {
+              verdict: {
+                type: 'string',
+                enum: ['approved', 'rejected'],
+                description: 'Final review verdict',
+              },
+            },
+            required: ['verdict'],
+          },
+        },
+      },
+      parse: (args) => {
+        const parsed = JSON.parse(args);
+        return {
+          verdict: parsed.verdict === 'rejected' ? 'rejected' : 'approved',
+        };
+      },
+      defaultMeta: (output) => {
+        // Last resort: keyword matching on the last 200 chars
+        const tail = output.toLowerCase().slice(-200);
+        return {
+          verdict: tail.includes('reject')
+            ? ('rejected' as const)
+            : ('approved' as const),
+        };
+      },
+    },
+  });
 }