diff --git a/packages/pulse/src/workflows/index.ts b/packages/pulse/src/workflows/index.ts index 6285226..f7cb400 100644 --- a/packages/pulse/src/workflows/index.ts +++ b/packages/pulse/src/workflows/index.ts @@ -5,6 +5,13 @@ */ export { createCodingWorkflow } from './coding-workflow.js'; +export { + type AgentExecutorConfig, + type AgentResult, + type AgentRunner, + createAgentExecutorRole, + createCursorRunner, +} from './roles/agent-executor.js'; export { createArchitectRole } from './roles/architect-llm.js'; export { createCoderRole } from './roles/coder-cursor.js'; export { createReviewerRole } from './roles/reviewer-cursor.js'; diff --git a/packages/pulse/src/workflows/roles/agent-executor.test.ts b/packages/pulse/src/workflows/roles/agent-executor.test.ts new file mode 100644 index 0000000..e8ce0c3 --- /dev/null +++ b/packages/pulse/src/workflows/roles/agent-executor.test.ts @@ -0,0 +1,153 @@ +/** + * Agent Executor tests — LLM-Agent-LLM sandwich pattern. + * + * 小橘 🍊 (NEKO Team) + */ + +import { describe, expect, it } from 'bun:test'; +import type { LlmClient } from '../../llm-client.js'; +import type { WorkflowMessage } from '../workflow-type.js'; +import { + type AgentExecutorConfig, + type AgentRunner, + createAgentExecutorRole, +} from './agent-executor.js'; + +function mockAgent(output: string): AgentRunner { + return { + run: async () => ({ success: true, output, durationMs: 100 }), + }; +} + +function mockLlm(toolArgs: string): LlmClient { + return { + chat: async () => ({ + tool_calls: [ + { + id: 'call_1', + function: { name: 'extract', arguments: toolArgs }, + }, + ], + }), + }; +} + +function mockLlmNoTool(): LlmClient { + return { + chat: async () => ({ content: 'no tool call' }), + }; +} + +function mockLlmError(): LlmClient { + return { + chat: async () => { + throw new Error('LLM down'); + }, + }; +} + +type TestMeta = { verdict: 'approved' | 'rejected' }; + +const testConfig: AgentExecutorConfig<TestMeta> = { + prepPrompt: (chain) => { + const start = 
chain.find((m) => m.role === '__start__'); + return { prompt: `Review: ${start?.content ?? ''}`, cwd: '/tmp' }; + }, + parseMeta: { + system: 'Extract verdict.', + tool: { + type: 'function', + function: { + name: 'extract', + description: 'Extract verdict', + parameters: { + type: 'object', + properties: { + verdict: { type: 'string', enum: ['approved', 'rejected'] }, + }, + required: ['verdict'], + }, + }, + }, + parse: (args) => JSON.parse(args), + defaultMeta: () => ({ verdict: 'approved' as const }), + }, +}; + +const chain: WorkflowMessage[] = [ + { + role: '__start__', + content: 'test task', + meta: {}, + timestamp: Date.now(), + }, +]; + +describe('createAgentExecutorRole', () => { + it('sandwich: agent output → LLM₂ tool_choice → structured meta', async () => { + const role = createAgentExecutorRole( + mockAgent('All changes look good. APPROVED.'), + mockLlm('{"verdict":"approved"}'), + testConfig, + ); + + const result = await role(chain, 't1', null as any); + expect(result.content).toBe('All changes look good. APPROVED.'); + expect(result.meta).toEqual({ verdict: 'approved' }); + }); + + it('LLM₂ returns rejected verdict', async () => { + const role = createAgentExecutorRole( + mockAgent('Code has issues. 
REJECTED.'), + mockLlm('{"verdict":"rejected"}'), + testConfig, + ); + + const result = await role(chain, 't1', null as any); + expect(result.meta).toEqual({ verdict: 'rejected' }); + }); + + it('falls back to defaultMeta when LLM₂ returns no tool_call', async () => { + const role = createAgentExecutorRole( + mockAgent('Some output'), + mockLlmNoTool(), + testConfig, + ); + + const result = await role(chain, 't1', null as any); + expect(result.content).toBe('Some output'); + expect(result.meta).toEqual({ verdict: 'approved' }); // defaultMeta + }); + + it('falls back to defaultMeta when LLM₂ throws', async () => { + const role = createAgentExecutorRole( + mockAgent('Agent worked fine'), + mockLlmError(), + testConfig, + ); + + const result = await role(chain, 't1', null as any); + expect(result.content).toBe('Agent worked fine'); + expect(result.meta).toEqual({ verdict: 'approved' }); + }); + + it('prepPrompt receives chain and topicId', async () => { + let capturedTopicId = ''; + const config: AgentExecutorConfig<TestMeta> = { + ...testConfig, + prepPrompt: (_c, topicId) => { + capturedTopicId = topicId; + return { prompt: 'test', cwd: '/tmp' }; + }, + }; + + const role = createAgentExecutorRole( + mockAgent('output'), + mockLlm('{"verdict":"approved"}'), + config, + ); + + await role(chain, 'my-topic', null as any); + expect(capturedTopicId).toBe('my-topic'); + }); +}); diff --git a/packages/pulse/src/workflows/roles/agent-executor.ts b/packages/pulse/src/workflows/roles/agent-executor.ts new file mode 100644 index 0000000..208f330 --- /dev/null +++ b/packages/pulse/src/workflows/roles/agent-executor.ts @@ -0,0 +1,152 @@ +/** + * Agent Executor — LLM-Agent-LLM sandwich pattern. + * + * 1. LLM₁ (prep): prepPrompt builds the agent's prompt + * 2. Agent (exec): runs CLI agent, gets free-text report + * 3. 
LLM₂ (parse): extracts structured meta via tool_choice + * + * 小橘 🍊 (NEKO Team) + */ + +import { mkdirSync, unlinkSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import type { LlmClient, LlmTool } from '../../llm-client.js'; +import type { Role, RoleResult, WorkflowMessage } from '../workflow-type.js'; + +// ── Agent runner ─────────────────────────────────────────────── + +export interface AgentResult { + success: boolean; + output: string; + durationMs: number; +} + +export interface AgentRunner { + run(prompt: string, cwd: string): Promise<AgentResult>; +} + +/** + * Default agent runner — Cursor CLI. + */ +export function createCursorRunner(opts: { + agentBin: string; + timeoutMs?: number; +}): AgentRunner { + const { agentBin, timeoutMs = 300_000 } = opts; + + return { + async run(prompt, cwd) { + const promptDir = join(tmpdir(), 'pulse-v2-prompts'); + mkdirSync(promptDir, { recursive: true }); + const promptFile = join( + promptDir, + `task-${Date.now()}-${Math.random().toString(36).slice(2, 6)}.md`, + ); + writeFileSync(promptFile, prompt, 'utf-8'); + + const cursorApiKey = await (async () => { + const p = Bun.spawn( + ['sh', '-c', 'secret get CURSOR_API_KEY | head -1'], + { + stdout: 'pipe', + }, + ); + await p.exited; + return (await new Response(p.stdout).text()).trim(); + })(); + + const start = Date.now(); + const proc = Bun.spawn( + [agentBin, '--yolo', '-p', '--output-format', 'text', '-f', promptFile], + { + cwd, + env: { ...process.env, CURSOR_API_KEY: cursorApiKey }, + stdout: 'pipe', + stderr: 'pipe', + }, + ); + + const timer = setTimeout(() => proc.kill(), timeoutMs); + const exitCode = await proc.exited; + clearTimeout(timer); + + const stdout = await new Response(proc.stdout).text(); + const stderr = await new Response(proc.stderr).text(); + + try { + unlinkSync(promptFile); + } catch {} + + return { + success: exitCode === 0, + output: stdout.trim() || stderr.trim(), + durationMs: Date.now() - 
start, + }; + }, + }; +} + +// ── Agent Executor Role factory ──────────────────────────────── + +export interface AgentExecutorConfig<Meta> { + /** Build prompt + cwd for the agent. */ + prepPrompt: ( + chain: WorkflowMessage[], + topicId: string, + ) => { prompt: string; cwd: string }; + + /** LLM₂ structured output: tool definition for meta extraction. */ + parseMeta: { + /** System prompt for the meta-extraction LLM call. */ + system: string; + /** Tool definition — parameters schema defines Meta shape. */ + tool: LlmTool; + /** Parse tool_call arguments into Meta. Falls back to defaultMeta on failure. */ + parse: (args: string) => Meta; + /** Fallback when LLM₂ fails or returns no tool_call. */ + defaultMeta: (output: string) => Meta; + }; +} + +/** + * Create a pure Role from an agent executor config. + * The Role runs: prepPrompt → agent → LLM₂ parse → { content, meta }. + */ +export function createAgentExecutorRole<Meta>( + agent: AgentRunner, + llm: LlmClient, + config: AgentExecutorConfig<Meta>, +): Role<Meta> { + return async (chain, topicId): Promise<RoleResult<Meta>> => { + // 1. LLM₁ prep (built into config.prepPrompt — no LLM call needed for prompt templates) + const { prompt, cwd } = config.prepPrompt(chain, topicId); + + // 2. Agent exec + const result = await agent.run(prompt, cwd); + + // 3. 
LLM₂ parse meta + let meta: Meta; + try { + const resp = await llm.chat({ + messages: [ + { role: 'system', content: config.parseMeta.system }, + { role: 'user', content: result.output }, + ], + tools: [config.parseMeta.tool], + tool_choice: 'required', + }); + + const toolCall = resp.tool_calls?.[0]; + if (toolCall) { + meta = config.parseMeta.parse(toolCall.function.arguments); + } else { + meta = config.parseMeta.defaultMeta(result.output); + } + } catch { + meta = config.parseMeta.defaultMeta(result.output); + } + + return { content: result.output, meta }; + }; +} diff --git a/packages/pulse/src/workflows/roles/coder-cursor.ts b/packages/pulse/src/workflows/roles/coder-cursor.ts index af8242e..80a3932 100644 --- a/packages/pulse/src/workflows/roles/coder-cursor.ts +++ b/packages/pulse/src/workflows/roles/coder-cursor.ts @@ -1,26 +1,38 @@ /** - * Coder role — uses Cursor agent to implement code changes. - * Pure: returns { content, meta }, adapter writes events. + * Coder role — LLM-Agent-LLM sandwich via agent executor. * * 小橘 🍊 (NEKO Team) */ -import { mkdirSync, unlinkSync, writeFileSync } from 'node:fs'; -import { tmpdir } from 'node:os'; -import { join } from 'node:path'; +import type { LlmClient } from '../../llm-client.js'; import type { CoderMeta } from '../coding-workflow.js'; -import type { Role, RoleResult } from '../workflow-type.js'; +import type { Role } from '../workflow-type.js'; +import { + type AgentRunner, + createAgentExecutorRole, + createCursorRunner, +} from './agent-executor.js'; -export function createCoderRole(opts: { agentBin: string }): Role<CoderMeta> { - return async (chain): Promise<RoleResult<CoderMeta>> => { - const startMsg = chain.find((m) => m.role === '__start__'); - const title = startMsg?.meta?.title ?? 'unknown'; - const description = startMsg?.content ?? ''; - const repoDir = (startMsg?.meta?.repoDir as string) ?? 
'/tmp'; +export function createCoderRole(opts: { + agentBin?: string; + agent?: AgentRunner; + llm: LlmClient; +}): Role<CoderMeta> { + const agent = + opts.agent ?? + createCursorRunner({ + agentBin: opts.agentBin ?? `${process.env.HOME}/.local/bin/agent`, + }); - const architectMsg = chain.find((m) => m.role === 'architect'); + return createAgentExecutorRole(agent, opts.llm, { + prepPrompt: (chain, topicId) => { + const startMsg = chain.find((m) => m.role === '__start__'); + const title = startMsg?.meta?.title ?? topicId; + const description = startMsg?.content ?? ''; + const repoDir = (startMsg?.meta?.repoDir as string) ?? '/tmp'; + const architectMsg = chain.find((m) => m.role === 'architect'); - const prompt = `## Task: ${title} + const prompt = `## Task: ${title} ${description} @@ -31,66 +43,45 @@ ${architectMsg?.content ?? 'None'} ${((architectMsg?.meta?.targetFiles as string[]) ?? []).join(', ') || 'Not specified'} ## Instructions -Implement the changes. Do NOT modify any existing test files. Only create or modify source files as needed. -If the task asks to create a new file, create it. If it asks to modify existing files, modify them. +Implement the changes. Do NOT modify any existing test files. Run tests if applicable. Commit your changes.`; - const result = await runCursorAgent(opts.agentBin, prompt, repoDir); - const filesChanged = (architectMsg?.meta?.targetFiles as string[]) ?? 
[]; - - return { - content: result.output, - meta: { filesChanged, testsPassed: result.success }, - }; - }; -} - -async function runCursorAgent( - agentBin: string, - prompt: string, - repoDir: string, -): Promise<{ success: boolean; output: string; durationMs: number }> { - const promptDir = join(tmpdir(), 'pulse-v2-prompts'); - mkdirSync(promptDir, { recursive: true }); - const promptFile = join( - promptDir, - `task-${Date.now()}-${Math.random().toString(36).slice(2, 6)}.md`, - ); - writeFileSync(promptFile, prompt, 'utf-8'); - - const cursorApiKey = await (async () => { - const p = Bun.spawn(['sh', '-c', 'secret get CURSOR_API_KEY | head -1'], { - stdout: 'pipe', - }); - await p.exited; - return (await new Response(p.stdout).text()).trim(); - })(); - - const start = Date.now(); - const proc = Bun.spawn( - [agentBin, '--yolo', '-p', '--output-format', 'text', '-f', promptFile], - { - cwd: repoDir, - env: { ...process.env, CURSOR_API_KEY: cursorApiKey }, - stdout: 'pipe', - stderr: 'pipe', + return { prompt, cwd: repoDir }; }, - ); - const timer = setTimeout(() => proc.kill(), 300_000); - const exitCode = await proc.exited; - clearTimeout(timer); - - const stdout = await new Response(proc.stdout).text(); - const stderr = await new Response(proc.stderr).text(); - - try { - unlinkSync(promptFile); - } catch {} - - return { - success: exitCode === 0, - output: stdout.trim() || stderr.trim(), - durationMs: Date.now() - start, - }; + parseMeta: { + system: + 'Extract structured metadata from this coding agent report. 
Call the extract_coder_meta tool.', + tool: { + type: 'function', + function: { + name: 'extract_coder_meta', + description: 'Extract coder metadata from agent output', + parameters: { + type: 'object', + properties: { + filesChanged: { + type: 'array', + items: { type: 'string' }, + description: 'List of files created or modified', + }, + testsPassed: { + type: 'boolean', + description: 'Whether tests passed successfully', + }, + }, + required: ['filesChanged', 'testsPassed'], + }, + }, + }, + parse: (args) => { + const parsed = JSON.parse(args); + return { + filesChanged: parsed.filesChanged ?? [], + testsPassed: parsed.testsPassed ?? false, + }; + }, + defaultMeta: () => ({ filesChanged: [], testsPassed: false }), + }, + }); } diff --git a/packages/pulse/src/workflows/roles/reviewer-cursor.ts b/packages/pulse/src/workflows/roles/reviewer-cursor.ts index 4cb4346..a502a04 100644 --- a/packages/pulse/src/workflows/roles/reviewer-cursor.ts +++ b/packages/pulse/src/workflows/roles/reviewer-cursor.ts @@ -1,27 +1,37 @@ /** - * Reviewer role — uses Cursor agent in review mode. - * Pure: returns { content, meta }, adapter writes events. + * Reviewer role — LLM-Agent-LLM sandwich via agent executor. * * 小橘 🍊 (NEKO Team) */ -import { mkdirSync, unlinkSync, writeFileSync } from 'node:fs'; -import { tmpdir } from 'node:os'; -import { join } from 'node:path'; +import type { LlmClient } from '../../llm-client.js'; import type { ReviewerMeta } from '../coding-workflow.js'; -import type { Role, RoleResult } from '../workflow-type.js'; +import type { Role } from '../workflow-type.js'; +import { + type AgentRunner, + createAgentExecutorRole, + createCursorRunner, +} from './agent-executor.js'; export function createReviewerRole(opts: { - agentBin: string; + agentBin?: string; + agent?: AgentRunner; + llm: LlmClient; }): Role<ReviewerMeta> { - return async (chain): Promise<RoleResult<ReviewerMeta>> => { - const startMsg = chain.find((m) => m.role === '__start__'); - const title = startMsg?.meta?.title ?? 
'unknown'; - const repoDir = (startMsg?.meta?.repoDir as string) ?? '/tmp'; + const agent = + opts.agent ?? + createCursorRunner({ + agentBin: opts.agentBin ?? `${process.env.HOME}/.local/bin/agent`, + }); - const coderMsg = [...chain].reverse().find((m) => m.role === 'coder'); + return createAgentExecutorRole(agent, opts.llm, { + prepPrompt: (chain, topicId) => { + const startMsg = chain.find((m) => m.role === '__start__'); + const title = startMsg?.meta?.title ?? topicId; + const repoDir = (startMsg?.meta?.repoDir as string) ?? '/tmp'; + const coderMsg = [...chain].reverse().find((m) => m.role === 'coder'); - const prompt = `## Code Review: ${title} + const prompt = `## Code Review: ${title} ## What was done ${coderMsg?.content ?? 'Unknown'} @@ -34,66 +44,45 @@ Review the recent changes for correctness, security, and code quality. Do NOT modify any files. Only output your review. End with a clear verdict: APPROVED or REJECTED with reasons.`; - const result = await runCursorAgent(opts.agentBin, prompt, repoDir); - - const output = result.output.toLowerCase(); - const verdict: 'approved' | 'rejected' = output.includes('rejected') - ? 
'rejected' - : 'approved'; - - return { - content: result.output, - meta: { verdict }, - }; - }; -} - -async function runCursorAgent( - agentBin: string, - prompt: string, - repoDir: string, -): Promise<{ success: boolean; output: string; durationMs: number }> { - const promptDir = join(tmpdir(), 'pulse-v2-prompts'); - mkdirSync(promptDir, { recursive: true }); - const promptFile = join( - promptDir, - `task-${Date.now()}-${Math.random().toString(36).slice(2, 6)}.md`, - ); - writeFileSync(promptFile, prompt, 'utf-8'); - - const cursorApiKey = await (async () => { - const p = Bun.spawn(['sh', '-c', 'secret get CURSOR_API_KEY | head -1'], { - stdout: 'pipe', - }); - await p.exited; - return (await new Response(p.stdout).text()).trim(); - })(); - - const start = Date.now(); - const proc = Bun.spawn( - [agentBin, '--yolo', '-p', '--output-format', 'text', '-f', promptFile], - { - cwd: repoDir, - env: { ...process.env, CURSOR_API_KEY: cursorApiKey }, - stdout: 'pipe', - stderr: 'pipe', + return { prompt, cwd: repoDir }; }, - ); - const timer = setTimeout(() => proc.kill(), 300_000); - const exitCode = await proc.exited; - clearTimeout(timer); - - const stdout = await new Response(proc.stdout).text(); - const stderr = await new Response(proc.stderr).text(); - - try { - unlinkSync(promptFile); - } catch {} - - return { - success: exitCode === 0, - output: stdout.trim() || stderr.trim(), - durationMs: Date.now() - start, - }; + parseMeta: { + system: + 'Extract the review verdict from this code review report. 
Call the extract_review_verdict tool.', + tool: { + type: 'function', + function: { + name: 'extract_review_verdict', + description: 'Extract review verdict from reviewer output', + parameters: { + type: 'object', + properties: { + verdict: { + type: 'string', + enum: ['approved', 'rejected'], + description: 'Final review verdict', + }, + }, + required: ['verdict'], + }, + }, + }, + parse: (args) => { + const parsed = JSON.parse(args); + return { + verdict: parsed.verdict === 'rejected' ? 'rejected' : 'approved', + }; + }, + defaultMeta: (output) => { + // Last resort: keyword matching on the last 200 chars + const tail = output.toLowerCase().slice(-200); + return { + verdict: tail.includes('reject') + ? ('rejected' as const) + : ('approved' as const), + }; + }, + }, + }); }