feat: enhance coding workflow — structured architect design, reviewer retry with retryCount cap

- ArchitectMeta now includes targetFiles, changes (file→description), and verification criteria - CoderMeta prepPrompt explicitly consumes architect.changes, architect.verification, and reviewer.rejectionReason on retry - ReviewerMeta now outputs verdict, rejectionReason, and monotonic retryCount (starting from 0) - Moderator enforces retryCount < 3 limit — forces closer when exceeded - Updated mock implementations, role factories, and e2e to match new meta structures - Added test for retry limit (retryCount >= 3 → closer)
2026-04-17 10:27:53 +00:00
parent 4139f1d5d5
commit 592a5566a5
7 changed files with 215 additions and 37 deletions
@@ -69,17 +69,32 @@ async function architectFn(chain: WorkflowMessage[]) {
      },
    ],
  });
-  let parsed: { analysis?: string; targetFiles?: string[] };
+  let parsed: {
+    analysis?: string;
+    targetFiles?: string[];
+    changes?: Record<string, string>;
+    verification?: string;
+  };
  try {
    parsed = JSON.parse(resp.content ?? '{}');
  } catch {
-    parsed = { analysis: resp.content ?? 'No analysis', targetFiles: [] };
+    parsed = { analysis: resp.content ?? 'No analysis' };
  }
  console.log(`  🏗️  analysis: ${(parsed.analysis ?? '').slice(0, 120)}...`);

+  const targetFiles = parsed.targetFiles ?? [];
+  const changes: Record<string, string> = parsed.changes ?? {};
+  if (Object.keys(changes).length === 0) {
+    for (const f of targetFiles) changes[f] = 'Update this file';
+  }
+
  return {
    content: parsed.analysis ?? 'No analysis',
-    meta: { targetFiles: parsed.targetFiles ?? [] },
+    meta: {
+      targetFiles,
+      changes,
+      verification: parsed.verification ?? 'Run tests and verify manually',
+    },
  };
 }

@@ -76,18 +76,23 @@ describe('CodingTask WorkflowType', () => {
    expect(r5.executed).toEqual([]);
  });

-  it('rejection triggers re-coding', async () => {
+  it('rejection triggers re-coding with rejectionReason', async () => {
    setup();

    let reviewCount = 0;
    const codingTask = createCodingWorkflow({
-      reviewerFn: async () => {
+      reviewerFn: async (chain) => {
+        const prevReviews = chain.filter((m) => m.role === 'reviewer');
        reviewCount++;
        const verdict =
          reviewCount === 1 ? ('rejected' as const) : ('approved' as const);
        return {
          content: reviewCount === 1 ? 'Needs fixes' : 'Looks good',
-          meta: { verdict },
+          meta: {
+            verdict,
+            rejectionReason: reviewCount === 1 ? 'Missing error handling' : '',
+            retryCount: prevReviews.length,
+          },
        };
      },
    });
@@ -97,12 +102,19 @@ describe('CodingTask WorkflowType', () => {

    await rule.tick(); // architect
    await rule.tick(); // coder
-    await rule.tick(); // reviewer (rejects)
+    await rule.tick(); // reviewer (rejects, retryCount=0)
+
+    const reviewerEvents = store.queryByKind('coding.reviewer');
+    expect(reviewerEvents.length).toBe(1);
+    const meta = JSON.parse(reviewerEvents[0].meta!);
+    expect(meta.verdict).toBe('rejected');
+    expect(meta.rejectionReason).toBe('Missing error handling');
+    expect(meta.retryCount).toBe(0);

    const r4 = await rule.tick();
    expect(r4.executed).toMatchObject([{ topicId: 'task-2', role: 'coder' }]);

-    await rule.tick(); // reviewer (approves)
+    await rule.tick(); // reviewer (approves, retryCount=1)
    const r6 = await rule.tick();
    expect(r6.executed).toMatchObject([{ topicId: 'task-2', role: 'closer' }]);

@@ -110,6 +122,46 @@ describe('CodingTask WorkflowType', () => {
    expect(r7.executed).toEqual([]);
  });

+  it('retryCount >= 3 forces transition to closer even if rejected', async () => {
+    setup();
+
+    let reviewRound = 0;
+    const codingTask = createCodingWorkflow({
+      reviewerFn: async (chain) => {
+        const prevReviews = chain.filter((m) => m.role === 'reviewer');
+        reviewRound++;
+        return {
+          content: `[mock] Rejection round ${reviewRound}`,
+          meta: {
+            verdict: 'rejected' as const,
+            rejectionReason: `Issue #${reviewRound}`,
+            retryCount: prevReviews.length,
+          },
+        };
+      },
+    });
+    const rule = createWorkflowRule(codingTask, store);
+
+    triggerCoding('task-retry', 'Retry limit test', 'Test max 3 retries', '/tmp/repo');
+
+    await rule.tick(); // architect
+    await rule.tick(); // coder (round 1)
+    await rule.tick(); // reviewer rejects, retryCount=0
+    await rule.tick(); // coder (round 2)
+    await rule.tick(); // reviewer rejects, retryCount=1
+    await rule.tick(); // coder (round 3)
+    await rule.tick(); // reviewer rejects, retryCount=2
+
+    await rule.tick(); // coder (round 4 — last retry)
+    await rule.tick(); // reviewer rejects, retryCount=3 → forces closer
+
+    const r = await rule.tick();
+    expect(r.executed).toMatchObject([{ topicId: 'task-retry', role: 'closer' }]);
+
+    const rEnd = await rule.tick();
+    expect(rEnd.executed).toEqual([]);
+  });
+
  it('adapter writes events, not roles (CAS content is string)', async () => {
    setup();
    const codingTask = createCodingWorkflow();
@@ -18,9 +18,17 @@ import {

 // ── Role Meta types ────────────────────────────────────────────

-export type ArchitectMeta = { targetFiles: string[] };
+export type ArchitectMeta = {
+  targetFiles: string[];
+  changes: Record<string, string>;
+  verification: string;
+};
 export type CoderMeta = { filesChanged: string[]; testsPassed: boolean };
-export type ReviewerMeta = { verdict: 'approved' | 'rejected' };
+export type ReviewerMeta = {
+  verdict: 'approved' | 'rejected';
+  rejectionReason: string;
+  retryCount: number;
+};
 export type CloserMeta = null;

 // ── Roles record ───────────────────────────────────────────────
@@ -40,7 +48,14 @@ const defaultArchitect: Role<ArchitectMeta> = async (chain) => {
  const targetFiles = ['src/main.ts', 'src/utils.ts'];
  return {
    content: `[mock] Analysis for "${title}": ${startMsg?.content ?? ''}`,
-    meta: { targetFiles },
+    meta: {
+      targetFiles,
+      changes: {
+        'src/main.ts': 'Update main entry point',
+        'src/utils.ts': 'Add utility helpers',
+      },
+      verification: 'Run `bun test` and verify all tests pass',
+    },
  };
 };

@@ -55,10 +70,12 @@ const defaultCoder: Role<CoderMeta> = async (chain) => {
  };
 };

-const defaultReviewer: Role<ReviewerMeta> = async () => {
+const defaultReviewer: Role<ReviewerMeta> = async (chain) => {
+  const reviewerMsgs = chain.filter((m) => m.role === 'reviewer');
+  const retryCount = reviewerMsgs.length;
  return {
    content: '[mock] Code looks good',
-    meta: { verdict: 'approved' },
+    meta: { verdict: 'approved', rejectionReason: '', retryCount },
  };
 };

@@ -85,8 +102,11 @@ function codingModerator(
      return 'coder';
    case 'coder':
      return 'reviewer';
-    case 'reviewer':
-      return output.meta?.verdict === 'rejected' ? 'coder' : 'closer';
+    case 'reviewer': {
+      const rejected = output.meta?.verdict === 'rejected';
+      const retryCount = output.meta?.retryCount ?? 0;
+      return rejected && retryCount < 3 ? 'coder' : 'closer';
+    }
    case 'closer':
      return END;
  }
@@ -25,18 +25,19 @@ describe('createArchitectRole', () => {
    ];
  }

-  it('calls LLM and returns { content, meta } with targetFiles', async () => {
+  it('calls LLM and returns { content, meta } with targetFiles, changes, verification', async () => {
    const mockLlm: LlmClient = {
      chat: async () => ({
        content: JSON.stringify({
          analysis: 'Need to update main.ts',
          targetFiles: ['src/main.ts'],
+          changes: { 'src/main.ts': 'Fix the entry point' },
+          verification: 'Run bun test',
        }),
      }),
    };

    const role = createArchitectRole(mockLlm);
-    // store param is still in signature but pure role shouldn't need it
    const result = await role(
      makeChain('Fix the bug', 'There is a bug in main.ts', '/tmp/repo'),
      'test-1',
@@ -44,7 +45,11 @@ describe('createArchitectRole', () => {
    );

    expect(result.content).toBe('Need to update main.ts');
-    expect(result.meta).toEqual({ targetFiles: ['src/main.ts'] });
+    expect(result.meta).toEqual({
+      targetFiles: ['src/main.ts'],
+      changes: { 'src/main.ts': 'Fix the entry point' },
+      verification: 'Run bun test',
+    });
  });

  it('handles non-JSON LLM response gracefully', async () => {
@@ -60,6 +65,10 @@ describe('createArchitectRole', () => {
    );

    expect(result.content).toBe('Just do it');
-    expect(result.meta).toEqual({ targetFiles: [] });
+    expect(result.meta).toEqual({
+      targetFiles: [],
+      changes: {},
+      verification: 'Run tests and verify manually',
+    });
  });
 });
@@ -1,7 +1,9 @@
 /**
- * Architect role — LLM analyzes coding task and suggests approach.
+ * Architect role — LLM analyzes coding task and outputs structured design.
 * Built on createLlmRole factory (shared sandwich pattern).
 *
+ * Outputs: targetFiles, changes (file → description), verification criteria.
+ *
 * 小橘 🍊 (NEKO Team)
 */

@@ -12,8 +14,14 @@ import { createLlmRole } from './llm-role-factory.js';

 export function createArchitectRole(llmClient: LlmClient): Role<ArchitectMeta> {
  return createLlmRole<ArchitectMeta>(llmClient, {
-    systemPrompt:
-      'You are a software architect. Analyze the task and suggest target files and approach. Reply in JSON: {"analysis": "...", "targetFiles": ["..."]}',
+    systemPrompt: `You are a software architect. Analyze the task and produce a structured design.
+Reply in JSON with these exact fields:
+{
+  "analysis": "high-level summary",
+  "targetFiles": ["file1.ts", "file2.ts"],
+  "changes": { "file1.ts": "description of what to change", "file2.ts": "..." },
+  "verification": "how to verify the changes are correct (e.g. test commands, manual checks)"
+}`,
    buildUserMessage: (chain) => {
      const startMsg = chain.find((m) => m.role === '__start__');
      const title = startMsg?.meta?.title ?? 'unknown';
@@ -22,15 +30,29 @@ export function createArchitectRole(llmClient: LlmClient): Role<ArchitectMeta> {
      return `Task: ${title}\nDescription: ${description}\nRepo: ${repoDir}`;
    },
    parseResponse: (resp) => {
-      let parsed: { analysis?: string; targetFiles?: string[] };
+      let parsed: {
+        analysis?: string;
+        targetFiles?: string[];
+        changes?: Record<string, string>;
+        verification?: string;
+      };
      try {
        parsed = JSON.parse(resp.content ?? '{}');
      } catch {
-        parsed = { analysis: resp.content ?? 'No analysis', targetFiles: [] };
+        parsed = { analysis: resp.content ?? 'No analysis' };
+      }
+      const targetFiles = parsed.targetFiles ?? [];
+      const changes: Record<string, string> = parsed.changes ?? {};
+      if (Object.keys(changes).length === 0) {
+        for (const f of targetFiles) changes[f] = 'Update this file';
      }
      return {
        content: parsed.analysis ?? 'No analysis',
-        meta: { targetFiles: parsed.targetFiles ?? [] },
+        meta: {
+          targetFiles,
+          changes,
+          verification: parsed.verification ?? 'Run tests and verify manually',
+        },
      };
    },
  });
@@ -31,6 +31,21 @@ export function createCoderRole(opts: {
      const description = startMsg?.content ?? '';
      const repoDir = (startMsg?.meta?.repoDir as string) ?? '/tmp';
      const architectMsg = chain.find((m) => m.role === 'architect');
+      const lastReviewerMsg = [...chain].reverse().find((m) => m.role === 'reviewer');
+
+      const changes = (architectMsg?.meta?.changes as Record<string, string>) ?? {};
+      const verification = (architectMsg?.meta?.verification as string) ?? '';
+      const targetFiles = (architectMsg?.meta?.targetFiles as string[]) ?? [];
+
+      const changesSection = Object.entries(changes)
+        .map(([file, desc]) => `- **${file}**: ${desc}`)
+        .join('\n');
+
+      let rejectionSection = '';
+      if (lastReviewerMsg?.meta?.verdict === 'rejected') {
+        const reason = (lastReviewerMsg.meta.rejectionReason as string) ?? 'No reason given';
+        rejectionSection = `\n## Previous Review Rejection\n${reason}\n\nFix the issues above before proceeding.\n`;
+      }

      const prompt = `## Task: ${title}

@@ -40,11 +55,17 @@ ${description}
 ${architectMsg?.content ?? 'None'}

 ## Target Files
-${((architectMsg?.meta?.targetFiles as string[]) ?? []).join(', ') || 'Not specified'}
+${targetFiles.join(', ') || 'Not specified'}

+## Per-File Changes
+${changesSection || 'Not specified'}
+
+## Verification Criteria
+${verification || 'Not specified'}
+${rejectionSection}
 ## Instructions
-Implement the changes. Do NOT modify any existing test files.
-Run tests if applicable. Commit your changes.`;
+Implement the changes as described above. Do NOT modify any existing test files.
+Run the verification criteria to confirm correctness. Commit your changes.`;

      return { prompt, cwd: repoDir };
    },
@@ -1,6 +1,9 @@
 /**
 * Reviewer role — LLM-Agent-LLM sandwich via agent executor.
 *
+ * Compares coder's actual changes against architect's design (changes + verification).
+ * Outputs verdict, rejectionReason (when rejected), and monotonic retryCount.
+ *
 * 小橘 🍊 (NEKO Team)
 */

@@ -24,32 +27,52 @@ export function createReviewerRole(opts: {
      agentBin: opts.agentBin ?? `${process.env.HOME}/.local/bin/agent`,
    });

-  return createAgentExecutorRole<ReviewerMeta>(agent, opts.llm, {
+  const innerRole = createAgentExecutorRole<ReviewerMeta>(agent, opts.llm, {
    prepPrompt: (chain, topicId) => {
      const startMsg = chain.find((m) => m.role === '__start__');
      const title = startMsg?.meta?.title ?? topicId;
      const repoDir = (startMsg?.meta?.repoDir as string) ?? '/tmp';
+      const architectMsg = chain.find((m) => m.role === 'architect');
      const coderMsg = [...chain].reverse().find((m) => m.role === 'coder');
+      const reviewerMsgs = chain.filter((m) => m.role === 'reviewer');
+      const retryCount = reviewerMsgs.length;

-      const prompt = `## Code Review: ${title}
+      const architectChanges = (architectMsg?.meta?.changes as Record<string, string>) ?? {};
+      const architectVerification = (architectMsg?.meta?.verification as string) ?? '';
+      const changesSpec = Object.entries(architectChanges)
+        .map(([file, desc]) => `- **${file}**: ${desc}`)
+        .join('\n');

-## What was done
+      const prompt = `## Code Review: ${title} (review round ${retryCount})
+
+## Architect's Design Specification
+### Expected Changes
+${changesSpec || 'Not specified'}
+
+### Verification Criteria
+${architectVerification || 'Not specified'}
+
+## What the Coder Actually Did
 ${coderMsg?.content ?? 'Unknown'}

-## Files changed
+## Files Changed by Coder
 ${((coderMsg?.meta?.filesChanged as string[]) ?? []).join(', ')}

 ## Instructions
-Review the recent changes for correctness, security, and code quality.
+1. Compare the coder's actual changes against the architect's expected changes above.
+2. Verify the coder addressed each file and change described in the spec.
+3. Check if the verification criteria can be satisfied.
+4. Review for correctness, security, and code quality.
 Do NOT modify any files. Only output your review.
-End with a clear verdict: APPROVED or REJECTED with reasons.`;
+End with a clear verdict: APPROVED or REJECTED.
+If REJECTED, explain specifically what is wrong or missing.`;

      return { prompt, cwd: repoDir };
    },

    parseMeta: {
      system:
-        'Extract the review verdict from this code review report. Call the extract_review_verdict tool.',
+        'Extract the review verdict and rejection reason from this code review report. Call the extract_review_verdict tool.',
      tool: {
        type: 'function',
        function: {
@@ -63,8 +86,12 @@ End with a clear verdict: APPROVED or REJECTED with reasons.`;
                enum: ['approved', 'rejected'],
                description: 'Final review verdict',
              },
+              rejectionReason: {
+                type: 'string',
+                description: 'Reason for rejection (empty string if approved)',
+              },
            },
-            required: ['verdict'],
+            required: ['verdict', 'rejectionReason'],
          },
        },
      },
@@ -72,17 +99,29 @@ End with a clear verdict: APPROVED or REJECTED with reasons.`;
        const parsed = JSON.parse(args);
        return {
          verdict: parsed.verdict === 'rejected' ? 'rejected' : 'approved',
+          rejectionReason: parsed.rejectionReason ?? '',
+          retryCount: 0,
        };
      },
      defaultMeta: (output) => {
-        // Last resort: keyword matching on the last 200 chars
        const tail = output.toLowerCase().slice(-200);
        return {
          verdict: tail.includes('reject')
            ? ('rejected' as const)
            : ('approved' as const),
+          rejectionReason: tail.includes('reject') ? output.slice(-500) : '',
+          retryCount: 0,
        };
      },
    },
  });
+
+  return async (chain, topicId, store) => {
+    const result = await innerRole(chain, topicId, store);
+    const retryCount = chain.filter((m) => m.role === 'reviewer').length;
+    return {
+      ...result,
+      meta: result.meta ? { ...result.meta, retryCount } : result.meta,
+    };
+  };
 }