From 0ece23f03e821abf5deb13fa9e5037745323bfa4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= Date: Sat, 30 May 2026 08:40:34 +0000 Subject: [PATCH] =?UTF-8?q?improve:=20solve-issue=20=E2=80=94=20add=20mand?= =?UTF-8?q?atory=20verification=20and=20escalation=20steps?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes hallucination issues observed in thread 06F7FSTXQGY3D5CY5YPQFK2Y3W: 1. Developer self-verification (critical): Added step 12 requiring mandatory verification of branch, file existence, and git status before reporting done status. Prevents hallucinated completions without actual tool execution. 2. Reviewer hard-check enforcement (critical): Added critical warning and step 0 requiring cd/pwd verification before review. Prevents false rejections based on assumptions without actual path checks. 3. Test debugging escalation (medium): Added structured debugging guidance with escalation path after 3 test cycles. Prevents infinite retry loops by providing strategy and fail-fast guidance. Also added 3 test cases to verify the new procedure steps exist. Based on change plan 9EVZPDTS16PMG analyzing execution anomalies that resulted in 58% waste (13 of 23 minutes). Co-Authored-By: Claude Opus 4.6 --- .workflows/solve-issue.yaml | 18 +++++++- .../solve-issue-tea-worktree.test.ts | 42 +++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/.workflows/solve-issue.yaml b/.workflows/solve-issue.yaml index 2bc614e..4c1a79c 100644 --- a/.workflows/solve-issue.yaml +++ b/.workflows/solve-issue.yaml @@ -61,6 +61,17 @@ roles: 9. Implement the code to make tests pass 10. Ensure `bun run build` passes with no errors 11. 
Run `bun test` to verify all tests pass + - If tests fail on first run: + * Read the test output carefully for missing imports or setup issues + * Check if you're running tests from the correct working directory (package root vs workspace root) + * Fix the immediate issue and rerun ONCE + * If tests still fail after 2 attempts: check the test spec for ambiguities + * If stuck after 3 test cycles: set $status=failed with detailed error report rather than continuing blind retries + 12. MANDATORY VERIFICATION before reporting done: + - Run `git branch --show-current` and confirm branch name matches expected + - Run `git status` and verify changed files exist + - Run `ls -la <changed-files>` to verify they exist on disk + - If ANY verification fails: retry the implementation, do NOT report done If you cannot complete the implementation (e.g. the issue is too complex, blocked by external factors, or repeated attempts fail), set $status=failed with a reason. @@ -85,7 +96,12 @@ roles: procedure: | The worktree path is provided in your task prompt. cd into it first. - Before reviewing, verify the git branch: + CRITICAL: You MUST execute every verification command below. Do NOT report results without running the actual commands. Do NOT rely on prior context or assumptions. + + Before reviewing, verify the worktree and branch exist: + 0. Run `cd <worktree-path> && pwd` to confirm the path is accessible + - If the cd fails: the worktree truly doesn't exist, reject with that reason + - If the cd succeeds: proceed with step 1 below 1. Run `git branch --show-current` — confirm the branch name references the issue number being worked on 2. 
If the branch doesn't correspond to the issue, flag it in your output and reject diff --git a/packages/cli-workflow/src/__tests__/solve-issue-tea-worktree.test.ts b/packages/cli-workflow/src/__tests__/solve-issue-tea-worktree.test.ts index e9ae476..1a0e3b6 100644 --- a/packages/cli-workflow/src/__tests__/solve-issue-tea-worktree.test.ts +++ b/packages/cli-workflow/src/__tests__/solve-issue-tea-worktree.test.ts @@ -103,4 +103,46 @@ describe("solve-issue workflow: tea pr create worktree fix", () => { expect(committedVariant).toBeDefined(); expect(committedVariant.required).toContain("$status"); }); + + test("developer procedure should include mandatory verification step", async () => { + const yamlContent = await readFile(workflowPath, "utf-8"); + const workflow = parse(yamlContent) as WorkflowPayload; + + const developerProcedure = workflow.roles.developer?.procedure; + expect(developerProcedure).toBeDefined(); + + // Each verification command must be named explicitly, so removing one fails this test + expect(developerProcedure).toContain("MANDATORY VERIFICATION"); + expect(developerProcedure).toContain("git branch --show-current"); + expect(developerProcedure).toContain("git status"); + expect(developerProcedure).toContain("ls -la"); + }); + + test("reviewer procedure should enforce worktree path verification", async () => { + const yamlContent = await readFile(workflowPath, "utf-8"); + const workflow = parse(yamlContent) as WorkflowPayload; + + const reviewerProcedure = workflow.roles.reviewer?.procedure; + expect(reviewerProcedure).toBeDefined(); + + // The reviewer must be told to run the cd/pwd check and not report unexecuted results + expect(reviewerProcedure).toContain("CRITICAL"); + expect(reviewerProcedure).toMatch(/cd.*pwd/); + expect(reviewerProcedure).toContain( + "Do NOT report results without running the actual commands", + ); + }); + + test("developer procedure should include test debugging escalation", async () => { + const yamlContent = await readFile(workflowPath, "utf-8"); + const workflow = 
parse(yamlContent) as WorkflowPayload; + + const developerProcedure = workflow.roles.developer?.procedure; + expect(developerProcedure).toBeDefined(); + + // Failed test runs need first-run guidance, a retry limit, and a $status=failed exit + expect(developerProcedure).toMatch(/tests fail.*first run/i); + expect(developerProcedure).toMatch(/3 test cycles|after 3 attempts/i); + expect(developerProcedure).toContain("$status=failed"); + }); });