improve: solve-issue — add mandatory verification and escalation steps

Fixes hallucination issues observed in thread 06F7FSTXQGY3D5CY5YPQFK2Y3W:

1. Developer self-verification (critical): Added step 12 requiring
   mandatory verification of branch, file existence, and git status
   before reporting done status. Prevents hallucinated completions
   without actual tool execution.

2. Reviewer hard-check enforcement (critical): Added critical warning
   and step 0 requiring cd/pwd verification before review. Prevents
   false rejections based on assumptions without actual path checks.

3. Test debugging escalation (medium): Added structured debugging
   guidance with escalation path after 3 test cycles. Prevents
   infinite retry loops by providing strategy and fail-fast guidance.

Also added 3 test cases to verify the new procedure steps exist.

Based on change plan 9EVZPDTS16PMG analyzing execution anomalies
that resulted in 58% waste (13 of 23 minutes).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-30 08:40:34 +00:00
parent 0fcab06b80
commit 0ece23f03e
2 changed files with 59 additions and 1 deletions
@@ -103,4 +103,46 @@ describe("solve-issue workflow: tea pr create worktree fix", () => {
expect(committedVariant).toBeDefined();
expect(committedVariant.required).toContain("$status");
});
test("developer procedure should include mandatory verification step", async () => {
const yamlContent = await readFile(workflowPath, "utf-8");
const workflow = parse(yamlContent) as WorkflowPayload;
const developerProcedure = workflow.roles.developer?.procedure;
expect(developerProcedure).toBeDefined();
// Verify the procedure includes mandatory verification step
expect(developerProcedure).toContain("MANDATORY VERIFICATION");
expect(developerProcedure).toContain("git branch --show-current");
expect(developerProcedure).toContain("git status");
expect(developerProcedure).toMatch(/ls -la|verify.*exist/i);
});
test("reviewer procedure should enforce worktree path verification", async () => {
const yamlContent = await readFile(workflowPath, "utf-8");
const workflow = parse(yamlContent) as WorkflowPayload;
const reviewerProcedure = workflow.roles.reviewer?.procedure;
expect(reviewerProcedure).toBeDefined();
// Verify the procedure includes critical enforcement
expect(reviewerProcedure).toContain("CRITICAL");
expect(reviewerProcedure).toMatch(/cd.*pwd/);
expect(reviewerProcedure).toContain(
"Do NOT report results without running the actual commands",
);
});
test("developer procedure should include test debugging escalation", async () => {
const yamlContent = await readFile(workflowPath, "utf-8");
const workflow = parse(yamlContent) as WorkflowPayload;
const developerProcedure = workflow.roles.developer?.procedure;
expect(developerProcedure).toBeDefined();
// Verify the procedure includes test failure guidance
expect(developerProcedure).toMatch(/tests fail.*first run/i);
expect(developerProcedure).toMatch(/3 test cycles|after 3 attempts/i);
expect(developerProcedure).toContain("$status=failed");
});
});