Merge pull request 'improve: solve-issue — fix hallucination patterns (thread 06F7FSTXQGY3D5CY5YPQFK2Y3W)' (#579) from retrospect/solve-issue-fixes into main

This commit is contained in:
2026-05-30 08:57:58 +00:00
2 changed files with 59 additions and 1 deletions
+17 -1
View File
@@ -61,6 +61,17 @@ roles:
9. Implement the code to make tests pass 9. Implement the code to make tests pass
10. Ensure `bun run build` passes with no errors 10. Ensure `bun run build` passes with no errors
11. Run `bun test` to verify all tests pass 11. Run `bun test` to verify all tests pass
- If tests fail on first run:
* Read the test output carefully for missing imports or setup issues
* Check if you're running tests from the correct working directory (package root vs workspace root)
* Fix the immediate issue and rerun ONCE
* If tests still fail after 2 attempts: check the test spec for ambiguities
* If stuck after 3 test cycles: set $status=failed with detailed error report rather than continuing blind retries
12. MANDATORY VERIFICATION before reporting done:
- Run `git branch --show-current` and confirm branch name matches expected
- Run `git status` and verify changed files exist
- Run `ls -la <key-implementation-files>` to verify they exist on disk
- If ANY verification fails: do NOT report done. Retry the implementation, then re-run every verification command above.
If you cannot complete the implementation (e.g. the issue is too complex, blocked by external factors, If you cannot complete the implementation (e.g. the issue is too complex, blocked by external factors,
or repeated attempts fail), set $status=failed with a reason. or repeated attempts fail), set $status=failed with a reason.
@@ -85,7 +96,12 @@ roles:
procedure: | procedure: |
The worktree path is provided in your task prompt. cd into it first. The worktree path is provided in your task prompt. cd into it first.
Before reviewing, verify the git branch: CRITICAL: You MUST execute every verification command below. Do NOT report results without running the actual commands. Do NOT rely on prior context or assumptions.
Before reviewing, verify the worktree and branch exist:
0. Run `cd <worktree-path> && pwd` to confirm the path is accessible
- If the cd fails: the worktree truly doesn't exist, reject with that reason
- If the cd succeeds: proceed with step 1 below
1. Run `git branch --show-current` — confirm the branch name references the issue number being worked on 1. Run `git branch --show-current` — confirm the branch name references the issue number being worked on
2. If the branch doesn't correspond to the issue, flag it in your output and reject 2. If the branch doesn't correspond to the issue, flag it in your output and reject
@@ -103,4 +103,46 @@ describe("solve-issue workflow: tea pr create worktree fix", () => {
expect(committedVariant).toBeDefined(); expect(committedVariant).toBeDefined();
expect(committedVariant.required).toContain("$status"); expect(committedVariant.required).toContain("$status");
}); });
test("developer procedure should include mandatory verification step", async () => {
  // Read the workflow file from disk and hand its text to the parser in one step.
  const workflow = parse(await readFile(workflowPath, "utf-8")) as WorkflowPayload;
  const procedure = workflow.roles.developer?.procedure;

  // Guard first: the developer role must actually carry a procedure.
  expect(procedure).toBeDefined();

  // Literal phrases the procedure text must contain, checked in this order.
  const requiredPhrases = [
    "MANDATORY VERIFICATION",
    "git branch --show-current",
    "git status",
  ];
  for (const phrase of requiredPhrases) {
    expect(procedure).toContain(phrase);
  }

  // Either an explicit `ls -la` or a same-line "verify ... exist" wording is accepted.
  expect(procedure).toMatch(/ls -la|verify.*exist/i);
});
test("reviewer procedure should enforce worktree path verification", async () => {
  // Read the workflow file from disk and hand its text to the parser in one step.
  const workflow = parse(await readFile(workflowPath, "utf-8")) as WorkflowPayload;
  const procedure = workflow.roles.reviewer?.procedure;

  // Guard first: the reviewer role must actually carry a procedure.
  expect(procedure).toBeDefined();

  // The enforcement banner must be present.
  expect(procedure).toContain("CRITICAL");

  // A `cd` followed by `pwd` on the same line (`.` does not cross newlines).
  const cdThenPwd = /cd.*pwd/;
  expect(procedure).toMatch(cdThenPwd);

  // The exact instruction forbidding reports without real command execution.
  const noUnverifiedReports =
    "Do NOT report results without running the actual commands";
  expect(procedure).toContain(noUnverifiedReports);
});
test("developer procedure should include test debugging escalation", async () => {
  // Read the workflow file from disk and hand its text to the parser in one step.
  const workflow = parse(await readFile(workflowPath, "utf-8")) as WorkflowPayload;
  const procedure = workflow.roles.developer?.procedure;

  // Guard first: the developer role must actually carry a procedure.
  expect(procedure).toBeDefined();

  // Case-insensitive patterns for the first-failure guidance and the retry cap,
  // checked in this order.
  const escalationPatterns = [
    /tests fail.*first run/i,
    /3 test cycles|after 3 attempts/i,
  ];
  for (const pattern of escalationPatterns) {
    expect(procedure).toMatch(pattern);
  }

  // The procedure must name the failure status assignment literally.
  expect(procedure).toContain("$status=failed");
});
}); });