From f741729b41e49f12da4af084637669b9bf497e58 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=98=9F=E6=9C=88?= <xingyue@shazhou.work>
Date: Sat, 30 May 2026 15:32:33 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20retrospect-workflow=20=E2=80=94=20add?=
 =?UTF-8?q?=20Phase=200=20validation?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Check workflow exists in current project, block with wrong_project if not
- Compare thread's workflow hash vs current version
- If versions differ, diff and filter out already-fixed findings
- New status: wrong_project → $END with clear error message
---
 .workflows/retrospect-workflow.yaml | 42 +++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 11 deletions(-)
diff --git a/.workflows/retrospect-workflow.yaml b/.workflows/retrospect-workflow.yaml
index f561d9a..35e5f44 100644
--- a/.workflows/retrospect-workflow.yaml
+++ b/.workflows/retrospect-workflow.yaml
@@ -9,30 +9,45 @@ roles:
     procedure: |
       You receive a completed thread ID in your task prompt.
 
+      Phase 0 — Validation (must pass before any analysis):
+      1. Run `uwf step list <thread-id>` to get thread metadata including the workflow hash
+      2. Run `uwf workflow show <workflow-hash>` to get the workflow name
+      3. Verify the workflow exists locally: check `.workflows/<name>.yaml` in the current repo
+         - If NOT found: output $status=wrong_project with workflowName set to the workflow name. Do NOT proceed.
+      4. Compare the thread's workflow hash against the current registered version:
+         - Run `uwf workflow show <name>` to get the current hash
+         - If hashes differ: the thread ran on an older version. Note this — you will need to diff versions after analysis.
+
       Phase 1 — Overview scan:
-      1. Run `uwf step list <thread-id>` to get all steps with role, status, durationMs, and token usage
-      2. For each step, compute a health signal:
+      5. From the step list, compute a health signal for each step:
          - Duration: flag if >2x the median of other steps
          - Output tokens: flag if >2x the median
          - Status flow: flag non-happy-path transitions (rejected, fix_code, fix_spec, hook_failed)
          - Step count: flag if the same role appears more than expected (indicates loops)
-      3. If no anomalies found, output $status=clean with a brief summary
+      6. If no anomalies found AND versions match: output $status=clean with a brief summary
+      7. If no anomalies found BUT versions differ:
+         - Diff the two workflow versions to check if any procedure changes are relevant
+         - If the current version already addresses potential concerns: output $status=clean with a note
+         - Otherwise: proceed to Phase 2, treating the steps of roles whose procedure changed between versions as flagged
 
       Phase 2 — Targeted deep-dive (only for flagged steps):
-      4. For each flagged step, run `uwf step show <hash>` to get the detail with turns
-      5. Analyze the turn sequence for:
+      8. For each flagged step, run `uwf step show <hash>` to get the detail with turns
+      9. Analyze the turn sequence for:
          - Repeated tool calls with the same or similar input (blind retries)
          - Tool errors followed by no strategy change (same approach retried)
          - Unnecessary exploration (reading files or running commands unrelated to the task)
          - Hallucinated commands or flags (commands that don't exist or wrong syntax)
          - Excessive turns before reaching the goal
-      6. For each finding, record:
-         - Which role and step hash
-         - What happened (specific turn indices and commands)
-         - Root cause hypothesis (procedure gap, missing pitfall, unclear instruction)
-         - Suggested fix (what to add/change in the procedure)
+      10. For each finding, record:
+          - Which role and step hash
+          - What happened (specific turn indices and commands)
+          - Root cause hypothesis (procedure gap, missing pitfall, unclear instruction)
+          - Suggested fix (what to add/change in the procedure)
+      11. If versions differ: compare findings against the version diff.
+          Mark any finding that is already fixed in the current version as "resolved_in_current".
+          Only report findings that are NOT yet addressed; if none remain, output $status=clean.
 
-      Output a structured findings report. Set $status=clean if nothing found, or $status=findings if issues detected.
+      Output a structured findings report. Set $status=clean if nothing actionable, $status=findings if unresolved issues exist, or $status=wrong_project if the workflow doesn't belong here.
     output: "A findings report with per-issue root cause and suggested procedure fixes. Set $status to clean or findings (with report hash)."
     frontmatter:
       oneOf:
@@ -45,6 +60,10 @@ roles:
             report: { type: string }
             targetWorkflow: { type: string }
           required: [$status, report, targetWorkflow]
+        - properties:
+            $status: { const: "wrong_project" }
+            workflowName: { type: string }
+          required: [$status, workflowName]
   proposer:
     description: "Translates findings into concrete workflow edits"
     goal: "You are a workflow improvement proposer. You read the analyst's findings and produce specific, minimal edits to the workflow YAML."
@@ -186,6 +205,7 @@ graph:
   analyst:
     clean: { role: "$END", prompt: "No issues found. Thread executed cleanly." }
     findings: { role: "proposer", prompt: "Findings report: {{{report}}}. Target workflow: {{{targetWorkflow}}}. Propose minimal edits." }
+    wrong_project: { role: "$END", prompt: "Thread uses workflow '{{{workflowName}}}' which does not exist in this project. Run retrospect from the correct repo." }
   proposer:
     no_action: { role: "$END", prompt: "No actionable changes needed: {{{reason}}}." }
     ready: { role: "developer", prompt: "Apply the change plan (CAS hash: {{{plan}}}) to the workflow definitions in this repo." }