diff --git a/examples/solve-issue.yaml b/examples/solve-issue.yaml index 3930679..617c951 100644 --- a/examples/solve-issue.yaml +++ b/examples/solve-issue.yaml @@ -1,92 +1,198 @@ name: "solve-issue" -description: "End-to-end issue resolution" +description: "TDD-driven issue resolution for small, focused changes. Loop protection relies on engine maxRounds." roles: planner: - description: "Creates implementation plan" - goal: "You are a planning agent. You analyze issues and create implementation plans grounded in the actual codebase." + description: "Analyzes issue and outputs a TDD test spec" + goal: "You are a planning agent. You analyze Gitea issues and produce a TDD test specification that downstream roles will implement and verify." capabilities: - issue-analysis - planning - - file-read - - shell procedure: | - 1. Locate the code repository: - - Check if the current working directory is the repo (look for package.json, .git, etc.) - - If the task mentions a repo URL, clone it first. - - If this is a new project, create the repo and note the path. - 2. Explore the codebase — read the relevant source files mentioned in the issue. Understand the current architecture, types, and conventions (check CLAUDE.md, CONTRIBUTING.md, .cursor/rules/). - 3. Identify which files need changes and what the changes should be, with specific code references. - 4. Output the plan with: - - `repoPath`: absolute path to the repository root - - `plan`: detailed implementation plan with file paths and code references - - `steps`: concrete action items for the developer - output: | - Provide repoPath, plan summary, and steps in the frontmatter. - The plan MUST reference actual file paths and code structures you found by reading the source. - Do NOT guess — if you haven't read a file, read it before referencing it. + On first run (no previous steps): + 1. Read the issue and all comments from Gitea using `tea issues -r ` + 2. 
Look for project conventions files (CLAUDE.md, CONTRIBUTING.md, .cursor/rules/) in the repo + 3. Assess whether the issue has enough information to produce a test spec + 4. If insufficient info: comment on the issue via `echo "..." | tea comment -r ` (skip if you already commented), then output $status=insufficient_info + 5. If sufficient: produce a detailed TDD test spec in markdown covering all scenarios + + On subsequent runs (bounced back by tester with fix_spec): + 1. Read the tester's output from the previous step to understand what's wrong with the spec + 2. Revise the test spec accordingly + + After producing the test spec: + 1. Store it via `uwf cas put-text ""` and capture the returned hash + 2. Put the hash in frontmatter.plan (required when $status=ready) + 3. Set repoPath to the absolute path of the repository root + output: "Output a brief summary of the test spec. Set $status to ready (with plan hash and repoPath) or insufficient_info." frontmatter: - type: object - properties: - $status: - enum: ["_"] - repoPath: - type: string - plan: - type: string - required: [$status, repoPath, plan] + oneOf: + - properties: + $status: { const: "ready" } + plan: { type: string } + repoPath: { type: string } + required: [$status, plan, repoPath] + - properties: + $status: { const: "insufficient_info" } + required: [$status] developer: - description: "Implements code changes" - goal: "You are a developer agent. You implement code changes according to plans." + description: "TDD implementation per test spec" + goal: "You are a developer agent. You implement code changes following TDD — write tests first, then implementation." capabilities: - - file-edit - - shell - - testing + - coding procedure: | - 1. Read the planner's output to get the repoPath and implementation plan. - 2. cd to the repoPath before making any changes. - 3. Create a feature branch from the default branch. - 4. Implement the plan — write code, tests, and ensure existing tests pass. - 5. 
Run the project's lint/check command (e.g. `bun run check`, `npm run lint`) and fix ALL errors before proceeding. Build and lint must pass cleanly. - 6. Commit your changes with a descriptive message referencing the issue. - output: "List all files changed and provide a summary of the implementation." + IMPORTANT: Always work in a git worktree, NEVER modify the main working directory directly. + The repo path and other details are provided in your task prompt. + + Before starting any work, set up an isolated worktree: + 1. cd into the repo path provided in your task prompt + 2. `git fetch origin` to get latest refs + 3. First time (no existing branch): + - `git worktree add .worktrees/fix/- -b fix/- origin/main` + - `cd .worktrees/fix/- && bun install` + 4. If bounced back from reviewer or tester (branch already exists): + - cd into the existing worktree under `.worktrees/fix/-` + - `git fetch origin && git rebase origin/main` + 5. ALL subsequent work must happen inside the worktree directory. + + Then implement TDD: + 6. Read the test spec from CAS: `uwf cas get ` (find the hash from the planner's output in your task prompt) + 7. If bounced back from reviewer or tester: read the previous role's feedback in your task prompt + 8. Write tests first based on the spec + 9. Implement the code to make tests pass + 10. Ensure `bun run build` passes with no errors + 11. Run `bun test` to verify all tests pass + + If you cannot complete the implementation (e.g. the issue is too complex, blocked by external factors, + or repeated attempts fail), set $status=failed with a reason. + output: "List all files changed and provide a summary. Set $status to done (with branch/worktree), or failed (with reason)." 
frontmatter: - type: object - properties: - $status: - enum: ["_"] - filesChanged: - type: array - items: - type: string - summary: - type: string - required: [$status, filesChanged, summary] + oneOf: + - properties: + $status: { const: "done" } + branch: { type: string } + worktree: { type: string } + required: [$status, branch, worktree] + - properties: + $status: { const: "failed" } + reason: { type: string } + required: [$status, reason] reviewer: - description: "Reviews code changes" - goal: "You are a code reviewer. You review implementations for correctness and quality." + description: "Code standards compliance check" + goal: "You are a code reviewer. You verify code standards compliance — NOT functionality (that's the tester's job)." capabilities: - code-review - static-analysis procedure: | - 1. Run hard checks first — build (`bun run build` or equivalent) and lint (`bunx biome check .` or equivalent) MUST pass with zero errors. If they fail, reject immediately. - 2. Then review code quality: correctness, edge cases, naming, project conventions (CLAUDE.md), and test coverage. - 3. Only reject for hard check failures or genuine correctness/security issues. Style suggestions alone should not block approval. - output: "Approve or reject with detailed comments explaining your decision." + The worktree path is provided in your task prompt. cd into it first. + + Before reviewing, verify the git branch: + 1. Run `git branch --show-current` — confirm the branch name references the issue number being worked on + 2. If the branch doesn't correspond to the issue, flag it in your output and reject + + Then perform code review: + Hard checks (must all pass): + 3. `bun run build` — no build errors + 4. `bunx biome check` — no lint violations + 5. 
TypeScript strict mode — no type errors + + Soft checks (review against project conventions if CLAUDE.md / .cursor/rules exist): + - Naming conventions, module boundaries, code style + - No `console.log` in production code + - No dynamic imports in production code + + Only review standards compliance. Do NOT test functionality. + If rejecting, you MUST explain the specific reason in your output. + output: "Explain your decision with specific file/line references. Set $status to approved (with branch/worktree) or rejected (with comments)." frontmatter: - type: object - properties: - $status: - enum: ["approved", "rejected"] - comments: - type: string - required: [$status, comments] + oneOf: + - properties: + $status: { const: "approved" } + branch: { type: string } + worktree: { type: string } + required: [$status, branch, worktree] + - properties: + $status: { const: "rejected" } + comments: { type: string } + worktree: { type: string } + required: [$status, comments, worktree] + tester: + description: "Functional correctness verification" + goal: "You are a tester agent. You verify that the implementation correctly satisfies every scenario in the test spec." + capabilities: + - testing + procedure: | + The worktree path is provided in your task prompt. cd into it first. + + 1. Run `bun test` for automated test verification + 2. Read the test spec from CAS: `uwf cas get ` (find the hash from the planner step in the thread history) + 3. Verify each scenario in the spec is covered and passing + 4. Determine outcome: + - passed: all scenarios verified, tests pass + - fix_code: tests fail or implementation doesn't match spec → send back to developer + - fix_spec: the spec itself is wrong or incomplete → send back to planner + output: "Report test results per scenario. Set $status to passed (with branch/worktree), fix_code (with report), or fix_spec (with report)." 
+ frontmatter: + oneOf: + - properties: + $status: { const: "passed" } + branch: { type: string } + worktree: { type: string } + required: [$status, branch, worktree] + - properties: + $status: { const: "fix_code" } + report: { type: string } + required: [$status, report] + - properties: + $status: { const: "fix_spec" } + report: { type: string } + required: [$status, report] + committer: + description: "Commits and creates PR" + goal: "You are a committer agent. You create a clean commit and push a PR linking the original issue." + capabilities: [] + procedure: | + The worktree path, branch name, and repo info are provided in your task prompt. + cd into the worktree first. + + Note: You inherit the developer's worktree and branch. Do NOT create a new branch. + 1. Stage all changes: `git add -A` + 2. Commit with a descriptive message referencing the issue: `git commit -m "type: description" -m "Fixes #N"` + 3. Push the branch: `git push -u origin <branch>` + - If push hook fails: capture the error log in your output, mark hook_failed + 4. On push success: create a PR via `tea pr create --repo <owner/repo> --title "..." --description "..."` + - Extract owner/repo from: `git remote get-url origin | sed 's/.*[:/]\([^/]*\/[^.]*\).*/\1/'` + - PR description must include: What / Why / Changes / Ref sections, with `Fixes #N` in Ref + - On tea failure: capture stderr/stdout, include PR details for manual creation, mark hook_failed + 5. After PR creation, clean up the worktree: + - cd to the repo root (parent of .worktrees) + - `git worktree remove <worktree-path>` + output: "Include PR URL on success or error log on failure. Set $status to committed (with prUrl) or hook_failed (with error)." 
+ frontmatter: + oneOf: + - properties: + $status: { const: "committed" } + prUrl: { type: string } + required: [$status, prUrl] + - properties: + $status: { const: "hook_failed" } + error: { type: string } + required: [$status, error] graph: $START: - _: { role: "planner", prompt: "Analyze the issue described in the task and produce a detailed implementation plan." } + _: { role: "planner", prompt: "Analyze the issue and produce a TDD test spec." } planner: - _: { role: "developer", prompt: "Implement the plan from the planner. Write code, tests, and ensure existing tests pass." } + insufficient_info: { role: "$END", prompt: "Insufficient information to proceed; end the workflow." } + ready: { role: "developer", prompt: "Implement the TDD test spec (CAS hash: {{{plan}}}) in repo {{{repoPath}}}." } developer: - _: { role: "reviewer", prompt: "Review the developer's implementation against the plan for correctness and quality." } + done: { role: "reviewer", prompt: "Review branch {{{branch}}} at {{{worktree}}} for code standards compliance." } + failed: { role: "$END", prompt: "Developer failed: {{{reason}}}. Ending workflow." } reviewer: - approved: { role: "$END", prompt: "The review passed. Complete the workflow." } - rejected: { role: "developer", prompt: "The reviewer rejected your implementation. Read their feedback and fix the issues: {{{comments}}}" } + rejected: { role: "developer", prompt: "Reviewer rejected: {{{comments}}}. Fix the issues in worktree {{{worktree}}}." } + approved: { role: "tester", prompt: "Review passed. Run tests on branch {{{branch}}} at {{{worktree}}}." } + tester: + fix_code: { role: "developer", prompt: "Tests found code issues: {{{report}}}. Fix and re-submit." } + fix_spec: { role: "planner", prompt: "Tests found spec issues: {{{report}}}. Revise the test spec." } + passed: { role: "committer", prompt: "All tests passed. Commit and push branch {{{branch}}} from {{{worktree}}}." 
} + committer: + hook_failed: { role: "developer", prompt: "Push hook failed: {{{error}}}. Fix and re-submit." } + committed: { role: "$END", prompt: "PR created: {{{prUrl}}}. Workflow complete." } diff --git a/packages/cli-workflow/package.json b/packages/cli-workflow/package.json index bfaad87..66a40d7 100644 --- a/packages/cli-workflow/package.json +++ b/packages/cli-workflow/package.json @@ -11,8 +11,8 @@ "uwf": "./src/cli.ts" }, "dependencies": { - "@uncaged/json-cas": "^0.5.2", - "@uncaged/json-cas-fs": "^0.5.2", + "@uncaged/json-cas": "^0.5.3", + "@uncaged/json-cas-fs": "^0.5.3", "@uncaged/workflow-protocol": "workspace:^", "@uncaged/workflow-util": "workspace:^", "@uncaged/workflow-util-agent": "workspace:^", diff --git a/packages/cli-workflow/src/__tests__/validate-semantic.test.ts b/packages/cli-workflow/src/__tests__/validate-semantic.test.ts index 98a4687..52d3137 100644 --- a/packages/cli-workflow/src/__tests__/validate-semantic.test.ts +++ b/packages/cli-workflow/src/__tests__/validate-semantic.test.ts @@ -250,6 +250,110 @@ describe("Suite 3: Status-Edge Consistency", () => { }); }); +describe("Suite 3b: Enum-Based Multi-Exit", () => { + test("3b.1 enum multi-exit passes with matching graph keys", () => { + const wf = makeWorkflow(); + wf.roles.reviewer = { + ...wf.roles.reviewer, + frontmatter: { + type: "object", + properties: { + $status: { enum: ["approved", "rejected"] }, + comments: { type: "string" }, + }, + required: ["$status", "comments"], + } as unknown as string, + }; + wf.graph.reviewer = { + approved: { role: "$END", prompt: "Done" }, + rejected: { role: "writer", prompt: "Fix: {{{comments}}}" }, + }; + const errors = validateWorkflow(wf); + expect(errors).toEqual([]); + }); + + test("3b.2 enum multi-exit with extra graph key", () => { + const wf = makeWorkflow(); + wf.roles.reviewer = { + ...wf.roles.reviewer, + frontmatter: { + type: "object", + properties: { + $status: { enum: ["approved", "rejected"] }, + comments: { type: "string" 
}, + }, + required: ["$status", "comments"], + } as unknown as string, + }; + wf.graph.reviewer = { + approved: { role: "$END", prompt: "Done" }, + rejected: { role: "writer", prompt: "Fix" }, + timeout: { role: "$END", prompt: "Timed out" }, + }; + const errors = validateWorkflow(wf); + expect(errors.some((e) => e.includes("extra status keys: timeout"))).toBe(true); + }); + + test("3b.3 enum multi-exit with missing graph key", () => { + const wf = makeWorkflow(); + wf.roles.reviewer = { + ...wf.roles.reviewer, + frontmatter: { + type: "object", + properties: { + $status: { enum: ["approved", "rejected"] }, + comments: { type: "string" }, + }, + required: ["$status", "comments"], + } as unknown as string, + }; + wf.graph.reviewer = { + approved: { role: "$END", prompt: "Done" }, + }; + const errors = validateWorkflow(wf); + expect(errors.some((e) => e.includes("missing status keys: rejected"))).toBe(true); + }); + + test("3b.4 enum with single value (not multi-exit) treated as single-exit", () => { + const wf = makeWorkflow(); + wf.roles.writer = { + ...wf.roles.writer, + frontmatter: { + type: "object", + properties: { + $status: { enum: ["_"] }, + plan: { type: "string" }, + }, + required: ["$status", "plan"], + } as unknown as string, + }; + wf.graph.writer = { _: { role: "reviewer", prompt: "Review: {{{plan}}}" } }; + const errors = validateWorkflow(wf); + expect(errors).toEqual([]); + }); + + test("3b.5 enum multi-exit mustache var not in frontmatter", () => { + const wf = makeWorkflow(); + wf.roles.reviewer = { + ...wf.roles.reviewer, + frontmatter: { + type: "object", + properties: { + $status: { enum: ["approved", "rejected"] }, + comments: { type: "string" }, + }, + required: ["$status", "comments"], + } as unknown as string, + }; + wf.graph.reviewer = { + approved: { role: "$END", prompt: "Done: {{{nonexistent}}}" }, + rejected: { role: "writer", prompt: "Fix: {{{comments}}}" }, + }; + const errors = validateWorkflow(wf); + expect(errors.some((e) => 
e.includes("nonexistent") && e.includes("not found"))).toBe(true); + }); +}); + describe("Suite 4: Mustache Template Variable Existence", () => { test("4.1 prompt references nonexistent variable (single-exit)", () => { const wf = makeWorkflow(); diff --git a/packages/cli-workflow/src/cli.ts b/packages/cli-workflow/src/cli.ts index 12ed805..6e84c91 100755 --- a/packages/cli-workflow/src/cli.ts +++ b/packages/cli-workflow/src/cli.ts @@ -55,8 +55,7 @@ program .description( "Stateless workflow CLI\n\n" + "Four-layer architecture:\n" + - " workflow → thread → step → turn\n" + - " 模板定义 执行实例 单步结果 agent内部交互", + " workflow → thread → step → turn", ) .version(pkg.default.version, "-V, --version"); program.option("--format ", "Output format: json or yaml", "json"); diff --git a/packages/cli-workflow/src/validate-semantic.ts b/packages/cli-workflow/src/validate-semantic.ts index aacc6bf..5c38393 100644 --- a/packages/cli-workflow/src/validate-semantic.ts +++ b/packages/cli-workflow/src/validate-semantic.ts @@ -23,6 +23,28 @@ function isOneOfSchema(fm: unknown): fm is SchemaObj & { oneOf: SchemaObj[] } { return Array.isArray(obj.oneOf); } +/** Check if a frontmatter schema uses enum-based multi-exit ($status with multiple enum values). */ +function isEnumMultiExit(fm: unknown): boolean { + if (typeof fm !== "object" || fm === null) return false; + const obj = fm as SchemaObj; + const props = obj.properties as Record | undefined; + if (!props?.$status) return false; + const statusDef = props.$status; + if (!Array.isArray(statusDef.enum)) return false; + // Filter out "_" (wildcard) — if remaining values > 1, it's multi-exit + const statuses = (statusDef.enum as string[]).filter((s) => s !== "_"); + return statuses.length > 1; +} + +/** Extract status values from an enum-based $status field. 
*/ +function getEnumStatuses(fm: SchemaObj): string[] { + const props = fm.properties as Record | undefined; + if (!props?.$status) return []; + const statusDef = props.$status; + if (!Array.isArray(statusDef.enum)) return []; + return (statusDef.enum as string[]).filter((s) => s !== "_"); +} + /** Get property names from a schema object. */ function getPropertyNames(schema: SchemaObj): Set { const props = schema.properties; @@ -230,6 +252,11 @@ function checkRoleConsistency(payload: WorkflowPayload, errors: string[]): void checkOneOfDiscriminant(roleName, variants, statuses, errors); checkMultiExitEdges(roleName, graphKeys, new Set(statuses), errors); checkMultiExitMustache(roleName, graphEntry, variants, errors); + } else if (isEnumMultiExit(fm)) { + const statuses = getEnumStatuses(fm as SchemaObj); + checkMultiExitEdges(roleName, graphKeys, new Set(statuses), errors); + // For enum-based schemas, mustache vars come from the flat properties + checkSingleExitMustache(roleName, graphEntry, fm as SchemaObj, errors); } else { checkSingleExitRole(roleName, graphKeys, graphEntry, fm as SchemaObj | null, errors); } @@ -265,6 +292,27 @@ function checkSingleExitRole( } } +/** Check mustache vars in all edge prompts against flat schema properties. */ +function checkSingleExitMustache( + roleName: string, + graphEntry: Record, + fm: SchemaObj, + errors: string[], +): void { + const propNames = getPropertyNames(fm); + for (const [status, target] of Object.entries(graphEntry)) { + const vars = extractMustacheVars(target.prompt); + for (const v of vars) { + if (v === "$status") continue; + if (!propNames.has(v)) { + errors.push( + `prompt variable "${v}" in graph[${roleName}][${status}] not found in role "${roleName}" frontmatter`, + ); + } + } + } +} + /** * Validate a parsed WorkflowPayload for semantic correctness. * Returns an array of error messages. Empty array = valid. 
diff --git a/packages/workflow-agent-builtin/package.json b/packages/workflow-agent-builtin/package.json index 7e87e00..4ae4442 100644 --- a/packages/workflow-agent-builtin/package.json +++ b/packages/workflow-agent-builtin/package.json @@ -22,7 +22,7 @@ "test:ci": "bun test" }, "dependencies": { - "@uncaged/json-cas": "^0.5.2", + "@uncaged/json-cas": "^0.5.3", "@uncaged/workflow-util-agent": "workspace:^", "@uncaged/workflow-util": "workspace:^" }, diff --git a/packages/workflow-agent-claude-code/package.json b/packages/workflow-agent-claude-code/package.json index 0fc8069..39e2bb0 100644 --- a/packages/workflow-agent-claude-code/package.json +++ b/packages/workflow-agent-claude-code/package.json @@ -22,7 +22,7 @@ "test:ci": "bun test" }, "dependencies": { - "@uncaged/json-cas": "^0.5.2", + "@uncaged/json-cas": "^0.5.3", "@uncaged/workflow-util-agent": "workspace:^", "@uncaged/workflow-util": "workspace:^" }, diff --git a/packages/workflow-agent-hermes/package.json b/packages/workflow-agent-hermes/package.json index a35946f..33f5467 100644 --- a/packages/workflow-agent-hermes/package.json +++ b/packages/workflow-agent-hermes/package.json @@ -22,7 +22,7 @@ "test:ci": "bun test __tests__/*.test.ts" }, "dependencies": { - "@uncaged/json-cas": "^0.5.2", + "@uncaged/json-cas": "^0.5.3", "@uncaged/workflow-util-agent": "workspace:^", "@uncaged/workflow-protocol": "workspace:^", "@uncaged/workflow-util": "workspace:^" diff --git a/packages/workflow-protocol/package.json b/packages/workflow-protocol/package.json index 0ea5633..a4711f7 100644 --- a/packages/workflow-protocol/package.json +++ b/packages/workflow-protocol/package.json @@ -15,8 +15,8 @@ } }, "dependencies": { - "@uncaged/json-cas": "^0.5.2", - "@uncaged/json-cas-fs": "^0.5.2" + "@uncaged/json-cas": "^0.5.3", + "@uncaged/json-cas-fs": "^0.5.3" }, "devDependencies": { "typescript": "^5.8.3" diff --git a/packages/workflow-util-agent/package.json b/packages/workflow-util-agent/package.json index d2904e9..a2b4051 
100644 --- a/packages/workflow-util-agent/package.json +++ b/packages/workflow-util-agent/package.json @@ -19,8 +19,8 @@ "test:ci": "bun test" }, "dependencies": { - "@uncaged/json-cas": "^0.5.2", - "@uncaged/json-cas-fs": "^0.5.2", + "@uncaged/json-cas": "^0.5.3", + "@uncaged/json-cas-fs": "^0.5.3", "@uncaged/workflow-protocol": "workspace:^", "@uncaged/workflow-util": "workspace:^", "dotenv": "^16.6.1",