From 96fc3e220a7f7efc92c325367f31e9a500d00700 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= Date: Thu, 7 May 2026 03:13:44 +0000 Subject: [PATCH 1/4] fix(solve-issue): handle one-shot coder completing all phases at once MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The moderator now treats completedPhase matching the last planned phase name as full completion. Also recognizes sentinel values (all-done, all_done, complete) for robustness. Fixes #21 小橘 🍊(NEKO TeamοΌ‰ --- packages/workflow-role-coder/src/coder.ts | 4 +-- .../__tests__/solve-issue-template.test.ts | 22 +++++++++++++ .../src/moderator.ts | 32 ++++++++++++++++--- 3 files changed, 52 insertions(+), 6 deletions(-) diff --git a/packages/workflow-role-coder/src/coder.ts b/packages/workflow-role-coder/src/coder.ts index 37f732c..fe3dc1a 100644 --- a/packages/workflow-role-coder/src/coder.ts +++ b/packages/workflow-role-coder/src/coder.ts @@ -10,13 +10,13 @@ export const coderMetaSchema = z.object({ export type CoderMeta = z.infer; const CODER_SYSTEM = `You are a **coder**. Read the thread for the plan and work on the NEXT incomplete phase only. -Report which phase you completed. List the files you changed and summarize what you did.`; +Report which phase you completed using the planner's exact phase name. If you legitimately finish every remaining phase in this single turn, set completedPhase to the last phase name in the plan (the workflow treats that as full completion). List the files you changed and summarize what you did.`; export const coderRole: RoleDefinition = { description: "Implements the next incomplete planner phase and reports structured completion metadata.", systemPrompt: CODER_SYSTEM, extractPrompt: - "Extract which phase was completed, which files were changed, and a summary of the work done.", + "Extract completedPhase: the planner phase name finished this round (exact string from the plan). If multiple phases were finished in one round, use the last finished phase name. Extract filesChanged and a summary of the work.", schema: coderMetaSchema, }; diff --git a/packages/workflow-template-solve-issue/__tests__/solve-issue-template.test.ts b/packages/workflow-template-solve-issue/__tests__/solve-issue-template.test.ts index 919721f..6fbdd47 100644 --- a/packages/workflow-template-solve-issue/__tests__/solve-issue-template.test.ts +++ b/packages/workflow-template-solve-issue/__tests__/solve-issue-template.test.ts @@ -189,6 +189,28 @@ describe("solveIssueModerator", () => { ).toBe("reviewer"); }); + test("one-shot coder reports only last phase name β†’ reviewer (moderator treats as all phases done)", () => { + const phases: PlannerMeta["phases"] = [ + { name: "setup-branch", description: "branch", acceptance: "branch exists" }, + { name: "write-tests", description: "tests", acceptance: "tests pass" }, + { name: "verify", description: "verify", acceptance: "ok" }, + { name: "commit-and-pr", description: "pr", acceptance: "pr open" }, + ]; + expect( + solveIssueModerator(makeCtx(20, [plannerStep(phases), coderStep("commit-and-pr")])), + ).toBe("reviewer"); + }); + + test("completedPhase sentinel when not a planned name β†’ reviewer", () => { + const phases: PlannerMeta["phases"] = [ + { name: "p1", description: "first", acceptance: "a1" }, + { name: "p2", description: "second", acceptance: "a2" }, + ]; + expect(solveIssueModerator(makeCtx(20, [plannerStep(phases), coderStep("all-done")]))).toBe( + "reviewer", + ); + }); + test("incomplete phases β†’ END when max rounds exhausted", () => { const phases: PlannerMeta["phases"] = [ { name: "p1", description: "first", acceptance: "a1" }, diff --git a/packages/workflow-template-solve-issue/src/moderator.ts b/packages/workflow-template-solve-issue/src/moderator.ts index 20b41ab..3c73f02 100644 --- a/packages/workflow-template-solve-issue/src/moderator.ts +++ b/packages/workflow-template-solve-issue/src/moderator.ts @@ -3,6 +3,30 @@ import { END } from "@uncaged/workflow"; import type { SolveIssueMeta } from "./roles.js"; +const COMPLETED_PHASE_SENTINELS = new Set(["all-done", "all_done", "complete"]); + +function coderFinishedAllPlannedPhases( + phases: ReadonlyArray<{ name: string }>, + coderCompletedPhases: ReadonlyArray, +): boolean { + if (phases.length === 0) { + return true; + } + const plannedNames = new Set(phases.map((p) => p.name)); + const lastName = phases[phases.length - 1].name; + const explicit = new Set(coderCompletedPhases.filter((name) => plannedNames.has(name))); + if (phases.every((p) => explicit.has(p.name))) { + return true; + } + // One-shot runs often report only the final phase; treat that as the full plan done. + if (coderCompletedPhases.some((name) => name === lastName)) { + return true; + } + return coderCompletedPhases.some( + (name) => !plannedNames.has(name) && COMPLETED_PHASE_SENTINELS.has(name), + ); +} + function nextAfterCoder( ctx: ModeratorContext, maxRounds: number, @@ -12,10 +36,10 @@ function nextAfterCoder( return "reviewer"; } const phases = plannerStep.meta.phases; - const completedPhases = new Set( - ctx.steps.filter((s) => s.role === "coder").map((s) => s.meta.completedPhase), - ); - const allDone = phases.every((p) => completedPhases.has(p.name)); + const coderCompletedPhases = ctx.steps + .filter((s) => s.role === "coder") + .map((s) => s.meta.completedPhase); + const allDone = coderFinishedAllPlannedPhases(phases, coderCompletedPhases); if (allDone) { return "reviewer"; } From 172e9b34cc189bab74eecb7b5f8874d23a5a0e5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= Date: Thu, 7 May 2026 04:18:42 +0000 Subject: [PATCH 2/4] feat(planner): add hash and title fields to phase schema Each phase now carries a hash (Crockford Base32 identifier) and a one-line title alongside the existing name/description/acceptance. This gives agents immediate semantic context in the prompt without needing to load full phase details from CAS. Refs #23 --- packages/workflow-role-coder/src/coder.ts | 2 +- packages/workflow-role-planner/src/planner.ts | 6 +++-- .../__tests__/solve-issue-template.test.ts | 24 +++++++++---------- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/packages/workflow-role-coder/src/coder.ts b/packages/workflow-role-coder/src/coder.ts index fe3dc1a..1a88a6c 100644 --- a/packages/workflow-role-coder/src/coder.ts +++ b/packages/workflow-role-coder/src/coder.ts @@ -10,7 +10,7 @@ export const coderMetaSchema = z.object({ export type CoderMeta = z.infer; const CODER_SYSTEM = `You are a **coder**. Read the thread for the plan and work on the NEXT incomplete phase only. -Report which phase you completed using the planner's exact phase name. If you legitimately finish every remaining phase in this single turn, set completedPhase to the last phase name in the plan (the workflow treats that as full completion). List the files you changed and summarize what you did.`; +Each planner phase is identified by a hash (8-char Crockford Base32) and a title (one-line summary). Report which phase you completed using the planner's exact phase name. If you legitimately finish every remaining phase in this single turn, set completedPhase to the last phase name in the plan (the workflow treats that as full completion). List the files you changed and summarize what you did.`; export const coderRole: RoleDefinition = { description: diff --git a/packages/workflow-role-planner/src/planner.ts b/packages/workflow-role-planner/src/planner.ts index 6ace1bf..adb12c5 100644 --- a/packages/workflow-role-planner/src/planner.ts +++ b/packages/workflow-role-planner/src/planner.ts @@ -2,6 +2,8 @@ import type { RoleDefinition } from "@uncaged/workflow"; import * as z from "zod/v4"; export const phaseSchema = z.object({ + hash: z.string(), + title: z.string(), name: z.string(), description: z.string(), acceptance: z.string(), @@ -15,7 +17,7 @@ export type PlannerMeta = z.infer; const PLANNER_SYSTEM = `You are a **planner** for a software task. Break the work into **sequential phases** the coder will execute one at a time. -Each phase must have: a short **name** (stable identifier), a **description** of what to do in that phase, and **acceptance** criteria for when that phase is done. +Each phase must have: a short **name** (stable identifier), a one-line **title** summarising the phase goal, a **hash** (8-char Crockford Base32 identifier unique within this plan), a **description** of what to do in that phase, and **acceptance** criteria for when that phase is done. Order phases so earlier steps unblock later ones. Cover root cause, edge cases, and verification across the phases. Do not emit separate file lists or a free-form "approach" field β€” put that detail inside phase descriptions.`; @@ -23,6 +25,6 @@ export const plannerRole: RoleDefinition = { description: "Breaks the task into sequential phases for the coder.", systemPrompt: PLANNER_SYSTEM, extractPrompt: - "Extract the implementation phases from the agent's analysis. Each phase needs a name, description, and acceptance criteria.", + "Extract the implementation phases from the agent's analysis. Each phase needs a hash (8-char Crockford Base32 unique within this plan), a title (one-line summary), a name, a description, and acceptance criteria.", schema: plannerMetaSchema, }; diff --git a/packages/workflow-template-solve-issue/__tests__/solve-issue-template.test.ts b/packages/workflow-template-solve-issue/__tests__/solve-issue-template.test.ts index 6fbdd47..e1cd604 100644 --- a/packages/workflow-template-solve-issue/__tests__/solve-issue-template.test.ts +++ b/packages/workflow-template-solve-issue/__tests__/solve-issue-template.test.ts @@ -16,11 +16,11 @@ import { createSolveIssueRun, solveIssueModerator } from "../src/index.js"; import type { SolveIssueMeta } from "../src/roles.js"; const DEFAULT_PHASES: PlannerMeta["phases"] = [ - { name: "phase-a", description: "Do the work", acceptance: "Done" }, + { hash: "4KNMR2PX", title: "Do the work", name: "phase-a", description: "Do the work", acceptance: "Done" }, ]; const EXPECT_PLANNER_META: PlannerMeta = { - phases: [{ name: "phase-1", description: "placeholder", acceptance: "placeholder" }], + phases: [{ hash: "7BQST3VW", title: "placeholder phase", name: "phase-1", description: "placeholder", acceptance: "placeholder" }], }; const EXPECT_CODER_META: CoderMeta = { @@ -179,8 +179,8 @@ describe("solveIssueModerator", () => { test("multiple planner phases β†’ coder until all complete, then reviewer", () => { const phases: PlannerMeta["phases"] = [ - { name: "p1", description: "first", acceptance: "a1" }, - { name: "p2", description: "second", acceptance: "a2" }, + { hash: "AA000001", title: "first phase", name: "p1", description: "first", acceptance: "a1" }, + { hash: "AA000002", title: "second phase", name: "p2", description: "second", acceptance: "a2" }, ]; expect(solveIssueModerator(makeCtx(20, [plannerStep(phases)]))).toBe("coder"); expect(solveIssueModerator(makeCtx(20, [plannerStep(phases), coderStep("p1")]))).toBe("coder"); @@ -191,10 +191,10 @@ describe("solveIssueModerator", () => { test("one-shot coder reports only last phase name β†’ reviewer (moderator treats as all phases done)", () => { const phases: PlannerMeta["phases"] = [ - { name: "setup-branch", description: "branch", acceptance: "branch exists" }, - { name: "write-tests", description: "tests", acceptance: "tests pass" }, - { name: "verify", description: "verify", acceptance: "ok" }, - { name: "commit-and-pr", description: "pr", acceptance: "pr open" }, + { hash: "BB000001", title: "setup branch", name: "setup-branch", description: "branch", acceptance: "branch exists" }, + { hash: "BB000002", title: "write tests", name: "write-tests", description: "tests", acceptance: "tests pass" }, + { hash: "BB000003", title: "verify", name: "verify", description: "verify", acceptance: "ok" }, + { hash: "BB000004", title: "commit and pr", name: "commit-and-pr", description: "pr", acceptance: "pr open" }, ]; expect( solveIssueModerator(makeCtx(20, [plannerStep(phases), coderStep("commit-and-pr")])), @@ -203,8 +203,8 @@ describe("solveIssueModerator", () => { test("completedPhase sentinel when not a planned name β†’ reviewer", () => { const phases: PlannerMeta["phases"] = [ - { name: "p1", description: "first", acceptance: "a1" }, - { name: "p2", description: "second", acceptance: "a2" }, + { hash: "CC000001", title: "first phase", name: "p1", description: "first", acceptance: "a1" }, + { hash: "CC000002", title: "second phase", name: "p2", description: "second", acceptance: "a2" }, ]; expect(solveIssueModerator(makeCtx(20, [plannerStep(phases), coderStep("all-done")]))).toBe( "reviewer", @@ -213,8 +213,8 @@ describe("solveIssueModerator", () => { test("incomplete phases β†’ END when max rounds exhausted", () => { const phases: PlannerMeta["phases"] = [ - { name: "p1", description: "first", acceptance: "a1" }, - { name: "p2", description: "second", acceptance: "a2" }, + { hash: "DD000001", title: "first phase", name: "p1", description: "first", acceptance: "a1" }, + { hash: "DD000002", title: "second phase", name: "p2", description: "second", acceptance: "a2" }, ]; const steps: ModeratorContext["steps"] = [plannerStep(phases), coderStep("p1")]; expect(solveIssueModerator(makeCtx(3, steps))).toBe(END); From 4b44665c7e9c66436212ef67adef9a3b3073616a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= Date: Thu, 7 May 2026 04:30:19 +0000 Subject: [PATCH 3/4] feat(workflow): add thread-scoped CAS (Content-Addressable Storage) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 1 of #23: - createThreadCas() core API: put/get/delete/list with XXH64 hashing - hashString() utility for string β†’ 13-char Crockford Base32 - CLI: uncaged-workflow cas get/put/list/rm subcommands - thread rm now cleans up .cas/ directory - 10 new tests for CAS operations Refs #23 --- packages/cli-workflow/src/cli-dispatch.ts | 95 ++++++++++++++++++ packages/cli-workflow/src/cmd-cas.ts | 67 +++++++++++++ packages/cli-workflow/src/cmd-thread.ts | 4 +- .../__tests__/solve-issue-template.test.ts | 98 ++++++++++++++++--- packages/workflow/__tests__/cas.test.ts | 92 +++++++++++++++++ packages/workflow/src/cas.ts | 70 +++++++++++++ packages/workflow/src/hash.ts | 7 ++ packages/workflow/src/index.ts | 3 +- 8 files changed, 422 insertions(+), 14 deletions(-) create mode 100644 packages/cli-workflow/src/cmd-cas.ts create mode 100644 packages/workflow/__tests__/cas.test.ts create mode 100644 packages/workflow/src/cas.ts diff --git a/packages/cli-workflow/src/cli-dispatch.ts b/packages/cli-workflow/src/cli-dispatch.ts index 2032eac..b88a54a 100644 --- a/packages/cli-workflow/src/cli-dispatch.ts +++ b/packages/cli-workflow/src/cli-dispatch.ts @@ -1,5 +1,6 @@ import { printCliError, printCliLine, printCliWarn } from "./cli-output.js"; import { cmdAdd, formatAddSuccess, parseAddArgv } from "./cmd-add.js"; +import { cmdCasGet, cmdCasList, cmdCasPut, cmdCasRm } from "./cmd-cas.js"; import { cmdFork, parseForkArgv } from "./cmd-fork.js"; import { cmdHistory } from "./cmd-history.js"; import { cmdKill } from "./cmd-kill.js"; @@ -33,6 +34,10 @@ function usage(): string { " uncaged-workflow thread ", " uncaged-workflow thread rm ", " uncaged-workflow fork [--from-role ]", + " uncaged-workflow cas get ", + " uncaged-workflow cas put ", + " uncaged-workflow cas list ", + " uncaged-workflow cas rm ", ].join("\n"); } @@ -276,6 +281,95 @@ async function dispatchFork(storageRoot: string, argv: string[]): Promise { + const threadId = rest[0]; + const hash = rest[1]; + if (threadId === undefined || hash === undefined || rest.length > 2) { + printCliError(`${usage()}\n\nerror: cas get requires `); + return 1; + } + const result = await cmdCasGet(storageRoot, threadId, hash); + if (!result.ok) { + printCliError(result.error); + return 1; + } + printCliLine(result.value); + return 0; +} + +async function dispatchCasPut(storageRoot: string, rest: string[]): Promise { + const threadId = rest[0]; + const content = rest[1]; + if (threadId === undefined || content === undefined || rest.length > 2) { + printCliError(`${usage()}\n\nerror: cas put requires `); + return 1; + } + const result = await cmdCasPut(storageRoot, threadId, content); + if (!result.ok) { + printCliError(result.error); + return 1; + } + printCliLine(result.value); + return 0; +} + +async function dispatchCasList(storageRoot: string, rest: string[]): Promise { + const threadId = rest[0]; + if (threadId === undefined || rest.length > 1) { + printCliError(`${usage()}\n\nerror: cas list requires `); + return 1; + } + const result = await cmdCasList(storageRoot, threadId); + if (!result.ok) { + printCliError(result.error); + return 1; + } + for (const hash of result.value) { + printCliLine(hash); + } + return 0; +} + +async function dispatchCasRm(storageRoot: string, rest: string[]): Promise { + const threadId = rest[0]; + const hash = rest[1]; + if (threadId === undefined || hash === undefined || rest.length > 2) { + printCliError(`${usage()}\n\nerror: cas rm requires `); + return 1; + } + const result = await cmdCasRm(storageRoot, threadId, hash); + if (!result.ok) { + printCliError(result.error); + return 1; + } + printCliLine(`removed cas entry ${hash}`); + return 0; +} + +const CAS_SUBCOMMAND_TABLE: Record< + string, + (storageRoot: string, rest: string[]) => Promise +> = { + get: dispatchCasGet, + put: dispatchCasPut, + list: dispatchCasList, + rm: dispatchCasRm, +}; + +async function dispatchCas(storageRoot: string, argv: string[]): Promise { + const sub = argv[0]; + if (sub === undefined) { + printCliError(`${usage()}\n\nerror: unknown cas subcommand: (none)`); + return 1; + } + const handler = CAS_SUBCOMMAND_TABLE[sub]; + if (handler === undefined) { + printCliError(`${usage()}\n\nerror: unknown cas subcommand: ${sub}`); + return 1; + } + return handler(storageRoot, argv.slice(1)); +} + type DispatchFn = (storageRoot: string, argv: string[]) => Promise; const COMMAND_TABLE: Record = { @@ -293,6 +387,7 @@ const COMMAND_TABLE: Record = { threads: dispatchThreads, thread: dispatchThreadBranch, fork: dispatchFork, + cas: dispatchCas, }; export async function runCli(storageRoot: string, argv: string[]): Promise { diff --git a/packages/cli-workflow/src/cmd-cas.ts b/packages/cli-workflow/src/cmd-cas.ts new file mode 100644 index 0000000..b62a427 --- /dev/null +++ b/packages/cli-workflow/src/cmd-cas.ts @@ -0,0 +1,67 @@ +import { dirname, join } from "node:path"; + +import { createThreadCas, err, ok, type Result } from "@uncaged/workflow"; + +import { resolveThreadDataPath } from "./thread-scan.js"; + +function resolveCasDir(threadDataPath: string, threadId: string): string { + return join(dirname(threadDataPath), `${threadId}.cas`); +} + +export async function cmdCasGet( + storageRoot: string, + threadId: string, + hash: string, +): Promise> { + const dataPath = await resolveThreadDataPath(storageRoot, threadId); + if (dataPath === null) { + return err(`thread not found: ${threadId}`); + } + const cas = createThreadCas(resolveCasDir(dataPath, threadId)); + const content = await cas.get(hash); + if (content === null) { + return err(`cas entry not found: ${hash}`); + } + return ok(content); +} + +export async function cmdCasPut( + storageRoot: string, + threadId: string, + content: string, +): Promise> { + const dataPath = await resolveThreadDataPath(storageRoot, threadId); + if (dataPath === null) { + return err(`thread not found: ${threadId}`); + } + const cas = createThreadCas(resolveCasDir(dataPath, threadId)); + const hash = await cas.put(content); + return ok(hash); +} + +export async function cmdCasList( + storageRoot: string, + threadId: string, +): Promise> { + const dataPath = await resolveThreadDataPath(storageRoot, threadId); + if (dataPath === null) { + return err(`thread not found: ${threadId}`); + } + const cas = createThreadCas(resolveCasDir(dataPath, threadId)); + const hashes = await cas.list(); + return ok(hashes); +} + +export async function cmdCasRm( + storageRoot: string, + threadId: string, + hash: string, +): Promise> { + const dataPath = await resolveThreadDataPath(storageRoot, threadId); + if (dataPath === null) { + return err(`thread not found: ${threadId}`); + } + const cas = createThreadCas(resolveCasDir(dataPath, threadId)); + await cas.delete(hash); + return ok(undefined); +} diff --git a/packages/cli-workflow/src/cmd-thread.ts b/packages/cli-workflow/src/cmd-thread.ts index af5c6b1..1422d89 100644 --- a/packages/cli-workflow/src/cmd-thread.ts +++ b/packages/cli-workflow/src/cmd-thread.ts @@ -1,4 +1,4 @@ -import { unlink } from "node:fs/promises"; +import { rm, unlink } from "node:fs/promises"; import { dirname, join } from "node:path"; import { err, ok, type Result } from "@uncaged/workflow"; @@ -33,10 +33,12 @@ export async function cmdThreadRemove( const dir = dirname(dataPath); const infoPath = join(dir, `${threadId}.info.jsonl`); const runningPath = join(dir, `${threadId}.running`); + const casPath = join(dir, `${threadId}.cas`); await unlink(dataPath); await unlink(infoPath).catch(() => {}); await unlink(runningPath).catch(() => {}); + await rm(casPath, { recursive: true, force: true }); return ok(undefined); } diff --git a/packages/workflow-template-solve-issue/__tests__/solve-issue-template.test.ts b/packages/workflow-template-solve-issue/__tests__/solve-issue-template.test.ts index e1cd604..2dd12f9 100644 --- a/packages/workflow-template-solve-issue/__tests__/solve-issue-template.test.ts +++ b/packages/workflow-template-solve-issue/__tests__/solve-issue-template.test.ts @@ -16,11 +16,25 @@ import { createSolveIssueRun, solveIssueModerator } from "../src/index.js"; import type { SolveIssueMeta } from "../src/roles.js"; const DEFAULT_PHASES: PlannerMeta["phases"] = [ - { hash: "4KNMR2PX", title: "Do the work", name: "phase-a", description: "Do the work", acceptance: "Done" }, + { + hash: "4KNMR2PX", + title: "Do the work", + name: "phase-a", + description: "Do the work", + acceptance: "Done", + }, ]; const EXPECT_PLANNER_META: PlannerMeta = { - phases: [{ hash: "7BQST3VW", title: "placeholder phase", name: "phase-1", description: "placeholder", acceptance: "placeholder" }], + phases: [ + { + hash: "7BQST3VW", + title: "placeholder phase", + name: "phase-1", + description: "placeholder", + acceptance: "placeholder", + }, + ], }; const EXPECT_CODER_META: CoderMeta = { @@ -179,8 +193,20 @@ describe("solveIssueModerator", () => { test("multiple planner phases β†’ coder until all complete, then reviewer", () => { const phases: PlannerMeta["phases"] = [ - { hash: "AA000001", title: "first phase", name: "p1", description: "first", acceptance: "a1" }, - { hash: "AA000002", title: "second phase", name: "p2", description: "second", acceptance: "a2" }, + { + hash: "AA000001", + title: "first phase", + name: "p1", + description: "first", + acceptance: "a1", + }, + { + hash: "AA000002", + title: "second phase", + name: "p2", + description: "second", + acceptance: "a2", + }, ]; expect(solveIssueModerator(makeCtx(20, [plannerStep(phases)]))).toBe("coder"); expect(solveIssueModerator(makeCtx(20, [plannerStep(phases), coderStep("p1")]))).toBe("coder"); @@ -191,10 +217,34 @@ describe("solveIssueModerator", () => { test("one-shot coder reports only last phase name β†’ reviewer (moderator treats as all phases done)", () => { const phases: PlannerMeta["phases"] = [ - { hash: "BB000001", title: "setup branch", name: "setup-branch", description: "branch", acceptance: "branch exists" }, - { hash: "BB000002", title: "write tests", name: "write-tests", description: "tests", acceptance: "tests pass" }, - { hash: "BB000003", title: "verify", name: "verify", description: "verify", acceptance: "ok" }, - { hash: "BB000004", title: "commit and pr", name: "commit-and-pr", description: "pr", acceptance: "pr open" }, + { + hash: "BB000001", + title: "setup branch", + name: "setup-branch", + description: "branch", + acceptance: "branch exists", + }, + { + hash: "BB000002", + title: "write tests", + name: "write-tests", + description: "tests", + acceptance: "tests pass", + }, + { + hash: "BB000003", + title: "verify", + name: "verify", + description: "verify", + acceptance: "ok", + }, + { + hash: "BB000004", + title: "commit and pr", + name: "commit-and-pr", + description: "pr", + acceptance: "pr open", + }, ]; expect( solveIssueModerator(makeCtx(20, [plannerStep(phases), coderStep("commit-and-pr")])), @@ -203,8 +253,20 @@ describe("solveIssueModerator", () => { test("completedPhase sentinel when not a planned name β†’ reviewer", () => { const phases: PlannerMeta["phases"] = [ - { hash: "CC000001", title: "first phase", name: "p1", description: "first", acceptance: "a1" }, - { hash: "CC000002", title: "second phase", name: "p2", description: "second", acceptance: "a2" }, + { + hash: "CC000001", + title: "first phase", + name: "p1", + description: "first", + acceptance: "a1", + }, + { + hash: "CC000002", + title: "second phase", + name: "p2", + description: "second", + acceptance: "a2", + }, ]; expect(solveIssueModerator(makeCtx(20, [plannerStep(phases), coderStep("all-done")]))).toBe( "reviewer", @@ -213,8 +275,20 @@ describe("solveIssueModerator", () => { test("incomplete phases β†’ END when max rounds exhausted", () => { const phases: PlannerMeta["phases"] = [ - { hash: "DD000001", title: "first phase", name: "p1", description: "first", acceptance: "a1" }, - { hash: "DD000002", title: "second phase", name: "p2", description: "second", acceptance: "a2" }, + { + hash: "DD000001", + title: "first phase", + name: "p1", + description: "first", + acceptance: "a1", + }, + { + hash: "DD000002", + title: "second phase", + name: "p2", + description: "second", + acceptance: "a2", + }, ]; const steps: ModeratorContext["steps"] = [plannerStep(phases), coderStep("p1")]; expect(solveIssueModerator(makeCtx(3, steps))).toBe(END); diff --git a/packages/workflow/__tests__/cas.test.ts b/packages/workflow/__tests__/cas.test.ts new file mode 100644 index 0000000..679e242 --- /dev/null +++ b/packages/workflow/__tests__/cas.test.ts @@ -0,0 +1,92 @@ +import { afterEach, beforeEach, describe, expect, test } from "bun:test"; +import { mkdtemp, rm } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { createThreadCas } from "../src/cas.js"; +import { hashString } from "../src/hash.js"; + +describe("createThreadCas", () => { + let casDir: string; + + beforeEach(async () => { + casDir = await mkdtemp(join(tmpdir(), "cas-test-")); + }); + + afterEach(async () => { + await rm(casDir, { recursive: true, force: true }); + }); + + test("put returns consistent hash for same content", async () => { + const cas = createThreadCas(casDir); + const h1 = await cas.put("hello world"); + const h2 = await cas.put("hello world"); + expect(h1).toBe(h2); + expect(h1).toHaveLength(13); + }); + + test("put returns hash matching hashString", async () => { + const cas = createThreadCas(casDir); + const content = "some content to store"; + const h = await cas.put(content); + expect(h).toBe(hashString(content)); + }); + + test("get returns stored content", async () => { + const cas = createThreadCas(casDir); + const content = "line1\nline2\nline3"; + const h = await cas.put(content); + const retrieved = await cas.get(h); + expect(retrieved).toBe(content); + }); + + test("get returns null for missing hash", async () => { + const cas = createThreadCas(casDir); + const result = await cas.get("0000000000000"); + expect(result).toBeNull(); + }); + + test("delete removes entry", async () => { + const cas = createThreadCas(casDir); + const h = await cas.put("to be deleted"); + await cas.delete(h); + const result = await cas.get(h); + expect(result).toBeNull(); + }); + + test("delete on missing hash does not throw", async () => { + const cas = createThreadCas(casDir); + await cas.delete("0000000000000"); + }); + + test("list returns all stored hashes", async () => { + const cas = createThreadCas(casDir); + const h1 = await cas.put("aaa"); + const h2 = await cas.put("bbb"); + const h3 = await cas.put("ccc"); + const hashes = await cas.list(); + expect(hashes.sort()).toEqual([h1, h2, h3].sort()); + }); + + test("list returns empty array when cas dir does not exist", async () => { + const cas = createThreadCas(join(casDir, "nonexistent")); + const hashes = await cas.list(); + expect(hashes).toEqual([]); + }); + + test("put is idempotent β€” same content written twice causes no error", async () => { + const cas = createThreadCas(casDir); + const h1 = await cas.put("idempotent"); + const h2 = await cas.put("idempotent"); + expect(h1).toBe(h2); + const content = await cas.get(h1); + expect(content).toBe("idempotent"); + }); + + test("different content produces different hashes", async () => { + const cas = createThreadCas(casDir); + const h1 = await cas.put("alpha"); + const h2 = await cas.put("beta"); + expect(h1).not.toBe(h2); + }); +}); diff --git a/packages/workflow/src/cas.ts b/packages/workflow/src/cas.ts new file mode 100644 index 0000000..8e422ea --- /dev/null +++ b/packages/workflow/src/cas.ts @@ -0,0 +1,70 @@ +import { mkdir, readdir, readFile, rename, unlink, writeFile } from "node:fs/promises"; +import { join } from "node:path"; + +import { hashString } from "./hash.js"; + +export type CasStore = { + put(content: string): Promise; + get(hash: string): Promise; + delete(hash: string): Promise; + list(): Promise; +}; + +export function createThreadCas(casDir: string): CasStore { + async function ensureDir(): Promise { + await mkdir(casDir, { recursive: true }); + } + + function filePath(hash: string): string { + return join(casDir, `${hash}.txt`); + } + + return { + async put(content: string): Promise { + const hash = hashString(content); + await ensureDir(); + const target = filePath(hash); + const tmp = `${target}.tmp.${Date.now()}`; + await writeFile(tmp, content, "utf8"); + await rename(tmp, target); + return hash; + }, + + async get(hash: string): Promise { + try { + return await readFile(filePath(hash), "utf8"); + } catch (e) { + const errObj = e as NodeJS.ErrnoException; + if (errObj.code === "ENOENT") { + return null; + } + throw e; + } + }, + + async delete(hash: string): Promise { + try { + await unlink(filePath(hash)); + } catch (e) { + const errObj = e as NodeJS.ErrnoException; + if (errObj.code === "ENOENT") { + return; + } + throw e; + } + }, + + async list(): Promise { + try { + const entries = await readdir(casDir); + return entries.filter((name) => name.endsWith(".txt")).map((name) => name.slice(0, -4)); + } catch (e) { + const errObj = e as NodeJS.ErrnoException; + if (errObj.code === "ENOENT") { + return []; + } + throw e; + } + }, + }; +} diff --git a/packages/workflow/src/hash.ts b/packages/workflow/src/hash.ts index 7d2c51c..3f85984 100644 --- a/packages/workflow/src/hash.ts +++ b/packages/workflow/src/hash.ts @@ -15,3 +15,10 @@ export function hashWorkflowBundleBytes(data: Uint8Array): string { const digest = XXH.h64(0).update(buf).digest(); return encodeUint64AsCrockford(digestToUint64(digest)); } + +/** XXH64 (seed 0) over a UTF-8 string, encoded as 13-char Crockford Base32. */ +export function hashString(content: string): string { + const buf = Buffer.from(content, "utf8"); + const digest = XXH.h64(0).update(buf).digest(); + return encodeUint64AsCrockford(digestToUint64(digest)); +} diff --git a/packages/workflow/src/index.ts b/packages/workflow/src/index.ts index 47d4564..131de9c 100644 --- a/packages/workflow/src/index.ts +++ b/packages/workflow/src/index.ts @@ -7,6 +7,7 @@ export { } from "./base32.js"; export { buildDescriptor } from "./build-descriptor.js"; export { validateWorkflowBundle, type WorkflowBundleValidationInput } from "./bundle-validator.js"; +export { type CasStore, createThreadCas } from "./cas.js"; export { createWorkflow } from "./create-workflow.js"; export { type ExecuteThreadIo, @@ -25,7 +26,7 @@ export { selectForkHistoricalSteps, } from "./fork-thread.js"; export { stringifyWorkflowDescriptor } from "./generate-descriptor.js"; -export { hashWorkflowBundleBytes } from "./hash.js"; +export { hashString, hashWorkflowBundleBytes } from "./hash.js"; export { type LlmError, llmErrorToCause, From 341ff656dc62d2c392a5a2e15d8984bb0c32b971 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= Date: Thu, 7 May 2026 04:54:25 +0000 Subject: [PATCH 4/4] feat(planner,coder,moderator): integrate CAS for phase tracking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2 of #23: - Planner schema compact: {hash, title} only, details stored via CAS CLI - Planner prompt instructs agent to shell out `cas put` for each phase - Coder prompt instructs agent to `cas get` for phase details, report hash - Moderator compares hashes instead of names - Removed COMPLETED_PHASE_SENTINELS β€” hash matching eliminates ambiguity Refs #23 --- packages/workflow-role-coder/src/coder.ts | 8 +- packages/workflow-role-planner/src/planner.ts | 16 ++- .../__tests__/solve-issue-template.test.ts | 105 +++++------------- .../src/moderator.ts | 19 ++-- 4 files changed, 49 insertions(+), 99 deletions(-) diff --git a/packages/workflow-role-coder/src/coder.ts b/packages/workflow-role-coder/src/coder.ts index 1a88a6c..c3c6891 100644 --- a/packages/workflow-role-coder/src/coder.ts +++ b/packages/workflow-role-coder/src/coder.ts @@ -10,13 +10,17 @@ export const coderMetaSchema = z.object({ export type CoderMeta = z.infer; const CODER_SYSTEM = `You are a **coder**. Read the thread for the plan and work on the NEXT incomplete phase only. -Each planner phase is identified by a hash (8-char Crockford Base32) and a title (one-line summary). Report which phase you completed using the planner's exact phase name. If you legitimately finish every remaining phase in this single turn, set completedPhase to the last phase name in the plan (the workflow treats that as full completion). List the files you changed and summarize what you did.`; +Each planner phase is identified by a content-hash and a title. To read a phase's full details (name, description, acceptance criteria), run: + + uncaged-workflow cas get + +Report which phase you completed using the phase **hash** (not the title). If you legitimately finish every remaining phase in this single turn, set completedPhase to the **last** phase hash in the plan (the workflow treats that as full completion). List the files you changed and summarize what you did.`; export const coderRole: RoleDefinition = { description: "Implements the next incomplete planner phase and reports structured completion metadata.", systemPrompt: CODER_SYSTEM, extractPrompt: - "Extract completedPhase: the planner phase name finished this round (exact string from the plan). If multiple phases were finished in one round, use the last finished phase name. Extract filesChanged and a summary of the work.", + "Extract completedPhase: the planner phase hash finished this round (exact hash string from the plan). If multiple phases were finished in one round, use the last finished phase hash. Extract filesChanged and a summary of the work.", schema: coderMetaSchema, }; diff --git a/packages/workflow-role-planner/src/planner.ts b/packages/workflow-role-planner/src/planner.ts index adb12c5..33abbc5 100644 --- a/packages/workflow-role-planner/src/planner.ts +++ b/packages/workflow-role-planner/src/planner.ts @@ -4,9 +4,6 @@ import * as z from "zod/v4"; export const phaseSchema = z.object({ hash: z.string(), title: z.string(), - name: z.string(), - description: z.string(), - acceptance: z.string(), }); export const plannerMetaSchema = z.object({ @@ -17,14 +14,21 @@ export type PlannerMeta = z.infer; const PLANNER_SYSTEM = `You are a **planner** for a software task. Break the work into **sequential phases** the coder will execute one at a time. -Each phase must have: a short **name** (stable identifier), a one-line **title** summarising the phase goal, a **hash** (8-char Crockford Base32 identifier unique within this plan), a **description** of what to do in that phase, and **acceptance** criteria for when that phase is done. +For each phase, decide on a name, detailed description, and acceptance criteria. Then store the full detail text in CAS so the coder can retrieve it later: -Order phases so earlier steps unblock later ones. Cover root cause, edge cases, and verification across the phases. Do not emit separate file lists or a free-form "approach" field β€” put that detail inside phase descriptions.`; + uncaged-workflow cas put "# \n\nDescription: \n\nAcceptance: " + +The command prints a content-hash to stdout. Use that hash as the phase identifier. + +Your final structured output must contain compact phases only: + { "phases": [{ "hash": "", "title": "" }] } + +The current thread ID is provided in the thread context. Order phases so earlier steps unblock later ones. Cover root cause, edge cases, and verification across the phases.`; export const plannerRole: RoleDefinition = { description: "Breaks the task into sequential phases for the coder.", systemPrompt: PLANNER_SYSTEM, extractPrompt: - "Extract the implementation phases from the agent's analysis. Each phase needs a hash (8-char Crockford Base32 unique within this plan), a title (one-line summary), a name, a description, and acceptance criteria.", + "Extract the implementation phases from the agent's output. Each phase has a hash (the CAS content-hash returned by the cas put command) and a title (one-line summary).", schema: plannerMetaSchema, }; diff --git a/packages/workflow-template-solve-issue/__tests__/solve-issue-template.test.ts b/packages/workflow-template-solve-issue/__tests__/solve-issue-template.test.ts index 2dd12f9..0c20ca6 100644 --- a/packages/workflow-template-solve-issue/__tests__/solve-issue-template.test.ts +++ b/packages/workflow-template-solve-issue/__tests__/solve-issue-template.test.ts @@ -19,9 +19,6 @@ const DEFAULT_PHASES: PlannerMeta["phases"] = [ { hash: "4KNMR2PX", title: "Do the work", - name: "phase-a", - description: "Do the work", - acceptance: "Done", }, ]; @@ -30,15 +27,12 @@ const EXPECT_PLANNER_META: PlannerMeta = { { hash: "7BQST3VW", title: "placeholder phase", - name: "phase-1", - description: "placeholder", - acceptance: "placeholder", }, ], }; const EXPECT_CODER_META: CoderMeta = { - completedPhase: "phase-1", + completedPhase: "7BQST3VW", filesChanged: [], summary: "", }; @@ -123,7 +117,7 @@ function plannerStep(phases: PlannerMeta["phases"] = DEFAULT_PHASES): RoleStep { +function coderStep(completedPhase = "4KNMR2PX"): RoleStep { return { role: "coder", content: "code", @@ -196,101 +190,54 @@ describe("solveIssueModerator", () => { { hash: "AA000001", title: "first phase", - name: "p1", - description: "first", - acceptance: "a1", }, { hash: "AA000002", title: "second phase", - name: "p2", - description: "second", - acceptance: "a2", }, ]; expect(solveIssueModerator(makeCtx(20, [plannerStep(phases)]))).toBe("coder"); - expect(solveIssueModerator(makeCtx(20, [plannerStep(phases), coderStep("p1")]))).toBe("coder"); + expect(solveIssueModerator(makeCtx(20, [plannerStep(phases), coderStep("AA000001")]))).toBe( + "coder", + ); expect( - solveIssueModerator(makeCtx(20, [plannerStep(phases), coderStep("p1"), coderStep("p2")])), + solveIssueModerator( + makeCtx(20, [plannerStep(phases), coderStep("AA000001"), coderStep("AA000002")]), + ), ).toBe("reviewer"); }); - test("one-shot coder reports only last phase name β†’ reviewer (moderator treats as all phases done)", () => { + test("one-shot coder reports only last phase hash β†’ reviewer (moderator treats as all phases done)", () => { const phases: PlannerMeta["phases"] = [ - { - hash: "BB000001", - title: "setup branch", - name: "setup-branch", - description: "branch", - acceptance: "branch exists", - }, - { - hash: "BB000002", - title: "write tests", - name: "write-tests", - description: "tests", - acceptance: "tests pass", - }, - { - hash: "BB000003", - title: "verify", - name: "verify", - description: "verify", - acceptance: "ok", - }, - { - hash: "BB000004", - title: "commit and pr", - name: "commit-and-pr", - description: "pr", - acceptance: "pr open", - }, + { hash: "BB000001", title: "setup branch" }, + { hash: "BB000002", title: "write tests" }, + { hash: "BB000003", title: "verify" }, + { hash: "BB000004", title: "commit and pr" }, ]; - expect( - solveIssueModerator(makeCtx(20, [plannerStep(phases), coderStep("commit-and-pr")])), - ).toBe("reviewer"); + expect(solveIssueModerator(makeCtx(20, [plannerStep(phases), coderStep("BB000004")]))).toBe( + "reviewer", + ); }); - test("completedPhase sentinel when not a planned name β†’ reviewer", () => { + test("unrecognised completedPhase hash β†’ coder retry when budget allows", () => { const phases: PlannerMeta["phases"] = [ - { - hash: "CC000001", - title: "first phase", - name: "p1", - description: "first", - acceptance: "a1", - }, - { - hash: "CC000002", - title: "second phase", - name: "p2", - description: "second", - acceptance: "a2", - }, + { hash: "CC000001", title: "first phase" }, + { hash: "CC000002", title: "second phase" }, ]; expect(solveIssueModerator(makeCtx(20, [plannerStep(phases), coderStep("all-done")]))).toBe( - "reviewer", + "coder", ); }); test("incomplete phases β†’ END when max rounds exhausted", () => { const phases: PlannerMeta["phases"] = [ - { - hash: "DD000001", - title: "first phase", - name: "p1", - description: "first", - acceptance: "a1", - }, - { - hash: "DD000002", - title: "second phase", - name: "p2", - description: "second", - acceptance: "a2", - }, + { hash: "DD000001", title: "first phase" }, + { hash: "DD000002", title: "second phase" }, + ]; + const steps: ModeratorContext["steps"] = [ + plannerStep(phases), + coderStep("DD000001"), ]; - const steps: ModeratorContext["steps"] = [plannerStep(phases), coderStep("p1")]; expect(solveIssueModerator(makeCtx(3, steps))).toBe(END); }); }); diff --git a/packages/workflow-template-solve-issue/src/moderator.ts b/packages/workflow-template-solve-issue/src/moderator.ts index 3c73f02..39a6d05 100644 --- a/packages/workflow-template-solve-issue/src/moderator.ts +++ b/packages/workflow-template-solve-issue/src/moderator.ts @@ -3,28 +3,23 @@ import { END } from "@uncaged/workflow"; import type { SolveIssueMeta } from "./roles.js"; -const COMPLETED_PHASE_SENTINELS = new Set(["all-done", "all_done", "complete"]); - function coderFinishedAllPlannedPhases( - phases: ReadonlyArray<{ name: string }>, + phases: ReadonlyArray<{ hash: string }>, coderCompletedPhases: ReadonlyArray, ): boolean { if (phases.length === 0) { return true; } - const plannedNames = new Set(phases.map((p) => p.name)); - const lastName = phases[phases.length - 1].name; - const explicit = new Set(coderCompletedPhases.filter((name) => plannedNames.has(name))); - if (phases.every((p) => explicit.has(p.name))) { + const plannedHashes = new Set(phases.map((p) => p.hash)); + const lastHash = phases[phases.length - 1].hash; + const explicit = new Set(coderCompletedPhases.filter((h) => plannedHashes.has(h))); + if (phases.every((p) => explicit.has(p.hash))) { return true; } - // One-shot runs often report only the final phase; treat that as the full plan done. - if (coderCompletedPhases.some((name) => name === lastName)) { + if (coderCompletedPhases.some((h) => h === lastHash)) { return true; } - return coderCompletedPhases.some( - (name) => !plannedNames.has(name) && COMPLETED_PHASE_SENTINELS.has(name), - ); + return false; } function nextAfterCoder(