feat: thread-scoped CAS for phase tracking (#23)

This commit is contained in:
2026-05-07 04:59:03 +00:00
11 changed files with 431 additions and 25 deletions
+95
View File
@@ -1,5 +1,6 @@
import { printCliError, printCliLine, printCliWarn } from "./cli-output.js"; import { printCliError, printCliLine, printCliWarn } from "./cli-output.js";
import { cmdAdd, formatAddSuccess, parseAddArgv } from "./cmd-add.js"; import { cmdAdd, formatAddSuccess, parseAddArgv } from "./cmd-add.js";
import { cmdCasGet, cmdCasList, cmdCasPut, cmdCasRm } from "./cmd-cas.js";
import { cmdFork, parseForkArgv } from "./cmd-fork.js"; import { cmdFork, parseForkArgv } from "./cmd-fork.js";
import { cmdHistory } from "./cmd-history.js"; import { cmdHistory } from "./cmd-history.js";
import { cmdKill } from "./cmd-kill.js"; import { cmdKill } from "./cmd-kill.js";
@@ -33,6 +34,10 @@ function usage(): string {
" uncaged-workflow thread <id>", " uncaged-workflow thread <id>",
" uncaged-workflow thread rm <id>", " uncaged-workflow thread rm <id>",
" uncaged-workflow fork <thread-id> [--from-role <role>]", " uncaged-workflow fork <thread-id> [--from-role <role>]",
" uncaged-workflow cas get <thread-id> <hash>",
" uncaged-workflow cas put <thread-id> <content>",
" uncaged-workflow cas list <thread-id>",
" uncaged-workflow cas rm <thread-id> <hash>",
].join("\n"); ].join("\n");
} }
@@ -276,6 +281,95 @@ async function dispatchFork(storageRoot: string, argv: string[]): Promise<number
return 0; return 0;
} }
async function dispatchCasGet(storageRoot: string, rest: string[]): Promise<number> {
const threadId = rest[0];
const hash = rest[1];
if (threadId === undefined || hash === undefined || rest.length > 2) {
printCliError(`${usage()}\n\nerror: cas get requires <thread-id> <hash>`);
return 1;
}
const result = await cmdCasGet(storageRoot, threadId, hash);
if (!result.ok) {
printCliError(result.error);
return 1;
}
printCliLine(result.value);
return 0;
}
async function dispatchCasPut(storageRoot: string, rest: string[]): Promise<number> {
const threadId = rest[0];
const content = rest[1];
if (threadId === undefined || content === undefined || rest.length > 2) {
printCliError(`${usage()}\n\nerror: cas put requires <thread-id> <content>`);
return 1;
}
const result = await cmdCasPut(storageRoot, threadId, content);
if (!result.ok) {
printCliError(result.error);
return 1;
}
printCliLine(result.value);
return 0;
}
async function dispatchCasList(storageRoot: string, rest: string[]): Promise<number> {
const threadId = rest[0];
if (threadId === undefined || rest.length > 1) {
printCliError(`${usage()}\n\nerror: cas list requires <thread-id>`);
return 1;
}
const result = await cmdCasList(storageRoot, threadId);
if (!result.ok) {
printCliError(result.error);
return 1;
}
for (const hash of result.value) {
printCliLine(hash);
}
return 0;
}
async function dispatchCasRm(storageRoot: string, rest: string[]): Promise<number> {
const threadId = rest[0];
const hash = rest[1];
if (threadId === undefined || hash === undefined || rest.length > 2) {
printCliError(`${usage()}\n\nerror: cas rm requires <thread-id> <hash>`);
return 1;
}
const result = await cmdCasRm(storageRoot, threadId, hash);
if (!result.ok) {
printCliError(result.error);
return 1;
}
printCliLine(`removed cas entry ${hash}`);
return 0;
}
const CAS_SUBCOMMAND_TABLE: Record<
string,
(storageRoot: string, rest: string[]) => Promise<number>
> = {
get: dispatchCasGet,
put: dispatchCasPut,
list: dispatchCasList,
rm: dispatchCasRm,
};
async function dispatchCas(storageRoot: string, argv: string[]): Promise<number> {
const sub = argv[0];
if (sub === undefined) {
printCliError(`${usage()}\n\nerror: unknown cas subcommand: (none)`);
return 1;
}
const handler = CAS_SUBCOMMAND_TABLE[sub];
if (handler === undefined) {
printCliError(`${usage()}\n\nerror: unknown cas subcommand: ${sub}`);
return 1;
}
return handler(storageRoot, argv.slice(1));
}
type DispatchFn = (storageRoot: string, argv: string[]) => Promise<number>; type DispatchFn = (storageRoot: string, argv: string[]) => Promise<number>;
const COMMAND_TABLE: Record<string, DispatchFn> = { const COMMAND_TABLE: Record<string, DispatchFn> = {
@@ -293,6 +387,7 @@ const COMMAND_TABLE: Record<string, DispatchFn> = {
threads: dispatchThreads, threads: dispatchThreads,
thread: dispatchThreadBranch, thread: dispatchThreadBranch,
fork: dispatchFork, fork: dispatchFork,
cas: dispatchCas,
}; };
export async function runCli(storageRoot: string, argv: string[]): Promise<number> { export async function runCli(storageRoot: string, argv: string[]): Promise<number> {
+67
View File
@@ -0,0 +1,67 @@
import { dirname, join } from "node:path";
import { createThreadCas, err, ok, type Result } from "@uncaged/workflow";
import { resolveThreadDataPath } from "./thread-scan.js";
function resolveCasDir(threadDataPath: string, threadId: string): string {
return join(dirname(threadDataPath), `${threadId}.cas`);
}
export async function cmdCasGet(
storageRoot: string,
threadId: string,
hash: string,
): Promise<Result<string, string>> {
const dataPath = await resolveThreadDataPath(storageRoot, threadId);
if (dataPath === null) {
return err(`thread not found: ${threadId}`);
}
const cas = createThreadCas(resolveCasDir(dataPath, threadId));
const content = await cas.get(hash);
if (content === null) {
return err(`cas entry not found: ${hash}`);
}
return ok(content);
}
export async function cmdCasPut(
storageRoot: string,
threadId: string,
content: string,
): Promise<Result<string, string>> {
const dataPath = await resolveThreadDataPath(storageRoot, threadId);
if (dataPath === null) {
return err(`thread not found: ${threadId}`);
}
const cas = createThreadCas(resolveCasDir(dataPath, threadId));
const hash = await cas.put(content);
return ok(hash);
}
export async function cmdCasList(
storageRoot: string,
threadId: string,
): Promise<Result<string[], string>> {
const dataPath = await resolveThreadDataPath(storageRoot, threadId);
if (dataPath === null) {
return err(`thread not found: ${threadId}`);
}
const cas = createThreadCas(resolveCasDir(dataPath, threadId));
const hashes = await cas.list();
return ok(hashes);
}
export async function cmdCasRm(
storageRoot: string,
threadId: string,
hash: string,
): Promise<Result<void, string>> {
const dataPath = await resolveThreadDataPath(storageRoot, threadId);
if (dataPath === null) {
return err(`thread not found: ${threadId}`);
}
const cas = createThreadCas(resolveCasDir(dataPath, threadId));
await cas.delete(hash);
return ok(undefined);
}
+3 -1
View File
@@ -1,4 +1,4 @@
import { unlink } from "node:fs/promises"; import { rm, unlink } from "node:fs/promises";
import { dirname, join } from "node:path"; import { dirname, join } from "node:path";
import { err, ok, type Result } from "@uncaged/workflow"; import { err, ok, type Result } from "@uncaged/workflow";
@@ -33,10 +33,12 @@ export async function cmdThreadRemove(
const dir = dirname(dataPath); const dir = dirname(dataPath);
const infoPath = join(dir, `${threadId}.info.jsonl`); const infoPath = join(dir, `${threadId}.info.jsonl`);
const runningPath = join(dir, `${threadId}.running`); const runningPath = join(dir, `${threadId}.running`);
const casPath = join(dir, `${threadId}.cas`);
await unlink(dataPath); await unlink(dataPath);
await unlink(infoPath).catch(() => {}); await unlink(infoPath).catch(() => {});
await unlink(runningPath).catch(() => {}); await unlink(runningPath).catch(() => {});
await rm(casPath, { recursive: true, force: true });
return ok(undefined); return ok(undefined);
} }
+6 -2
View File
@@ -10,13 +10,17 @@ export const coderMetaSchema = z.object({
export type CoderMeta = z.infer<typeof coderMetaSchema>; export type CoderMeta = z.infer<typeof coderMetaSchema>;
const CODER_SYSTEM = `You are a **coder**. Read the thread for the plan and work on the NEXT incomplete phase only. const CODER_SYSTEM = `You are a **coder**. Read the thread for the plan and work on the NEXT incomplete phase only.
Report which phase you completed. List the files you changed and summarize what you did.`; Each planner phase is identified by a content-hash and a title. To read a phase's full details (name, description, acceptance criteria), run:
uncaged-workflow cas get <thread-id> <hash>
Report which phase you completed using the phase **hash** (not the title). If you legitimately finish every remaining phase in this single turn, set completedPhase to the **last** phase hash in the plan (the workflow treats that as full completion). List the files you changed and summarize what you did.`;
export const coderRole: RoleDefinition<CoderMeta> = { export const coderRole: RoleDefinition<CoderMeta> = {
description: description:
"Implements the next incomplete planner phase and reports structured completion metadata.", "Implements the next incomplete planner phase and reports structured completion metadata.",
systemPrompt: CODER_SYSTEM, systemPrompt: CODER_SYSTEM,
extractPrompt: extractPrompt:
"Extract which phase was completed, which files were changed, and a summary of the work done.", "Extract completedPhase: the planner phase hash finished this round (exact hash string from the plan). If multiple phases were finished in one round, use the last finished phase hash. Extract filesChanged and a summary of the work.",
schema: coderMetaSchema, schema: coderMetaSchema,
}; };
+12 -6
View File
@@ -2,9 +2,8 @@ import type { RoleDefinition } from "@uncaged/workflow";
import * as z from "zod/v4"; import * as z from "zod/v4";
export const phaseSchema = z.object({ export const phaseSchema = z.object({
name: z.string(), hash: z.string(),
description: z.string(), title: z.string(),
acceptance: z.string(),
}); });
export const plannerMetaSchema = z.object({ export const plannerMetaSchema = z.object({
@@ -15,14 +14,21 @@ export type PlannerMeta = z.infer<typeof plannerMetaSchema>;
const PLANNER_SYSTEM = `You are a **planner** for a software task. Break the work into **sequential phases** the coder will execute one at a time. const PLANNER_SYSTEM = `You are a **planner** for a software task. Break the work into **sequential phases** the coder will execute one at a time.
Each phase must have: a short **name** (stable identifier), a **description** of what to do in that phase, and **acceptance** criteria for when that phase is done. For each phase, decide on a name, detailed description, and acceptance criteria. Then store the full detail text in CAS so the coder can retrieve it later:
Order phases so earlier steps unblock later ones. Cover root cause, edge cases, and verification across the phases. Do not emit separate file lists or a free-form "approach" field — put that detail inside phase descriptions.`; uncaged-workflow cas put <thread-id> "# <name>\n\nDescription: <description>\n\nAcceptance: <acceptance>"
The command prints a content-hash to stdout. Use that hash as the phase identifier.
Your final structured output must contain compact phases only:
{ "phases": [{ "hash": "<hash-from-cas-put>", "title": "<one-line-summary>" }] }
The current thread ID is provided in the thread context. Order phases so earlier steps unblock later ones. Cover root cause, edge cases, and verification across the phases.`;
export const plannerRole: RoleDefinition<PlannerMeta> = { export const plannerRole: RoleDefinition<PlannerMeta> = {
description: "Breaks the task into sequential phases for the coder.", description: "Breaks the task into sequential phases for the coder.",
systemPrompt: PLANNER_SYSTEM, systemPrompt: PLANNER_SYSTEM,
extractPrompt: extractPrompt:
"Extract the implementation phases from the agent's analysis. Each phase needs a name, description, and acceptance criteria.", "Extract the implementation phases from the agent's output. Each phase has a hash (the CAS content-hash returned by the cas put command) and a title (one-line summary).",
schema: plannerMetaSchema, schema: plannerMetaSchema,
}; };
@@ -16,15 +16,23 @@ import { createSolveIssueRun, solveIssueModerator } from "../src/index.js";
import type { SolveIssueMeta } from "../src/roles.js"; import type { SolveIssueMeta } from "../src/roles.js";
const DEFAULT_PHASES: PlannerMeta["phases"] = [ const DEFAULT_PHASES: PlannerMeta["phases"] = [
{ name: "phase-a", description: "Do the work", acceptance: "Done" }, {
hash: "4KNMR2PX",
title: "Do the work",
},
]; ];
const EXPECT_PLANNER_META: PlannerMeta = { const EXPECT_PLANNER_META: PlannerMeta = {
phases: [{ name: "phase-1", description: "placeholder", acceptance: "placeholder" }], phases: [
{
hash: "7BQST3VW",
title: "placeholder phase",
},
],
}; };
const EXPECT_CODER_META: CoderMeta = { const EXPECT_CODER_META: CoderMeta = {
completedPhase: "phase-1", completedPhase: "7BQST3VW",
filesChanged: [], filesChanged: [],
summary: "", summary: "",
}; };
@@ -109,7 +117,7 @@ function plannerStep(phases: PlannerMeta["phases"] = DEFAULT_PHASES): RoleStep<S
}; };
} }
function coderStep(completedPhase = "phase-a"): RoleStep<SolveIssueMeta> { function coderStep(completedPhase = "4KNMR2PX"): RoleStep<SolveIssueMeta> {
return { return {
role: "coder", role: "coder",
content: "code", content: "code",
@@ -179,22 +187,57 @@ describe("solveIssueModerator", () => {
test("multiple planner phases → coder until all complete, then reviewer", () => { test("multiple planner phases → coder until all complete, then reviewer", () => {
const phases: PlannerMeta["phases"] = [ const phases: PlannerMeta["phases"] = [
{ name: "p1", description: "first", acceptance: "a1" }, {
{ name: "p2", description: "second", acceptance: "a2" }, hash: "AA000001",
title: "first phase",
},
{
hash: "AA000002",
title: "second phase",
},
]; ];
expect(solveIssueModerator(makeCtx(20, [plannerStep(phases)]))).toBe("coder"); expect(solveIssueModerator(makeCtx(20, [plannerStep(phases)]))).toBe("coder");
expect(solveIssueModerator(makeCtx(20, [plannerStep(phases), coderStep("p1")]))).toBe("coder"); expect(solveIssueModerator(makeCtx(20, [plannerStep(phases), coderStep("AA000001")]))).toBe(
"coder",
);
expect( expect(
solveIssueModerator(makeCtx(20, [plannerStep(phases), coderStep("p1"), coderStep("p2")])), solveIssueModerator(
makeCtx(20, [plannerStep(phases), coderStep("AA000001"), coderStep("AA000002")]),
),
).toBe("reviewer"); ).toBe("reviewer");
}); });
test("one-shot coder reports only last phase hash → reviewer (moderator treats as all phases done)", () => {
const phases: PlannerMeta["phases"] = [
{ hash: "BB000001", title: "setup branch" },
{ hash: "BB000002", title: "write tests" },
{ hash: "BB000003", title: "verify" },
{ hash: "BB000004", title: "commit and pr" },
];
expect(solveIssueModerator(makeCtx(20, [plannerStep(phases), coderStep("BB000004")]))).toBe(
"reviewer",
);
});
test("unrecognised completedPhase hash → coder retry when budget allows", () => {
const phases: PlannerMeta["phases"] = [
{ hash: "CC000001", title: "first phase" },
{ hash: "CC000002", title: "second phase" },
];
expect(solveIssueModerator(makeCtx(20, [plannerStep(phases), coderStep("all-done")]))).toBe(
"coder",
);
});
test("incomplete phases → END when max rounds exhausted", () => { test("incomplete phases → END when max rounds exhausted", () => {
const phases: PlannerMeta["phases"] = [ const phases: PlannerMeta["phases"] = [
{ name: "p1", description: "first", acceptance: "a1" }, { hash: "DD000001", title: "first phase" },
{ name: "p2", description: "second", acceptance: "a2" }, { hash: "DD000002", title: "second phase" },
];
const steps: ModeratorContext<SolveIssueMeta>["steps"] = [
plannerStep(phases),
coderStep("DD000001"),
]; ];
const steps: ModeratorContext<SolveIssueMeta>["steps"] = [plannerStep(phases), coderStep("p1")];
expect(solveIssueModerator(makeCtx(3, steps))).toBe(END); expect(solveIssueModerator(makeCtx(3, steps))).toBe(END);
}); });
}); });
@@ -3,6 +3,25 @@ import { END } from "@uncaged/workflow";
import type { SolveIssueMeta } from "./roles.js"; import type { SolveIssueMeta } from "./roles.js";
function coderFinishedAllPlannedPhases(
phases: ReadonlyArray<{ hash: string }>,
coderCompletedPhases: ReadonlyArray<string>,
): boolean {
if (phases.length === 0) {
return true;
}
const plannedHashes = new Set(phases.map((p) => p.hash));
const lastHash = phases[phases.length - 1].hash;
const explicit = new Set(coderCompletedPhases.filter((h) => plannedHashes.has(h)));
if (phases.every((p) => explicit.has(p.hash))) {
return true;
}
if (coderCompletedPhases.some((h) => h === lastHash)) {
return true;
}
return false;
}
function nextAfterCoder( function nextAfterCoder(
ctx: ModeratorContext<SolveIssueMeta>, ctx: ModeratorContext<SolveIssueMeta>,
maxRounds: number, maxRounds: number,
@@ -12,10 +31,10 @@ function nextAfterCoder(
return "reviewer"; return "reviewer";
} }
const phases = plannerStep.meta.phases; const phases = plannerStep.meta.phases;
const completedPhases = new Set( const coderCompletedPhases = ctx.steps
ctx.steps.filter((s) => s.role === "coder").map((s) => s.meta.completedPhase), .filter((s) => s.role === "coder")
); .map((s) => s.meta.completedPhase);
const allDone = phases.every((p) => completedPhases.has(p.name)); const allDone = coderFinishedAllPlannedPhases(phases, coderCompletedPhases);
if (allDone) { if (allDone) {
return "reviewer"; return "reviewer";
} }
+92
View File
@@ -0,0 +1,92 @@
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
import { mkdtemp, rm } from "node:fs/promises";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { createThreadCas } from "../src/cas.js";
import { hashString } from "../src/hash.js";
describe("createThreadCas", () => {
let casDir: string;
beforeEach(async () => {
casDir = await mkdtemp(join(tmpdir(), "cas-test-"));
});
afterEach(async () => {
await rm(casDir, { recursive: true, force: true });
});
test("put returns consistent hash for same content", async () => {
const cas = createThreadCas(casDir);
const h1 = await cas.put("hello world");
const h2 = await cas.put("hello world");
expect(h1).toBe(h2);
expect(h1).toHaveLength(13);
});
test("put returns hash matching hashString", async () => {
const cas = createThreadCas(casDir);
const content = "some content to store";
const h = await cas.put(content);
expect(h).toBe(hashString(content));
});
test("get returns stored content", async () => {
const cas = createThreadCas(casDir);
const content = "line1\nline2\nline3";
const h = await cas.put(content);
const retrieved = await cas.get(h);
expect(retrieved).toBe(content);
});
test("get returns null for missing hash", async () => {
const cas = createThreadCas(casDir);
const result = await cas.get("0000000000000");
expect(result).toBeNull();
});
test("delete removes entry", async () => {
const cas = createThreadCas(casDir);
const h = await cas.put("to be deleted");
await cas.delete(h);
const result = await cas.get(h);
expect(result).toBeNull();
});
test("delete on missing hash does not throw", async () => {
const cas = createThreadCas(casDir);
await cas.delete("0000000000000");
});
test("list returns all stored hashes", async () => {
const cas = createThreadCas(casDir);
const h1 = await cas.put("aaa");
const h2 = await cas.put("bbb");
const h3 = await cas.put("ccc");
const hashes = await cas.list();
expect(hashes.sort()).toEqual([h1, h2, h3].sort());
});
test("list returns empty array when cas dir does not exist", async () => {
const cas = createThreadCas(join(casDir, "nonexistent"));
const hashes = await cas.list();
expect(hashes).toEqual([]);
});
test("put is idempotent — same content written twice causes no error", async () => {
const cas = createThreadCas(casDir);
const h1 = await cas.put("idempotent");
const h2 = await cas.put("idempotent");
expect(h1).toBe(h2);
const content = await cas.get(h1);
expect(content).toBe("idempotent");
});
test("different content produces different hashes", async () => {
const cas = createThreadCas(casDir);
const h1 = await cas.put("alpha");
const h2 = await cas.put("beta");
expect(h1).not.toBe(h2);
});
});
+70
View File
@@ -0,0 +1,70 @@
import { mkdir, readdir, readFile, rename, unlink, writeFile } from "node:fs/promises";
import { join } from "node:path";
import { hashString } from "./hash.js";
export type CasStore = {
put(content: string): Promise<string>;
get(hash: string): Promise<string | null>;
delete(hash: string): Promise<void>;
list(): Promise<string[]>;
};
export function createThreadCas(casDir: string): CasStore {
async function ensureDir(): Promise<void> {
await mkdir(casDir, { recursive: true });
}
function filePath(hash: string): string {
return join(casDir, `${hash}.txt`);
}
return {
async put(content: string): Promise<string> {
const hash = hashString(content);
await ensureDir();
const target = filePath(hash);
const tmp = `${target}.tmp.${Date.now()}`;
await writeFile(tmp, content, "utf8");
await rename(tmp, target);
return hash;
},
async get(hash: string): Promise<string | null> {
try {
return await readFile(filePath(hash), "utf8");
} catch (e) {
const errObj = e as NodeJS.ErrnoException;
if (errObj.code === "ENOENT") {
return null;
}
throw e;
}
},
async delete(hash: string): Promise<void> {
try {
await unlink(filePath(hash));
} catch (e) {
const errObj = e as NodeJS.ErrnoException;
if (errObj.code === "ENOENT") {
return;
}
throw e;
}
},
async list(): Promise<string[]> {
try {
const entries = await readdir(casDir);
return entries.filter((name) => name.endsWith(".txt")).map((name) => name.slice(0, -4));
} catch (e) {
const errObj = e as NodeJS.ErrnoException;
if (errObj.code === "ENOENT") {
return [];
}
throw e;
}
},
};
}
+7
View File
@@ -15,3 +15,10 @@ export function hashWorkflowBundleBytes(data: Uint8Array): string {
const digest = XXH.h64(0).update(buf).digest(); const digest = XXH.h64(0).update(buf).digest();
return encodeUint64AsCrockford(digestToUint64(digest)); return encodeUint64AsCrockford(digestToUint64(digest));
} }
/** XXH64 (seed 0) over a UTF-8 string, encoded as 13-char Crockford Base32. */
export function hashString(content: string): string {
const buf = Buffer.from(content, "utf8");
const digest = XXH.h64(0).update(buf).digest();
return encodeUint64AsCrockford(digestToUint64(digest));
}
+2 -1
View File
@@ -7,6 +7,7 @@ export {
} from "./base32.js"; } from "./base32.js";
export { buildDescriptor } from "./build-descriptor.js"; export { buildDescriptor } from "./build-descriptor.js";
export { validateWorkflowBundle, type WorkflowBundleValidationInput } from "./bundle-validator.js"; export { validateWorkflowBundle, type WorkflowBundleValidationInput } from "./bundle-validator.js";
export { type CasStore, createThreadCas } from "./cas.js";
export { createWorkflow } from "./create-workflow.js"; export { createWorkflow } from "./create-workflow.js";
export { export {
type ExecuteThreadIo, type ExecuteThreadIo,
@@ -25,7 +26,7 @@ export {
selectForkHistoricalSteps, selectForkHistoricalSteps,
} from "./fork-thread.js"; } from "./fork-thread.js";
export { stringifyWorkflowDescriptor } from "./generate-descriptor.js"; export { stringifyWorkflowDescriptor } from "./generate-descriptor.js";
export { hashWorkflowBundleBytes } from "./hash.js"; export { hashString, hashWorkflowBundleBytes } from "./hash.js";
export { export {
type LlmError, type LlmError,
llmErrorToCause, llmErrorToCause,