Merge pull request 'chore: add output rules to develop roles — suppress verbose diffs' (#244) from chore/slim-role-output into main

This commit is contained in:
2026-05-13 15:01:02 +00:00
12 changed files with 118 additions and 41 deletions
+6
View File
@@ -0,0 +1,6 @@
#!/usr/bin/env bash
# pre-push hook: delegates to the project's `check` script
# (typecheck + biome + lint-log-tags).
# Strict mode makes the hook exit non-zero as soon as the check command fails.
set -euo pipefail

printf '%s\n' "🔍 pre-push: running checks..."
bun run check
printf '%s\n' "✅ pre-push: all checks passed"
+1 -1
View File
@@ -6,7 +6,7 @@
],
"scripts": {
"build": "bunx tsc --build",
"check": "bunx tsc --build && biome check .",
"check": "bunx tsc --build && biome check . && bash scripts/lint-log-tags.sh",
"typecheck": "bunx tsc --build",
"format": "biome format --write .",
"test": "bun run --filter '*' test",
@@ -100,7 +100,7 @@ export function startGatewayWsClient(params: GatewayWsClientParams): () => void
clearReconnectTimer();
const delayMs = Math.min(INITIAL_BACKOFF_MS * 2 ** attempt, MAX_BACKOFF_MS);
attempt++;
params.log("6CJX2RLP", `gateway WebSocket reconnect in ${delayMs}ms (attempt ${attempt})`);
params.log("6CJX2R8P", `gateway WebSocket reconnect in ${delayMs}ms (attempt ${attempt})`);
reconnectTimer = setTimeout(connect, delayMs);
};
@@ -143,7 +143,7 @@ export function startGatewayWsClient(params: GatewayWsClientParams): () => void
ws.addEventListener("message", (ev) => {
const data = ev.data;
if (typeof data !== "string") {
params.log("T9W2KL5H", "gateway WebSocket non-text frame ignored");
params.log("T9W2K35H", "gateway WebSocket non-text frame ignored");
return;
}
void handleGatewayMessage(ws, data, params).catch((e: unknown) => {
@@ -1,9 +1,4 @@
import {
BaseEdge,
EdgeLabelRenderer,
type EdgeProps,
getSmoothStepPath,
} from "@xyflow/react";
import { BaseEdge, EdgeLabelRenderer, type EdgeProps, getSmoothStepPath } from "@xyflow/react";
import type { ConditionEdgeData } from "./types.ts";
// Must match the FEEDBACK_OFFSET_X in use-layout.ts
@@ -15,12 +10,7 @@ const FEEDBACK_RADIUS = 16;
* Build an SVG path for a feedback (back) edge that routes to the right of the nodes.
* The path goes: source right → arc → vertical up → arc → target right
*/
function feedbackPath(
sourceX: number,
sourceY: number,
targetX: number,
targetY: number,
): string {
function feedbackPath(sourceX: number, sourceY: number, targetX: number, targetY: number): string {
const rightX = Math.max(sourceX, targetX) + FEEDBACK_OFFSET_X;
const r = FEEDBACK_RADIUS;
@@ -42,6 +32,7 @@ function feedbackPath(
return segments.join(" ");
}
// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: edge routing logic is inherently branchy
export function ConditionEdge(props: EdgeProps) {
const {
id,
@@ -1,7 +1,7 @@
import type { Edge, Node } from "@xyflow/react";
import { useMemo } from "react";
import type { WorkflowGraphEdge } from "../../api.ts";
import type { ConditionEdgeData, NodeState, RoleNodeData, TerminalNodeData } from "./types.ts";
import type { NodeState, RoleNodeData, TerminalNodeData } from "./types.ts";
const START_ID = "__start__";
const END_ID = "__end__";
@@ -41,6 +41,7 @@ function edgeKey(e: WorkflowGraphEdge): string {
* Forward edges go from lower rank to higher rank; feedback edges go backwards.
* Self-loops are neither forward nor feedback — they're handled separately.
*/
// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: topological sort is inherently branchy
function extractSpine(edges: readonly WorkflowGraphEdge[]): string[] {
// Collect all node IDs
const ids = new Set<string>();
@@ -213,8 +214,8 @@ function computeLayout(input: LayoutInput): LayoutResult {
isFallback,
isFeedback,
isSelfLoop,
labelX,
labelY,
labelX,
labelY,
},
};
});
@@ -223,8 +224,5 @@ function computeLayout(input: LayoutInput): LayoutResult {
}
/**
 * React hook that memoizes the result of `computeLayout` for the given input.
 *
 * The memo is keyed on the `edges`, `roles` and `nodeStates` references rather
 * than on the `input` wrapper object, so a caller that builds a fresh `input`
 * object on every render still reuses the cached layout while those three
 * references are unchanged.
 *
 * @param input - Layout input, forwarded unchanged to `computeLayout`.
 * @returns The layout produced by `computeLayout`.
 */
export function useLayout(input: LayoutInput): LayoutResult {
  return useMemo(
    () => computeLayout(input),
    // NOTE(review): assumes `computeLayout` reads only these three fields of
    // `input` — confirm. If an exhaustive-deps lint objects, suppress it here
    // rather than depending on `input`, whose identity may change every render.
    [input.edges, input.roles, input.nodeStates],
  );
}
@@ -9,7 +9,9 @@ import type { DevelopMeta } from "../src/roles.js";
const developModerator = tableToModerator(developTable);
const DEFAULT_PHASES: PlannerMeta["phases"] = [
type PlannedMeta = Extract<PlannerMeta, { status: "planned" }>;
const DEFAULT_PHASES: PlannedMeta["phases"] = [
{
hash: "4KNMR2PX",
title: "Do the work",
@@ -36,11 +38,11 @@ function makeCtx(steps: ModeratorContext<DevelopMeta>["steps"]): ModeratorContex
};
}
/**
 * Build a stub planner step in the "planned" state for moderator tests.
 *
 * @param phases - Phases to put in the step's meta; defaults to `DEFAULT_PHASES`.
 * @returns A planner `RoleStep` whose `refs` are the hashes of `phases`.
 */
function plannerStep(phases: PlannedMeta["phases"] = DEFAULT_PHASES): RoleStep<DevelopMeta> {
  return {
    role: "planner",
    contentHash: "STUBHASHPLANNER001",
    // `status` is the discriminant of the planner meta union, so a planned step must carry it.
    meta: { status: "planned" as const, phases },
    refs: phases.map((p) => p.hash),
    timestamp: 1,
  };
}
@@ -153,7 +155,7 @@ describe("developModerator", () => {
});
test("multiple planner phases → coder until all complete, then reviewer", () => {
const phases: PlannerMeta["phases"] = [
const phases: PlannedMeta["phases"] = [
{ hash: "AA000001", title: "first phase" },
{ hash: "AA000002", title: "second phase" },
];
@@ -167,7 +169,7 @@ describe("developModerator", () => {
});
test("one-shot coder reports only last phase hash → reviewer (moderator treats as all phases done)", () => {
const phases: PlannerMeta["phases"] = [
const phases: PlannedMeta["phases"] = [
{ hash: "BB000001", title: "setup branch" },
{ hash: "BB000002", title: "write tests" },
{ hash: "BB000003", title: "verify" },
@@ -179,7 +181,7 @@ describe("developModerator", () => {
});
test("unrecognised completedPhase hash → coder retry when budget allows", () => {
const phases: PlannerMeta["phases"] = [
const phases: PlannedMeta["phases"] = [
{ hash: "CC000001", title: "first phase" },
{ hash: "CC000002", title: "second phase" },
];
@@ -187,7 +189,7 @@ describe("developModerator", () => {
});
test("incomplete phases → coder retry (supervisor controls termination)", () => {
const phases: PlannerMeta["phases"] = [
const phases: PlannedMeta["phases"] = [
{ hash: "DD000001", title: "first phase" },
{ hash: "DD000002", title: "second phase" },
];
@@ -198,6 +200,17 @@ describe("developModerator", () => {
expect(developModerator(makeCtx(steps))).toBe("coder");
});
// A planner step whose meta status is "aborted" must route the moderator to "__end__".
test("planner aborted → END", () => {
  const abortedPlanner: RoleStep<DevelopMeta> = {
    role: "planner",
    contentHash: "STUBHASHABORT001",
    meta: { status: "aborted", reason: "No workspace path provided" },
    refs: [],
    timestamp: 1,
  };
  const ctx = makeCtx([abortedPlanner]);
  const nextRole = developModerator(ctx);
  expect(nextRole).toBe("__end__");
});
test("committer → END for any committer meta status", () => {
const committed = committerStep({ status: "committed", branch: "f", commitSha: "x" });
const recoverable = committerStep({
@@ -30,6 +30,18 @@ function coderFinishedAllPlannedPhases(
// ── Conditions ─────────────────────────────────────────────────────
/**
 * Condition that holds when the first planner step in the thread reported
 * `status: "aborted"`. It is false when no planner step exists.
 */
const plannerAborted: ModeratorCondition<DevelopMeta> = {
  name: "plannerAborted",
  description: "The planner aborted due to insufficient information",
  // Optional chaining yields `undefined` when there is no planner step, which never equals "aborted".
  check: (ctx) => ctx.steps.find((step) => step.role === "planner")?.meta.status === "aborted",
};
const allPhasesComplete: ModeratorCondition<DevelopMeta> = {
name: "allPhasesComplete",
description: "All planned phases have been completed by the coder",
@@ -38,7 +50,7 @@ const allPhasesComplete: ModeratorCondition<DevelopMeta> = {
if (plannerStep === undefined) {
return true;
}
const phases = plannerStep.meta.phases;
const phases = plannerStep.meta.status === "planned" ? plannerStep.meta.phases : [];
if (!Array.isArray(phases)) {
return true;
}
@@ -71,7 +83,10 @@ const testsPassed: ModeratorCondition<DevelopMeta> = {
const table: ModeratorTable<DevelopMeta> = {
[START]: [{ condition: "FALLBACK", role: "planner" }],
planner: [{ condition: "FALLBACK", role: "coder" }],
planner: [
{ condition: plannerAborted, role: END },
{ condition: "FALLBACK", role: "coder" },
],
coder: [
{ condition: allPhasesComplete, role: "reviewer" },
{ condition: "FALLBACK", role: "coder" },
@@ -25,7 +25,11 @@ The thread ID (26-char Crockford Base32) appears in the first message. If unsure
## Completing a phase
Report which phase you completed using the phase **hash** (not the title). If you legitimately finish every remaining phase in this single turn, set completedPhase to the **last** phase hash in the plan (the workflow treats that as full completion). List the files you changed and summarize what you did.`;
Report which phase you completed using the phase **hash** (not the title). If you legitimately finish every remaining phase in this single turn, set completedPhase to the **last** phase hash in the plan (the workflow treats that as full completion). List the files you changed and summarize what you did.
## Output rules
Keep your final response **short** — a brief summary paragraph plus the structured meta output. Do NOT paste diffs, file contents, or code blocks in your response. The actual changes are already on disk; repeating them wastes tokens. Just say what you did and why.`;
export const coderRole: RoleDefinition<CoderMeta> = {
description:
@@ -6,16 +6,27 @@ export const phaseSchema = z.object({
title: z.string(),
});
/**
 * Schema for the planner's structured output, discriminated on `status`:
 * - `"planned"` carries the list of `phases`;
 * - `"aborted"` carries a `reason` string instead of phases.
 */
export const plannerMetaSchema = z.discriminatedUnion("status", [
  z.object({
    status: z.literal("planned"),
    phases: z.array(phaseSchema),
  }),
  z.object({
    status: z.literal("aborted"),
    reason: z.string().describe("Why the task cannot proceed"),
  }),
]);
export type PlannerMeta = z.infer<typeof plannerMetaSchema>;
const PLANNER_SYSTEM = `You are a **planner** for a software task. Break the work into **sequential phases** the coder will execute one at a time.
const PLANNER_SYSTEM = `You are a **planner** for a software task. Break the work into **sequential phases** the coder will execute one at a time. **Abort** if the prompt lacks critical information (e.g. no project/workspace path, ambiguous target repo).
Run \`uncaged-workflow skill develop\` for thread ID lookup, CAS commands, and meta output guide.
## Prerequisites — check FIRST
The prompt MUST include an **absolute filesystem path** to the project workspace (e.g. \`/home/user/repos/my-project\`). If no workspace path is given and you cannot reliably infer one from context, **abort immediately** with a clear reason explaining what information is missing. Do NOT guess paths.
## Storing phase details — MANDATORY
For each phase, store its full detail text in CAS via \`uncaged-workflow cas put '<content>'\`. The command prints a content-hash — use that as the phase identifier.
@@ -37,13 +48,20 @@ Fewer phases is always better. Each phase must justify its existence — if two
## Output format
After storing all phases via the CLI, output compact JSON only:
{ "phases": [{ "hash": "<hash-from-cas-put>", "title": "<one-line-summary>" }] }
{ "status": "planned", "phases": [{ "hash": "<hash-from-cas-put>", "title": "<one-line-summary>" }] }
Order phases so earlier steps unblock later ones. Cover root cause, edge cases, and verification across the phases.`;
If aborting:
{ "status": "aborted", "reason": "<what is missing>" }
Order phases so earlier steps unblock later ones. Cover root cause, edge cases, and verification across the phases.
## Output rules
Keep your final response **short** — just the JSON with phases. Do NOT paste code snippets, diffs, or implementation details in your response. Phase details are already stored in CAS; your response should only contain the compact phases JSON.`;
/**
 * Role definition for the planner: its description, system prompt, output
 * schema, and how to derive the step's refs from the parsed meta.
 */
export const plannerRole: RoleDefinition<PlannerMeta> = {
  description: "Breaks the task into sequential phases for the coder.",
  systemPrompt: PLANNER_SYSTEM,
  schema: plannerMetaSchema,
  // Only a "planned" result has phases; an aborted result yields no refs.
  extractRefs: (meta) => (meta.status === "planned" ? meta.phases.map((p) => p.hash) : []),
};
@@ -32,7 +32,11 @@ const REVIEWER_SYSTEM = `You are a code reviewer. Review the git diff for correc
- **Approve** only if there are zero issues
- **Reject** with specific issues that must be fixed — every issue you find is blocking
Be thorough. A false approve costs more than a false reject.`;
Be thorough. A false approve costs more than a false reject.
## Output rules
Keep your final response **short**. Summarize findings in a few bullet points, then output the structured verdict. Do NOT paste the full diff or large code blocks in your response.`;
export const reviewerRole: RoleDefinition<ReviewerMeta> = {
description: "Runs git diff checks and sets approved when the change is ready.",
@@ -14,7 +14,11 @@ export const testerMetaSchema = z.discriminatedUnion("status", [
export type TesterMeta = z.infer<typeof testerMetaSchema>;
const TESTER_SYSTEM = `You are a tester. Run the project's test suite, build, and lint commands. Check what commands are available from the preparer's output in the thread. Report pass/fail with details of what failed.`;
const TESTER_SYSTEM = `You are a tester. Run the project's test suite, build, and lint commands. Check what commands are available from the preparer's output in the thread. Report pass/fail with details of what failed.
## Output rules
Keep your final response **short**. Report pass/fail with a brief summary of failures (if any). Do NOT paste full test output or build logs — just the key error lines.`;
export const testerRole: RoleDefinition<TesterMeta> = {
description: "Runs test, build, and lint commands and reports pass or fail with details.",
+24
View File
@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Validate Crockford Base32 log tags in .log("TAG", ...) calls.
# Crockford Base32 excludes: I, L, O, U
#
# Scans *.ts files under packages/ (skipping node_modules and /dist/ paths) for
# .log("XXXXXXXX" calls with an 8-character alphanumeric tag, and reports every
# tag that contains one of the excluded letters in either case.
# Exit status: 1 if at least one invalid tag was found, 0 otherwise.
set -euo pipefail

ROOT="$(cd "$(dirname "$0")/.." && pwd)"
BAD=0

# Patterns are kept in variables so that [[ =~ ]] treats them as regexes.
# TAG_RE mirrors the grep pattern below, so the tag that gets checked is
# exactly the one that made grep select the line.
TAG_RE='\.log\("([A-Za-z0-9]{8})"'
INVALID_RE='[ILOUilou]'

while IFS= read -r match; do
  # grep -rn emits "file:line:content".
  file="${match%%:*}"
  rest="${match#*:}"
  line="${rest%%:*}"
  code="${rest#*:}"

  # Check every tag on the line, not just the first one.
  while [[ "$code" =~ $TAG_RE ]]; do
    # Save both captures now: the next [[ =~ ]] overwrites BASH_REMATCH.
    tag="${BASH_REMATCH[1]}"
    hit="${BASH_REMATCH[0]}"
    if [[ "$tag" =~ $INVALID_RE ]]; then
      echo "${file}:${line} tag \"${tag}\" contains invalid Crockford Base32 char (I/L/O/U)"
      BAD=1
    fi
    # Drop everything up to and including this match, then look for the next one.
    code="${code#*"$hit"}"
  done
done < <(grep -rn '\.log("[A-Za-z0-9]\{8\}"' "$ROOT/packages/" --include='*.ts' \
  | grep -v node_modules | grep -v '/dist/')

if [ "$BAD" -eq 0 ]; then
  echo " ✅ All log tags are valid Crockford Base32"
fi
exit "$BAD"