Merge pull request 'feat(claude-code): enrich step details with per-turn breakdown' (#423) from feat/422-claude-code-detail-enrichment into main

This commit is contained in:
2026-05-23 08:19:20 +00:00
7 changed files with 456 additions and 82 deletions
@@ -2,6 +2,7 @@ import { describe, expect, test } from "bun:test";
import { createMemoryStore, walk } from "@uncaged/json-cas";
import {
parseClaudeCodeJsonOutput,
parseClaudeCodeStreamOutput,
storeClaudeCodeDetail,
storeClaudeCodeRawOutput,
} from "../src/session-detail.js";
@@ -17,6 +18,8 @@ describe("parseClaudeCodeJsonOutput", () => {
num_turns: 3,
total_cost_usd: 0.08,
duration_ms: 10276,
stop_reason: "end_turn",
usage: { input_tokens: 100, output_tokens: 50 },
});
const parsed = parseClaudeCodeJsonOutput(stdout);
expect(parsed).not.toBeNull();
@@ -27,22 +30,10 @@ describe("parseClaudeCodeJsonOutput", () => {
expect(parsed!.numTurns).toBe(3);
expect(parsed!.totalCostUsd).toBe(0.08);
expect(parsed!.durationMs).toBe(10276);
});
test("parses error_max_turns result", () => {
const stdout = JSON.stringify({
type: "result",
subtype: "error_max_turns",
result: "Ran out of turns",
session_id: "abc-def",
num_turns: 90,
total_cost_usd: 1.5,
duration_ms: 50000,
});
const parsed = parseClaudeCodeJsonOutput(stdout);
expect(parsed).not.toBeNull();
expect(parsed!.subtype).toBe("error_max_turns");
expect(parsed!.result).toBe("Ran out of turns");
expect(parsed!.stopReason).toBe("end_turn");
expect(parsed!.usage.inputTokens).toBe(100);
expect(parsed!.usage.outputTokens).toBe(50);
expect(parsed!.turns).toEqual([]);
});
test("returns null for non-JSON output", () => {
@@ -57,45 +48,157 @@ describe("parseClaudeCodeJsonOutput", () => {
});
});
describe("storeClaudeCodeDetail", () => {
test("stores claude-code-detail CAS node and returns output + detailHash", async () => {
const store = createMemoryStore();
const parsed: ClaudeCodeParsedResult = {
type: "result",
subtype: "success",
result: "The answer",
sessionId: "abc-123",
numTurns: 5,
totalCostUsd: 0.12,
durationMs: 15000,
};
describe("parseClaudeCodeStreamOutput", () => {
test("parses stream-json output with turns", () => {
const lines = [
JSON.stringify({
type: "system",
subtype: "init",
session_id: "sess-123",
model: "claude-sonnet-4.5",
tools: ["Bash", "Read"],
}),
JSON.stringify({
type: "assistant",
message: {
role: "assistant",
content: [
{ type: "text", text: "I'll list the files." },
{ type: "tool_use", id: "tool_1", name: "Bash", input: { command: "ls" } },
],
},
session_id: "sess-123",
}),
JSON.stringify({
type: "user",
message: {
role: "user",
content: [
{ type: "tool_result", tool_use_id: "tool_1", content: "file1.ts\nfile2.ts" },
],
},
session_id: "sess-123",
}),
JSON.stringify({
type: "assistant",
message: {
role: "assistant",
content: [{ type: "text", text: "There are 2 files." }],
},
session_id: "sess-123",
}),
JSON.stringify({
type: "result",
subtype: "success",
result: "There are 2 files.",
session_id: "sess-123",
num_turns: 2,
total_cost_usd: 0.05,
duration_ms: 5000,
stop_reason: "end_turn",
usage: {
input_tokens: 200,
output_tokens: 30,
cache_read_input_tokens: 100,
cache_creation_input_tokens: 0,
},
}),
];
const stdout = lines.join("\n");
const parsed = parseClaudeCodeStreamOutput(stdout);
expect(parsed).not.toBeNull();
expect(parsed!.model).toBe("claude-sonnet-4.5");
expect(parsed!.sessionId).toBe("sess-123");
expect(parsed!.result).toBe("There are 2 files.");
expect(parsed!.stopReason).toBe("end_turn");
expect(parsed!.usage.inputTokens).toBe(200);
expect(parsed!.usage.outputTokens).toBe(30);
expect(parsed!.usage.cacheReadInputTokens).toBe(100);
// Turns: assistant(text+tool), tool_result, assistant(text)
expect(parsed!.turns).toHaveLength(3);
expect(parsed!.turns[0]!.role).toBe("assistant");
expect(parsed!.turns[0]!.content).toBe("I'll list the files.");
expect(parsed!.turns[0]!.toolCalls).toHaveLength(1);
expect(parsed!.turns[0]!.toolCalls![0]!.name).toBe("Bash");
expect(parsed!.turns[1]!.role).toBe("tool_result");
expect(parsed!.turns[1]!.content).toBe("file1.ts\nfile2.ts");
expect(parsed!.turns[2]!.role).toBe("assistant");
expect(parsed!.turns[2]!.content).toBe("There are 2 files.");
expect(parsed!.turns[2]!.toolCalls).toBeNull();
});
test("returns null when no result line", () => {
const stdout = JSON.stringify({ type: "system", model: "test" });
expect(parseClaudeCodeStreamOutput(stdout)).toBeNull();
});
test("skips invalid JSON lines gracefully", () => {
const lines = [
"not json",
JSON.stringify({
type: "result",
subtype: "success",
result: "ok",
session_id: "s1",
num_turns: 1,
total_cost_usd: 0.01,
duration_ms: 1000,
stop_reason: "end_turn",
usage: {},
}),
];
const parsed = parseClaudeCodeStreamOutput(lines.join("\n"));
expect(parsed).not.toBeNull();
expect(parsed!.result).toBe("ok");
expect(parsed!.turns).toHaveLength(0);
});
});
describe("storeClaudeCodeDetail", () => {
const baseParsed: ClaudeCodeParsedResult = {
type: "result",
subtype: "success",
result: "The answer",
sessionId: "abc-123",
numTurns: 5,
totalCostUsd: 0.12,
durationMs: 15000,
model: "claude-sonnet-4.5",
stopReason: "end_turn",
usage: { inputTokens: 100, outputTokens: 50, cacheReadInputTokens: 0, cacheCreationInputTokens: 0 },
turns: [
{ index: 0, role: "assistant", content: "hello", toolCalls: null },
{ index: 1, role: "tool_result", content: "world", toolCalls: null },
],
};
test("stores detail with per-turn CAS nodes", async () => {
const store = createMemoryStore();
const { detailHash, output, sessionId } = await storeClaudeCodeDetail(store, baseParsed);
const { detailHash, output, sessionId } = await storeClaudeCodeDetail(store, parsed);
expect(detailHash).toHaveLength(13);
expect(output).toBe("The answer");
expect(sessionId).toBe("abc-123");
const node = await store.get(detailHash);
expect(node).not.toBeNull();
expect(node!.payload.sessionId).toBe("abc-123");
expect(node!.payload.numTurns).toBe(5);
expect(node!.payload.totalCostUsd).toBe(0.12);
expect(node!.payload.durationMs).toBe(15000);
expect(node!.payload.model).toBe("claude-sonnet-4.5");
expect(node!.payload.stopReason).toBe("end_turn");
expect(node!.payload.usage.inputTokens).toBe(100);
expect(node!.payload.turns).toHaveLength(2);
// Verify turn CAS nodes
const turn0 = await store.get(node!.payload.turns[0]);
expect(turn0).not.toBeNull();
expect(turn0!.payload.role).toBe("assistant");
expect(turn0!.payload.content).toBe("hello");
});
test("detail node is walkable from root", async () => {
const store = createMemoryStore();
const parsed: ClaudeCodeParsedResult = {
type: "result",
subtype: "success",
result: "walkable test",
sessionId: "walk-123",
numTurns: 1,
totalCostUsd: 0.01,
durationMs: 1000,
};
const { detailHash } = await storeClaudeCodeDetail(store, parsed);
const { detailHash } = await storeClaudeCodeDetail(store, baseParsed);
const visited: string[] = [];
walk(store, detailHash, (hash) => visited.push(hash));
expect(visited.length).toBeGreaterThan(0);
@@ -1,6 +1,8 @@
import { spawn } from "node:child_process";
import type { Store } from "@uncaged/json-cas";
import { createLogger } from "@uncaged/workflow-util";
import {
type AgentContext,
type AgentRunResult,
@@ -10,7 +12,9 @@ import {
setCachedSessionId,
} from "@uncaged/workflow-agent-kit";
import { parseClaudeCodeJsonOutput, storeClaudeCodeDetail } from "./session-detail.js";
import { parseClaudeCodeStreamOutput, storeClaudeCodeDetail } from "./session-detail.js";
const log = createLogger({ sink: { kind: "stderr" } });
const CLAUDE_COMMAND = "claude";
const CLAUDE_MAX_TURNS = 90;
@@ -88,7 +92,8 @@ function spawnClaudeRun(prompt: string): Promise<{ stdout: string; stderr: strin
"-p",
prompt,
"--output-format",
"json",
"stream-json",
"--verbose",
"--dangerously-skip-permissions",
"--max-turns",
String(CLAUDE_MAX_TURNS),
@@ -105,7 +110,8 @@ function spawnClaudeResume(
"--resume",
sessionId,
"--output-format",
"json",
"stream-json",
"--verbose",
"--dangerously-skip-permissions",
"--max-turns",
String(CLAUDE_MAX_TURNS),
@@ -113,7 +119,7 @@ function spawnClaudeResume(
}
async function processClaudeOutput(stdout: string, store: Store): Promise<AgentRunResult> {
const parsed = parseClaudeCodeJsonOutput(stdout);
const parsed = parseClaudeCodeStreamOutput(stdout);
if (parsed !== null) {
const { detailHash, output, sessionId } = await storeClaudeCodeDetail(store, parsed);
@@ -121,7 +127,7 @@ async function processClaudeOutput(stdout: string, store: Store): Promise<AgentR
}
throw new Error(
`Claude Code returned non-JSON output (first 200 chars): ${stdout.slice(0, 200)}`,
`Claude Code returned unparseable output (first 200 chars): ${stdout.slice(0, 200)}`,
);
}
@@ -135,17 +141,21 @@ async function runClaudeCode(ctx: AgentContext): Promise<AgentRunResult> {
try {
const { stdout } = await spawnClaudeResume(cachedSessionId, fullPrompt);
const result = await processClaudeOutput(stdout, ctx.store);
await setCachedSessionId(ctx.threadId, ctx.role, result.sessionId);
if (result.sessionId !== undefined && result.sessionId !== "") {
await setCachedSessionId(ctx.threadId, ctx.role, result.sessionId);
}
return result;
} catch {
// Resume failed — fall through to fresh run.
} catch (err) {
log("5VKR8N3Q", "resume failed for session %s, falling back to fresh run: %s", cachedSessionId, err);
}
}
}
const { stdout } = await spawnClaudeRun(fullPrompt);
const result = await processClaudeOutput(stdout, ctx.store);
await setCachedSessionId(ctx.threadId, ctx.role, result.sessionId);
if (result.sessionId !== undefined && result.sessionId !== "") {
await setCachedSessionId(ctx.threadId, ctx.role, result.sessionId);
}
return result;
}
@@ -1,6 +1,7 @@
export { buildClaudeCodePrompt, createClaudeCodeAgent } from "./claude-code.js";
export {
parseClaudeCodeJsonOutput,
parseClaudeCodeStreamOutput,
storeClaudeCodeDetail,
storeClaudeCodeRawOutput,
} from "./session-detail.js";
@@ -3,13 +3,52 @@ import type { JSONSchema } from "@uncaged/json-cas";
/**
 * JSON schema for the top-level `claude-code-detail` node.
 *
 * Every listed property is required and no additional properties are allowed.
 * `turns` is an array of strings; each entry refers to a separately stored turn node.
 *
 * Each key appears exactly once: duplicate keys in an object literal are rejected by
 * strict TypeScript, and at runtime only the last value for a key survives anyway.
 */
export const CLAUDE_CODE_DETAIL_SCHEMA: JSONSchema = {
  title: "claude-code-detail",
  type: "object",
  required: [
    "sessionId",
    "model",
    "subtype",
    "durationMs",
    "numTurns",
    "totalCostUsd",
    "stopReason",
    "usage",
    "turns",
  ],
  properties: {
    sessionId: { type: "string" },
    model: { type: "string" },
    subtype: { type: "string" },
    durationMs: { type: "integer" },
    numTurns: { type: "integer" },
    totalCostUsd: { type: "number" },
    stopReason: { type: "string" },
    usage: {
      type: "object",
      properties: {
        inputTokens: { type: "integer" },
        outputTokens: { type: "integer" },
        cacheReadInputTokens: { type: "integer" },
        cacheCreationInputTokens: { type: "integer" },
      },
      required: ["inputTokens", "outputTokens", "cacheReadInputTokens", "cacheCreationInputTokens"],
    },
    turns: {
      type: "array",
      items: { type: "string" },
    },
  },
  additionalProperties: false,
};
/**
 * JSON schema for a single `claude-code-turn` node (one entry of a session's turn list).
 *
 * All four properties are required and no additional properties are allowed.
 */
export const CLAUDE_CODE_TURN_SCHEMA: JSONSchema = {
title: "claude-code-turn",
type: "object",
required: ["index", "role", "content", "toolCalls"],
properties: {
index: { type: "integer" },
role: { type: "string" },
content: { type: "string" },
// Empty schema: the key must be present (see `required`) but its value is unconstrained.
// NOTE(review): presumably left open so both `null` and an array of tool calls validate — confirm.
toolCalls: {},
},
additionalProperties: false,
};
@@ -1,13 +1,171 @@
import { bootstrap, putSchema, type Store } from "@uncaged/json-cas";
import { CLAUDE_CODE_DETAIL_SCHEMA, CLAUDE_CODE_RAW_OUTPUT_SCHEMA } from "./schemas.js";
import type { ClaudeCodeDetailPayload, ClaudeCodeParsedResult } from "./types.js";
import {
CLAUDE_CODE_DETAIL_SCHEMA,
CLAUDE_CODE_RAW_OUTPUT_SCHEMA,
CLAUDE_CODE_TURN_SCHEMA,
} from "./schemas.js";
import type {
ClaudeCodeDetailPayload,
ClaudeCodeParsedResult,
ClaudeCodeToolCall,
ClaudeCodeTurnPayload,
} from "./types.js";
/** Type guard: accepts only non-null objects that are not arrays. */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
/**
 * Return `v` when it is a finite number, otherwise `fallback`.
 *
 * Non-finite values are rejected on purpose: `JSON.parse` can produce `Infinity`
 * (e.g. from `1e999`), and `JSON.stringify` would later turn NaN/Infinity into `null`.
 *
 * @param v Value of unknown type, typically a field read from parsed JSON.
 * @param fallback Value returned when `v` is not a finite number (default `0`).
 */
function safeNumber(v: unknown, fallback = 0): number {
  return typeof v === "number" && Number.isFinite(v) ? v : fallback;
}
/**
 * Return `v` when it is a string, otherwise `fallback`.
 *
 * @param v Value of unknown type.
 * @param fallback Value returned for any non-string input (default `""`).
 */
function safeString(v: unknown, fallback = ""): string {
  if (typeof v !== "string") {
    return fallback;
  }
  return v;
}
/**
 * Collect the `tool_use` blocks of a message content array.
 *
 * A block is kept only when it is an object with `type === "tool_use"` and a string `name`.
 * The tool input is stored as a string: string inputs are kept as-is, anything else is
 * JSON-encoded (a missing input is encoded as `{}`).
 *
 * @param content Content blocks of one message.
 * @returns The tool calls in their original order; empty when there are none.
 */
function extractToolCalls(content: unknown[]): ClaudeCodeToolCall[] {
  return content.flatMap((block): ClaudeCodeToolCall[] => {
    if (!isRecord(block) || block.type !== "tool_use") {
      return [];
    }
    const { name, input } = block;
    if (typeof name !== "string") {
      return [];
    }
    const serializedInput = typeof input === "string" ? input : JSON.stringify(input ?? {});
    return [{ name, input: serializedInput }];
  });
}
/**
 * Concatenate the text of every `text` block in a message content array.
 *
 * Only objects with `type === "text"` and a string `text` contribute; everything else is
 * ignored. Pieces are joined with a newline.
 *
 * @param content Content blocks of one message.
 * @returns The joined text, or `""` when no text block is present.
 */
function extractTextContent(content: unknown[]): string {
  const pieces = content.flatMap((block): string[] =>
    isRecord(block) && block.type === "text" && typeof block.text === "string" ? [block.text] : [],
  );
  return pieces.join("\n");
}
/**
 * Extract the textual payload of every `tool_result` block in a user message content array.
 *
 * A tool result's `content` may be a plain string or an array of content blocks. Strings are
 * used as-is; for arrays the `text` blocks are joined with a newline (non-text blocks such as
 * images are ignored). Any other shape contributes an empty string, so each `tool_result`
 * block still yields exactly one entry. Entries are joined with a newline.
 *
 * @param content Content blocks of one user message.
 * @returns The joined tool result text, or `""` when there is no `tool_result` block.
 */
function extractToolResultContent(content: unknown[]): string {
  const results: string[] = [];
  for (const item of content) {
    if (!isRecord(item) || item.type !== "tool_result") {
      continue;
    }
    const payload = item.content;
    if (typeof payload === "string") {
      results.push(payload);
    } else if (Array.isArray(payload)) {
      // Block-array form: keep the text parts instead of dropping the whole result.
      results.push(extractTextContent(payload));
    } else {
      results.push("");
    }
  }
  return results.join("\n");
}
/**
 * Parse the NDJSON produced by Claude Code's stream-json output format.
 *
 * Each line is decoded on its own; lines that are not valid JSON objects are skipped.
 * - `system` events provide the model name (the last one carrying a string `model` wins).
 * - `assistant` events become an "assistant" turn when they carry text or tool calls.
 * - `user` events become a "tool_result" turn when their tool result text is non-empty.
 * - the last `result` event provides the session summary.
 *
 * @param stdout Raw stdout captured from the CLI.
 * @returns The summary plus the recorded turns, or `null` when no `result` event exists or
 *   its `session_id`, `result` or `subtype` is not a string.
 */
export function parseClaudeCodeStreamOutput(stdout: string): ClaudeCodeParsedResult | null {
  const turns: ClaudeCodeTurnPayload[] = [];
  let summary: Record<string, unknown> | null = null;
  let model = "";

  for (const rawLine of stdout.trim().split("\n")) {
    let event: unknown;
    try {
      event = JSON.parse(rawLine);
    } catch {
      continue;
    }
    if (!isRecord(event)) continue;

    switch (event.type) {
      case "system": {
        if (typeof event.model === "string") {
          model = event.model;
        }
        break;
      }
      case "assistant": {
        const message = event.message;
        if (!isRecord(message)) break;
        const blocks: unknown[] = Array.isArray(message.content) ? message.content : [];
        const text = extractTextContent(blocks);
        const calls = extractToolCalls(blocks);
        // Assistant events without text and without tool calls are not recorded.
        if (text === "" && calls.length === 0) break;
        turns.push({
          // Turns are numbered in recording order, so the next index is the current length.
          index: turns.length,
          role: "assistant",
          content: text,
          toolCalls: calls.length > 0 ? calls : null,
        });
        break;
      }
      case "user": {
        const message = event.message;
        if (!isRecord(message)) break;
        const blocks: unknown[] = Array.isArray(message.content) ? message.content : [];
        const toolOutput = extractToolResultContent(blocks);
        if (toolOutput === "") break;
        turns.push({
          index: turns.length,
          role: "tool_result",
          content: toolOutput,
          toolCalls: null,
        });
        break;
      }
      case "result": {
        summary = event;
        break;
      }
      default:
        break;
    }
  }

  if (summary === null) {
    return null;
  }

  const { session_id: sessionId, result, subtype } = summary;
  const hasRequiredStrings =
    typeof sessionId === "string" && typeof result === "string" && typeof subtype === "string";
  if (!hasRequiredStrings) {
    return null;
  }

  const rawUsage = isRecord(summary.usage) ? summary.usage : {};
  const usage = {
    inputTokens: safeNumber(rawUsage.input_tokens),
    outputTokens: safeNumber(rawUsage.output_tokens),
    cacheReadInputTokens: safeNumber(rawUsage.cache_read_input_tokens),
    cacheCreationInputTokens: safeNumber(rawUsage.cache_creation_input_tokens),
  };

  return {
    type: safeString(summary.type, "result"),
    subtype: subtype as ClaudeCodeParsedResult["subtype"],
    result: result as string,
    sessionId: sessionId as string,
    numTurns: safeNumber(summary.num_turns),
    totalCostUsd: safeNumber(summary.total_cost_usd),
    durationMs: safeNumber(summary.duration_ms),
    model,
    stopReason: safeString(summary.stop_reason),
    usage,
    turns,
  };
}
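/**
 * Legacy: parse Claude Code plain JSON output (`--output-format json`, non-streaming).
 * Must be called explicitly; the stream-json parser does not fall back to it.
 * The result carries an empty `model` and no `turns`.
 */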
export function parseClaudeCodeJsonOutput(stdout: string): ClaudeCodeParsedResult | null {
let parsed: unknown;
try {
@@ -16,9 +174,7 @@ export function parseClaudeCodeJsonOutput(stdout: string): ClaudeCodeParsedResul
return null;
}
if (!isRecord(parsed)) {
return null;
}
if (!isRecord(parsed)) return null;
const sessionId = parsed.session_id;
const result = parsed.result;
@@ -28,44 +184,68 @@ export function parseClaudeCodeJsonOutput(stdout: string): ClaudeCodeParsedResul
return null;
}
const usage = isRecord(parsed.usage) ? parsed.usage : {};
return {
type: typeof parsed.type === "string" ? parsed.type : "result",
type: safeString(parsed.type, "result"),
subtype: subtype as ClaudeCodeParsedResult["subtype"],
result,
sessionId,
numTurns: typeof parsed.num_turns === "number" ? parsed.num_turns : 0,
totalCostUsd: typeof parsed.total_cost_usd === "number" ? parsed.total_cost_usd : 0,
durationMs: typeof parsed.duration_ms === "number" ? parsed.duration_ms : 0,
numTurns: safeNumber(parsed.num_turns),
totalCostUsd: safeNumber(parsed.total_cost_usd),
durationMs: safeNumber(parsed.duration_ms),
model: "",
stopReason: safeString(parsed.stop_reason),
usage: {
inputTokens: safeNumber(usage.input_tokens),
outputTokens: safeNumber(usage.output_tokens),
cacheReadInputTokens: safeNumber(usage.cache_read_input_tokens),
cacheCreationInputTokens: safeNumber(usage.cache_creation_input_tokens),
},
turns: [],
};
}
/** Values returned by `putSchema` for each schema this module registers, keyed by purpose. */
type ClaudeCodeSchemaHashes = {
// Registered from CLAUDE_CODE_DETAIL_SCHEMA.
detail: string;
// Registered from CLAUDE_CODE_TURN_SCHEMA.
turn: string;
// Registered from CLAUDE_CODE_RAW_OUTPUT_SCHEMA.
rawOutput: string;
};
/**
 * Bootstrap the store and register the detail, turn and raw-output schemas.
 *
 * The three `putSchema` calls run concurrently; `Promise.all` keeps the results in call
 * order, which is what the destructuring below relies on.
 *
 * @param store Store to register the schemas in.
 * @returns The value `putSchema` returned for each schema.
 */
async function registerSchemas(store: Store): Promise<ClaudeCodeSchemaHashes> {
  await bootstrap(store);
  const [detail, turn, rawOutput] = await Promise.all([
    putSchema(store, CLAUDE_CODE_DETAIL_SCHEMA),
    putSchema(store, CLAUDE_CODE_TURN_SCHEMA),
    putSchema(store, CLAUDE_CODE_RAW_OUTPUT_SCHEMA),
  ]);
  return { detail, turn, rawOutput };
}
/** Store parsed Claude Code result as a CAS detail node. */
/** Store parsed Claude Code result with per-turn breakdown as CAS detail nodes. */
export async function storeClaudeCodeDetail(
store: Store,
parsed: ClaudeCodeParsedResult,
): Promise<{ detailHash: string; output: string; sessionId: string }> {
const schemas = await registerSchemas(store);
// Store each turn as an individual CAS node
const turnHashes: string[] = [];
for (const turn of parsed.turns) {
const hash = await store.put(schemas.turn, turn);
turnHashes.push(hash);
}
const detail: ClaudeCodeDetailPayload = {
sessionId: parsed.sessionId,
model: parsed.model,
subtype: parsed.subtype,
durationMs: parsed.durationMs,
numTurns: parsed.numTurns,
totalCostUsd: parsed.totalCostUsd,
durationMs: parsed.durationMs,
subtype: parsed.subtype,
stopReason: parsed.stopReason,
usage: parsed.usage,
turns: turnHashes,
};
const detailHash = await store.put(schemas.detail, detail);
@@ -1,5 +1,38 @@
/**
 * Known values of the `subtype` field on a Claude Code `result` event.
 *
 * NOTE(review): the parsers in this package cast any string `subtype` to this union without
 * checking membership — confirm the list covers every subtype the CLI can emit.
 */
export type ClaudeCodeResultSubtype = "success" | "error_max_turns" | "error_budget";
/** A single tool call within an assistant turn. */
export type ClaudeCodeToolCall = {
// Tool name, taken from the `name` of a `tool_use` content block.
name: string;
// Tool input as a string: string inputs are kept verbatim, other values are JSON-encoded.
input: string;
};
/** A single turn (assistant text, tool use, or tool result). */
export type ClaudeCodeTurnPayload = {
// Zero-based position of the turn in recording order.
index: number;
// "assistant" for assistant messages, "tool_result" for tool output carried by user messages.
role: "assistant" | "tool_result";
// Joined text of the turn; may be empty for an assistant turn that only has tool calls.
content: string;
// Tool calls made in an assistant turn; null when there are none and for tool_result turns.
toolCalls: ClaudeCodeToolCall[] | null;
};
/** Top-level detail stored as CAS node. */
export type ClaudeCodeDetailPayload = {
// Session identifier copied from the parsed result.
sessionId: string;
// Model name; empty string when the parsed output did not provide one.
model: string;
// Result subtype copied from the parsed result (stored as a plain string).
subtype: string;
// Duration in milliseconds as reported by the result event.
durationMs: number;
// Number of turns as reported by the result event (not the length of `turns`).
numTurns: number;
// Total cost in USD as reported by the result event.
totalCostUsd: number;
// Stop reason reported by the result event; empty string when absent.
stopReason: string;
// Token counters from the result event's `usage` object; each defaults to 0 when missing.
usage: {
inputTokens: number;
outputTokens: number;
cacheReadInputTokens: number;
cacheCreationInputTokens: number;
};
// One entry per recorded turn, in turn order.
turns: string[]; // CAS hashes of ClaudeCodeTurnPayload
};
/** Intermediate parsed result from stream-json output. */
export type ClaudeCodeParsedResult = {
type: string;
subtype: ClaudeCodeResultSubtype;
@@ -8,12 +41,13 @@ export type ClaudeCodeParsedResult = {
numTurns: number;
totalCostUsd: number;
durationMs: number;
};
export type ClaudeCodeDetailPayload = {
sessionId: string;
numTurns: number;
totalCostUsd: number;
durationMs: number;
subtype: string;
model: string;
stopReason: string;
usage: {
inputTokens: number;
outputTokens: number;
cacheReadInputTokens: number;
cacheCreationInputTokens: number;
};
turns: ClaudeCodeTurnPayload[];
};
@@ -1,4 +1,5 @@
import { mkdir, readFile, writeFile } from "node:fs/promises";
import { mkdir, readFile, rename, writeFile } from "node:fs/promises";
import { randomBytes } from "node:crypto";
import { dirname, join } from "node:path";
import type { ThreadId } from "@uncaged/workflow-protocol";
@@ -45,8 +46,14 @@ async function readCache(): Promise<SessionCache> {
/**
 * Persist the session cache to the cache file as pretty-printed JSON.
 *
 * The content is written to a uniquely named temp file in the same directory and then
 * renamed over the target, so a reader never observes a partially written cache file.
 * The target is never written directly: doing so first would reintroduce exactly the
 * partial-read window the temp-file + rename sequence exists to close.
 *
 * @param cache Cache object to serialize.
 */
async function writeCache(cache: SessionCache): Promise<void> {
  const path = getCachePath();
  const dir = dirname(path);
  await mkdir(dir, { recursive: true });
  // Atomic write: write to temp file then rename to avoid partial reads on concurrent access.
  // NOTE: Current workflow execution is serial (execFileSync), so true concurrency doesn't occur.
  // This is a safety net for future parallel execution.
  const tmpPath = join(dir, `.agent-sessions.${randomBytes(4).toString("hex")}.tmp`);
  await writeFile(tmpPath, `${JSON.stringify(cache, null, 2)}\n`, "utf8");
  await rename(tmpPath, path);
}
/** Read the cached session ID for a thread+role pair. */