diff --git a/packages/workflow-agent-claude-code/__tests__/session-detail.test.ts b/packages/workflow-agent-claude-code/__tests__/session-detail.test.ts index db67558..efd4922 100644 --- a/packages/workflow-agent-claude-code/__tests__/session-detail.test.ts +++ b/packages/workflow-agent-claude-code/__tests__/session-detail.test.ts @@ -154,6 +154,99 @@ describe("parseClaudeCodeStreamOutput", () => { }); }); +describe("parseClaudeCodeStreamOutput — helper extraction", () => { + test("processSystemLine sets model from system message", () => { + const lines = [ + JSON.stringify({ type: "system", model: "claude-opus-4" }), + JSON.stringify({ + type: "result", + subtype: "success", + result: "ok", + session_id: "s1", + num_turns: 0, + total_cost_usd: 0, + duration_ms: 0, + stop_reason: "end_turn", + }), + ]; + const parsed = parseClaudeCodeStreamOutput(lines.join("\n")); + expect(parsed).not.toBeNull(); + expect(parsed!.model).toBe("claude-opus-4"); + }); + + test("processAssistantLine skips empty content", () => { + const lines = [ + JSON.stringify({ type: "assistant", message: { role: "assistant", content: [] } }), + JSON.stringify({ + type: "result", + subtype: "success", + result: "ok", + session_id: "s1", + num_turns: 0, + total_cost_usd: 0, + duration_ms: 0, + stop_reason: "end_turn", + }), + ]; + const parsed = parseClaudeCodeStreamOutput(lines.join("\n")); + expect(parsed).not.toBeNull(); + expect(parsed!.turns).toHaveLength(0); + }); + + test("processUserLine skips when no tool_result items", () => { + const lines = [ + JSON.stringify({ + type: "user", + message: { role: "user", content: [{ type: "text", text: "hi" }] }, + }), + JSON.stringify({ + type: "result", + subtype: "success", + result: "ok", + session_id: "s1", + num_turns: 0, + total_cost_usd: 0, + duration_ms: 0, + stop_reason: "end_turn", + }), + ]; + const parsed = parseClaudeCodeStreamOutput(lines.join("\n")); + expect(parsed).not.toBeNull(); + expect(parsed!.turns).toHaveLength(0); + }); + + test("turn indices are sequential across mixed assistant and user lines", () => { + const lines = [ + JSON.stringify({ + type: "assistant", + message: { role: "assistant", content: [{ type: "text", text: "A" }] }, + }), + JSON.stringify({ + type: "user", + message: { role: "user", content: [{ type: "tool_result", content: "R" }] }, + }), + JSON.stringify({ + type: "assistant", + message: { role: "assistant", content: [{ type: "text", text: "B" }] }, + }), + JSON.stringify({ + type: "result", + subtype: "success", + result: "ok", + session_id: "s1", + num_turns: 3, + total_cost_usd: 0, + duration_ms: 0, + stop_reason: "end_turn", + }), + ]; + const parsed = parseClaudeCodeStreamOutput(lines.join("\n")); + expect(parsed).not.toBeNull(); + expect(parsed!.turns).toHaveLength(3); + expect(parsed!.turns.map((t) => t.index)).toEqual([0, 1, 2]); + }); +}); + describe("storeClaudeCodeDetail", () => { const baseParsed: ClaudeCodeParsedResult = { type: "result", diff --git a/packages/workflow-agent-claude-code/src/session-detail.ts b/packages/workflow-agent-claude-code/src/session-detail.ts index 9309c48..941fdf1 100644 --- a/packages/workflow-agent-claude-code/src/session-detail.ts +++ b/packages/workflow-agent-claude-code/src/session-detail.ts @@ -67,101 +67,105 @@ function extractToolResultContent(content: unknown[]): string { return results.join("\n"); } -/** - * Parse Claude Code stream-json (NDJSON) output. - * Each line is a JSON object with type: "system" | "assistant" | "user" | "result". - */ -export function parseClaudeCodeStreamOutput(stdout: string): ClaudeCodeParsedResult | null { - const lines = stdout.trim().split("\n"); - const turns: ClaudeCodeTurnPayload[] = []; - let resultLine: Record | null = null; - let model = ""; - let turnIndex = 0; +type ParseState = { + turns: ClaudeCodeTurnPayload[]; + resultLine: Record | null; + model: string; + turnIndex: number; +}; - for (const line of lines) { - let parsed: unknown; - try { - parsed = JSON.parse(line); - } catch { - continue; - } - if (!isRecord(parsed)) continue; - - const type = parsed.type; - - if (type === "system" && typeof parsed.model === "string") { - model = parsed.model; - } - - if (type === "assistant" && isRecord(parsed.message)) { - const msg = parsed.message; - const content = Array.isArray(msg.content) ? msg.content : []; - const textContent = extractTextContent(content as unknown[]); - const toolCalls = extractToolCalls(content as unknown[]); - - // Only record turns that have actual content - if (textContent !== "" || toolCalls.length > 0) { - turns.push({ - index: turnIndex++, - role: "assistant", - content: textContent, - toolCalls: toolCalls.length > 0 ? toolCalls : null, - }); - } - } - - if (type === "user" && isRecord(parsed.message)) { - const msg = parsed.message; - const content = Array.isArray(msg.content) ? msg.content : []; - const resultContent = extractToolResultContent(content as unknown[]); - - if (resultContent !== "") { - turns.push({ - index: turnIndex++, - role: "tool_result", - content: resultContent, - toolCalls: null, - }); - } - } - - if (type === "result") { - resultLine = parsed; - } +function processSystemLine(parsed: Record, state: ParseState): void { + if (typeof parsed.model === "string") { + state.model = parsed.model; } +} - if (resultLine === null) return null; +function processAssistantLine(parsed: Record, state: ParseState): void { + if (!isRecord(parsed.message)) return; + const content = Array.isArray(parsed.message.content) ? parsed.message.content : []; + const textContent = extractTextContent(content as unknown[]); + const toolCalls = extractToolCalls(content as unknown[]); + if (textContent !== "" || toolCalls.length > 0) { + state.turns.push({ + index: state.turnIndex++, + role: "assistant", + content: textContent, + toolCalls: toolCalls.length > 0 ? toolCalls : null, + }); + } +} - const sessionId = resultLine.session_id; - const result = resultLine.result; - const subtype = resultLine.subtype; +function processUserLine(parsed: Record, state: ParseState): void { + if (!isRecord(parsed.message)) return; + const content = Array.isArray(parsed.message.content) ? parsed.message.content : []; + const resultContent = extractToolResultContent(content as unknown[]); + if (resultContent !== "") { + state.turns.push({ + index: state.turnIndex++, + role: "tool_result", + content: resultContent, + toolCalls: null, + }); + } +} +function processLine(line: string, state: ParseState): void { + let parsed: unknown; + try { + parsed = JSON.parse(line); + } catch { + return; + } + if (!isRecord(parsed)) return; + const type = parsed.type; + if (type === "system") processSystemLine(parsed, state); + else if (type === "assistant") processAssistantLine(parsed, state); + else if (type === "user") processUserLine(parsed, state); + else if (type === "result") state.resultLine = parsed; +} + +function assembleResult(state: ParseState): ClaudeCodeParsedResult | null { + if (state.resultLine === null) return null; + const sessionId = state.resultLine.session_id; + const result = state.resultLine.result; + const subtype = state.resultLine.subtype; if (typeof sessionId !== "string" || typeof result !== "string" || typeof subtype !== "string") { return null; } - - const usage = isRecord(resultLine.usage) ? resultLine.usage : {}; - + const usage = isRecord(state.resultLine.usage) ? state.resultLine.usage : {}; return { - type: safeString(resultLine.type, "result"), + type: safeString(state.resultLine.type, "result"), subtype: subtype as ClaudeCodeParsedResult["subtype"], result, sessionId, - numTurns: safeNumber(resultLine.num_turns), - totalCostUsd: safeNumber(resultLine.total_cost_usd), - durationMs: safeNumber(resultLine.duration_ms), - model, - stopReason: safeString(resultLine.stop_reason), + numTurns: safeNumber(state.resultLine.num_turns), + totalCostUsd: safeNumber(state.resultLine.total_cost_usd), + durationMs: safeNumber(state.resultLine.duration_ms), + model: state.model, + stopReason: safeString(state.resultLine.stop_reason), usage: { inputTokens: safeNumber(usage.input_tokens), outputTokens: safeNumber(usage.output_tokens), cacheReadInputTokens: safeNumber(usage.cache_read_input_tokens), cacheCreationInputTokens: safeNumber(usage.cache_creation_input_tokens), }, - turns, + turns: state.turns, }; } +/** + * Parse Claude Code stream-json (NDJSON) output. + * Each line is a JSON object with type: "system" | "assistant" | "user" | "result". + */ +export function parseClaudeCodeStreamOutput(stdout: string): ClaudeCodeParsedResult | null { + const lines = stdout.trim().split("\n"); + const state: ParseState = { turns: [], resultLine: null, model: "", turnIndex: 0 }; + for (const line of lines) { + processLine(line, state); + } + return assembleResult(state); +} + /** * Legacy: parse Claude Code plain JSON output (non-streaming). * Falls back when stream-json is not available. diff --git a/packages/workflow-agent-hermes/__tests__/acp-client.test.ts b/packages/workflow-agent-hermes/__tests__/acp-client.test.ts index 10f7b75..e1d6b91 100644 --- a/packages/workflow-agent-hermes/__tests__/acp-client.test.ts +++ b/packages/workflow-agent-hermes/__tests__/acp-client.test.ts @@ -4,6 +4,96 @@ import { HermesAcpClient } from "../src/acp-client.js"; const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i; +describe("handleSessionUpdate — helper extraction", () => { + let client: HermesAcpClient; + + beforeEach(() => { + client = new HermesAcpClient(); + }); + + afterEach(async () => { + await client.close(); + }); + + it("agent_message_chunk accumulates text in messageChunks", () => { + (client as any).handleSessionUpdate({ + sessionUpdate: "agent_message_chunk", + content: { type: "text", text: "hello" }, + }); + (client as any).handleSessionUpdate({ + sessionUpdate: "agent_message_chunk", + content: { type: "text", text: " world" }, + }); + expect((client as any).messageChunks).toEqual(["hello", " world"]); + }); + + it("agent_thought_chunk accumulates reasoning in reasoningChunks", () => { + (client as any).handleSessionUpdate({ + sessionUpdate: "agent_thought_chunk", + content: { type: "text", text: "thinking" }, + }); + expect((client as any).reasoningChunks).toEqual(["thinking"]); + }); + + it("tool_call registers a pending tool and flushes message chunks", () => { + (client as any).messageChunks = ["pre-tool text"]; + (client as any).handleSessionUpdate({ + sessionUpdate: "tool_call", + title: "Bash", + rawInput: { command: "ls" }, + toolCallId: "tc-1", + }); + expect((client as any).pendingTools.get("tc-1")).toEqual({ + name: "Bash", + args: JSON.stringify({ command: "ls" }), + }); + expect((client as any).messageChunks).toEqual([]); + expect((client as any).messages).toHaveLength(1); + expect((client as any).messages[0].role).toBe("assistant"); + }); + + it("tool_call_update completed pushes tool_call and tool messages", () => { + (client as any).pendingTools.set("tc-2", { name: "Read", args: '{"path":"/foo"}' }); + (client as any).handleSessionUpdate({ + sessionUpdate: "tool_call_update", + status: "completed", + toolCallId: "tc-2", + rawOutput: "file contents", + }); + const msgs = (client as any).messages as Array<{ + role: string; + tool_calls: unknown; + content: string | null; + }>; + expect(msgs).toHaveLength(2); + expect(msgs[0].role).toBe("assistant"); + expect(msgs[0].tool_calls).toEqual([ + { function: { name: "Read", arguments: '{"path":"/foo"}' } }, + ]); + expect(msgs[1].role).toBe("tool"); + expect(msgs[1].content).toBe("file contents"); + expect((client as any).pendingTools.has("tc-2")).toBe(false); + }); + + it("tool_call_update with non-string rawOutput JSON-stringifies it", () => { + (client as any).pendingTools.set("tc-3", { name: "Fetch", args: "" }); + (client as any).handleSessionUpdate({ + sessionUpdate: "tool_call_update", + status: "completed", + toolCallId: "tc-3", + rawOutput: { html: "

page

" }, + }); + const msgs = (client as any).messages as Array<{ role: string; content: string | null }>; + expect(msgs[1].content).toBe(JSON.stringify({ html: "

page

" })); + }); + + it("unknown updateType is a no-op", () => { + (client as any).handleSessionUpdate({ sessionUpdate: "unknown_type", data: {} }); + expect((client as any).messages).toHaveLength(0); + expect((client as any).messageChunks).toHaveLength(0); + }); +}); + describe("HermesAcpClient", () => { let client: HermesAcpClient; diff --git a/packages/workflow-agent-hermes/src/acp-client.ts b/packages/workflow-agent-hermes/src/acp-client.ts index beb6213..4b2ab45 100644 --- a/packages/workflow-agent-hermes/src/acp-client.ts +++ b/packages/workflow-agent-hermes/src/acp-client.ts @@ -245,72 +245,75 @@ export class HermesAcpClient { // ---- Session update → structured messages ---- private handleSessionUpdate(update: Record): void { - const updateType = update.sessionUpdate as string; - - switch (updateType) { - case "agent_message_chunk": { - const content = update.content as { type?: string; text?: string } | undefined; - if (content?.type === "text" && typeof content.text === "string") { - this.messageChunks.push(content.text); - } + switch (update.sessionUpdate as string) { + case "agent_message_chunk": + this.handleAgentMessageChunk(update); break; - } - - case "agent_thought_chunk": { - const content = update.content as { type?: string; text?: string } | undefined; - if (content?.type === "text" && typeof content.text === "string") { - this.reasoningChunks.push(content.text); - } + case "agent_thought_chunk": + this.handleAgentThoughtChunk(update); break; - } - - case "tool_call": { - const title = (update.title as string) ?? ""; - const rawInput = update.rawInput; - const args = rawInput !== undefined && rawInput !== null ? JSON.stringify(rawInput) : ""; - const toolCallId = update.toolCallId as string; - this.pendingTools.set(toolCallId, { name: title, args }); - - // Flush accumulated assistant text before tool call - this.flushAssistantMessage(); + case "tool_call": + this.handleToolCall(update); break; - } - - case "tool_call_update": { - const status = update.status as string | undefined; - if (status === "completed" || status === "failed") { - const toolCallId = update.toolCallId as string; - const pending = this.pendingTools.get(toolCallId); - const toolName = pending?.name ?? toolCallId; - const rawOutput = update.rawOutput; - const outputStr = - rawOutput !== undefined && rawOutput !== null - ? typeof rawOutput === "string" - ? rawOutput - : JSON.stringify(rawOutput) - : ""; - this.messages.push({ - role: "assistant", - content: null, - reasoning: null, - tool_calls: [{ function: { name: toolName, arguments: pending?.args ?? "" } }], - }); - this.messages.push({ - role: "tool", - content: outputStr, - reasoning: null, - tool_calls: null, - }); - this.pendingTools.delete(toolCallId); - } + case "tool_call_update": + this.handleToolCallUpdate(update); break; - } - default: break; } } + private handleAgentMessageChunk(update: Record): void { + const content = update.content as { type?: string; text?: string } | undefined; + if (content?.type === "text" && typeof content.text === "string") { + this.messageChunks.push(content.text); + } + } + + private handleAgentThoughtChunk(update: Record): void { + const content = update.content as { type?: string; text?: string } | undefined; + if (content?.type === "text" && typeof content.text === "string") { + this.reasoningChunks.push(content.text); + } + } + + private handleToolCall(update: Record): void { + const title = (update.title as string) ?? ""; + const rawInput = update.rawInput; + const args = rawInput !== undefined && rawInput !== null ? JSON.stringify(rawInput) : ""; + const toolCallId = update.toolCallId as string; + this.pendingTools.set(toolCallId, { name: title, args }); + this.flushAssistantMessage(); + } + + private handleToolCallUpdate(update: Record): void { + const status = update.status as string | undefined; + if (status !== "completed" && status !== "failed") return; + const toolCallId = update.toolCallId as string; + const pending = this.pendingTools.get(toolCallId); + const toolName = pending?.name ?? toolCallId; + const rawOutput = update.rawOutput; + const outputStr = + rawOutput !== undefined && rawOutput !== null + ? typeof rawOutput === "string" + ? rawOutput + : JSON.stringify(rawOutput) + : ""; + this.messages.push({ + role: "assistant", + content: null, + reasoning: null, + tool_calls: [{ function: { name: toolName, arguments: pending?.args ?? "" } }], + }); + this.messages.push({ + role: "tool", + content: outputStr, + reasoning: null, + tool_calls: null, + }); + this.pendingTools.delete(toolCallId); + } + /** Flush any accumulated text/reasoning into an assistant message. */ private flushAssistantMessage(): void { const text = this.messageChunks.join(""); diff --git a/scripts/batch-solve.sh b/scripts/batch-solve.sh new file mode 100755 index 0000000..76cdbe5 --- /dev/null +++ b/scripts/batch-solve.sh @@ -0,0 +1,89 @@ +#!/usr/bin/env bash +# batch-solve.sh — solve multiple Gitea issues via solve-issue workflow +# +# Usage: +# ./scripts/batch-solve.sh [--agent CMD] [--repo OWNER/REPO] [--count N] ISSUE_NUM... +# +# Examples: +# ./scripts/batch-solve.sh 448 449 +# ./scripts/batch-solve.sh --agent "bun run $(pwd)/packages/workflow-agent-claude-code/src/cli.ts" 448 449 +# ./scripts/batch-solve.sh --repo uncaged/workflow --count 15 448 449 + +set -euo pipefail + +AGENT="" +REPO="uncaged/workflow" +COUNT=10 +ISSUES=() + +while [[ $# -gt 0 ]]; do + case "$1" in + --agent) AGENT="$2"; shift 2 ;; + --repo) REPO="$2"; shift 2 ;; + --count) COUNT="$2"; shift 2 ;; + *) ISSUES+=("$1"); shift ;; + esac +done + +if [[ ${#ISSUES[@]} -eq 0 ]]; then + echo "Usage: $0 [--agent CMD] [--repo OWNER/REPO] [--count N] ISSUE_NUM..." >&2 + exit 1 +fi + +AGENT_FLAG="" +if [[ -n "$AGENT" ]]; then + AGENT_FLAG="--agent $AGENT" +fi + +TOTAL=${#ISSUES[@]} +PASSED=0 +FAILED=0 +RESULTS=() + +echo "━━━ Batch solve: ${TOTAL} issues ━━━" +echo "" + +for i in "${!ISSUES[@]}"; do + ISSUE="${ISSUES[$i]}" + NUM=$((i + 1)) + echo "┌─── [$NUM/$TOTAL] Issue #${ISSUE} ───" + + # Read issue title + TITLE=$(tea issues "$ISSUE" -r "$REPO" 2>/dev/null | head -1 | sed 's/^# #[0-9]* //' | sed 's/ (.*//' || echo "unknown") + echo "│ Title: $TITLE" + + # Start thread + PROMPT="Fix issue #${ISSUE} in ${REPO}. Read the issue first with 'tea issues ${ISSUE} -r ${REPO}' for full spec." + THREAD_JSON=$(uwf thread start solve-issue -p "$PROMPT" 2>&1) + THREAD_ID=$(echo "$THREAD_JSON" | python3 -c "import json,sys; print(json.load(sys.stdin)['thread'])") + echo "│ Thread: $THREAD_ID" + + # Run steps + echo "│ Running (max $COUNT steps)..." + # shellcheck disable=SC2086 + if STEP_OUTPUT=$(uwf thread step "$THREAD_ID" $AGENT_FLAG -c "$COUNT" 2>&1); then + # Check if done + LAST_DONE=$(echo "$STEP_OUTPUT" | python3 -c "import json,sys; lines=sys.stdin.read().strip(); data=json.loads(lines); print(data[-1].get('done', False))") + if [[ "$LAST_DONE" == "True" ]]; then + echo "│ ✅ Done!" + PASSED=$((PASSED + 1)) + RESULTS+=("✅ #${ISSUE} — ${TITLE}") + else + echo "│ ⚠️ Ran out of steps (not done)" + FAILED=$((FAILED + 1)) + RESULTS+=("⚠️ #${ISSUE} — ${TITLE} (incomplete)") + fi + else + echo "│ ❌ Failed" + FAILED=$((FAILED + 1)) + RESULTS+=("❌ #${ISSUE} — ${TITLE} (error)") + fi + + echo "└───" + echo "" +done + +echo "━━━ Results: ${PASSED}/${TOTAL} passed, ${FAILED} failed ━━━" +for R in "${RESULTS[@]}"; do + echo " $R" +done