Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4dff320d5c | |||
| 176844d7f5 | |||
| 31695e89a8 |
@@ -1,83 +0,0 @@
|
||||
# Test Spec: uwf setup model connectivity validation (#335)
|
||||
|
||||
## Context
|
||||
|
||||
File: `packages/cli-workflow/src/commands/setup.ts`
|
||||
Test file: `packages/cli-workflow/src/__tests__/setup-validate.test.ts`
|
||||
|
||||
After `cmdSetup` writes config, it should send a test chat completion request to verify the configured model is reachable. If validation fails, warn the user (don't abort — config is already saved).
|
||||
|
||||
## Implementation Notes
|
||||
|
||||
- Add a `validateModel(baseUrl, apiKey, model)` function that sends a minimal chat completion request (`POST /chat/completions` with `messages: [{role:"user",content:"hi"}]`, `max_tokens: 1`)
|
||||
- Returns `Result<void, string>` — ok if 2xx response, error with reason string otherwise
|
||||
- Use `AbortSignal.timeout(15_000)` for the request
|
||||
- Both `cmdSetup` and `cmdSetupInteractive` should call it after saving config
|
||||
- `cmdSetup` returns validation result in its return object: `{ ...existing, validation: { ok: true } | { ok: false, error: string } }`
|
||||
- `cmdSetupInteractive` prints a warning to console if validation fails, success message if it passes
|
||||
- Use the project logger (`createLogger`) — no raw `console.log` except in interactive CLI output (per CLAUDE.md)
|
||||
|
||||
## Test Cases (vitest)
|
||||
|
||||
### 1. `validateModel` — success path
|
||||
- Mock `fetch` to return `{ status: 200, ok: true, json: () => ({}) }`
|
||||
- Call `validateModel(baseUrl, apiKey, model)`
|
||||
- Assert returns `{ ok: true, value: undefined }`
|
||||
- Assert fetch was called with correct URL (`${baseUrl}/chat/completions`), correct headers (`Authorization: Bearer ${apiKey}`), correct body (model, messages, max_tokens: 1)
|
||||
|
||||
### 2. `validateModel` — HTTP error (401 unauthorized)
|
||||
- Mock `fetch` to return `{ status: 401, ok: false, statusText: "Unauthorized" }`
|
||||
- Call `validateModel(baseUrl, apiKey, model)`
|
||||
- Assert returns `{ ok: false, error: <string containing "401"> }`
|
||||
|
||||
### 3. `validateModel` — HTTP error (404 model not found)
|
||||
- Mock `fetch` to return `{ status: 404, ok: false, statusText: "Not Found" }`
|
||||
- Assert returns `{ ok: false, error: <string containing "404"> }`
|
||||
|
||||
### 4. `validateModel` — network timeout
|
||||
- Mock `fetch` to throw `DOMException` with name `AbortError`
|
||||
- Assert returns `{ ok: false, error: <string containing "timeout" or "unreachable"> }`
|
||||
|
||||
### 5. `validateModel` — network error (DNS failure, connection refused)
|
||||
- Mock `fetch` to throw `TypeError("fetch failed")`
|
||||
- Assert returns `{ ok: false, error: <string mentioning connectivity> }`
|
||||
|
||||
### 6. `cmdSetup` — includes validation result on success
|
||||
- Mock global `fetch` for `/chat/completions` to succeed
|
||||
- Call `cmdSetup({ provider, baseUrl, apiKey, model, storageRoot })`
|
||||
- Assert returned object has `validation: { ok: true, value: undefined }`
|
||||
- Assert config files are still written (existing behavior preserved)
|
||||
|
||||
### 7. `cmdSetup` — includes validation result on failure (config still saved)
|
||||
- Mock global `fetch` for `/chat/completions` to return 401
|
||||
- Call `cmdSetup({ ... })`
|
||||
- Assert returned object has `validation: { ok: false, error: ... }`
|
||||
- Assert `config.yaml` and `.env` are still written (validation failure doesn't prevent saving)
|
||||
|
||||
### 8. `cmdSetupInteractive` — prints success message on validation pass
|
||||
- Mock `fetch` for both `/models` and `/chat/completions` to succeed
|
||||
- Mock stdin to provide valid selections
|
||||
- Capture console output
|
||||
- Assert output contains a success message like "Model verified" or "✓"
|
||||
|
||||
### 9. `cmdSetupInteractive` — prints warning on validation failure
|
||||
- Mock `fetch`: `/models` succeeds, `/chat/completions` returns 401
|
||||
- Mock stdin for valid selections
|
||||
- Capture console output
|
||||
- Assert output contains a warning about model not being reachable and suggests trying a different model
|
||||
|
||||
### 10. `validateModel` — request body correctness
|
||||
- Mock `fetch` to capture the request body
|
||||
- Call `validateModel(baseUrl, apiKey, "test-model")`
|
||||
- Assert body is `{ model: "test-model", messages: [{role: "user", content: "hi"}], max_tokens: 1 }`
|
||||
|
||||
## Export Requirements
|
||||
|
||||
- `validateModel` must be exported (for direct unit testing)
|
||||
- Signature: `async function validateModel(baseUrl: string, apiKey: string, model: string): Promise<Result<void, string>>`
|
||||
- `Result` type: `{ ok: true; value: T } | { ok: false; error: E }` (project convention)
|
||||
|
||||
## Files to Create/Modify
|
||||
|
||||
- **New**: `packages/cli-workflow/src/__tests__/setup-validate.test.ts` — all test cases above
|
||||
- **Modify**: `packages/cli-workflow/src/commands/setup.ts` — add `validateModel`, integrate into `cmdSetup` and `cmdSetupInteractive`
|
||||
@@ -1,150 +0,0 @@
|
||||
import { mkdtemp, rm } from "node:fs/promises";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, test, vi } from "vitest";
|
||||
import { cmdSetup, validateModel } from "../commands/setup.js";
|
||||
|
||||
describe("validateModel", () => {
|
||||
const BASE_URL = "https://api.example.com/v1";
|
||||
const API_KEY = "sk-test-key";
|
||||
const MODEL = "test-model";
|
||||
|
||||
afterEach(() => {
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
test("success path — returns ok on 200", async () => {
|
||||
const mockFetch = vi
|
||||
.spyOn(globalThis, "fetch")
|
||||
.mockResolvedValue(new Response(JSON.stringify({}), { status: 200 }));
|
||||
|
||||
const result = await validateModel(BASE_URL, API_KEY, MODEL);
|
||||
|
||||
expect(result).toEqual({ ok: true, value: undefined });
|
||||
expect(mockFetch).toHaveBeenCalledOnce();
|
||||
|
||||
const [url, opts] = mockFetch.mock.calls[0]!;
|
||||
expect(url).toBe(`${BASE_URL}/chat/completions`);
|
||||
expect((opts as RequestInit).headers).toEqual(
|
||||
expect.objectContaining({ Authorization: `Bearer ${API_KEY}` }),
|
||||
);
|
||||
const body = JSON.parse((opts as RequestInit).body as string);
|
||||
expect(body).toEqual({
|
||||
model: MODEL,
|
||||
messages: [{ role: "user", content: "hi" }],
|
||||
max_tokens: 1,
|
||||
});
|
||||
});
|
||||
|
||||
test("HTTP 401 — returns error containing 401", async () => {
|
||||
vi.spyOn(globalThis, "fetch").mockResolvedValue(
|
||||
new Response("Unauthorized", { status: 401, statusText: "Unauthorized" }),
|
||||
);
|
||||
|
||||
const result = await validateModel(BASE_URL, API_KEY, MODEL);
|
||||
|
||||
expect(result.ok).toBe(false);
|
||||
if (!result.ok) {
|
||||
expect(result.error).toContain("401");
|
||||
}
|
||||
});
|
||||
|
||||
test("HTTP 404 — returns error containing 404", async () => {
|
||||
vi.spyOn(globalThis, "fetch").mockResolvedValue(
|
||||
new Response("Not Found", { status: 404, statusText: "Not Found" }),
|
||||
);
|
||||
|
||||
const result = await validateModel(BASE_URL, API_KEY, MODEL);
|
||||
|
||||
expect(result.ok).toBe(false);
|
||||
if (!result.ok) {
|
||||
expect(result.error).toContain("404");
|
||||
}
|
||||
});
|
||||
|
||||
test("network timeout — returns error mentioning timeout", async () => {
|
||||
const err = new DOMException("signal timed out", "AbortError");
|
||||
vi.spyOn(globalThis, "fetch").mockRejectedValue(err);
|
||||
|
||||
const result = await validateModel(BASE_URL, API_KEY, MODEL);
|
||||
|
||||
expect(result.ok).toBe(false);
|
||||
if (!result.ok) {
|
||||
expect(result.error.toLowerCase()).toMatch(/timeout|timed out/);
|
||||
}
|
||||
});
|
||||
|
||||
test("network error (DNS/connection) — returns error mentioning connectivity", async () => {
|
||||
vi.spyOn(globalThis, "fetch").mockRejectedValue(new TypeError("fetch failed"));
|
||||
|
||||
const result = await validateModel(BASE_URL, API_KEY, MODEL);
|
||||
|
||||
expect(result.ok).toBe(false);
|
||||
if (!result.ok) {
|
||||
expect(result.error.toLowerCase()).toMatch(/connect|reach|network/);
|
||||
}
|
||||
});
|
||||
|
||||
test("request body correctness", async () => {
|
||||
const mockFetch = vi
|
||||
.spyOn(globalThis, "fetch")
|
||||
.mockResolvedValue(new Response(JSON.stringify({}), { status: 200 }));
|
||||
|
||||
await validateModel(BASE_URL, API_KEY, "my-special-model");
|
||||
|
||||
const body = JSON.parse((mockFetch.mock.calls[0]![1] as RequestInit).body as string);
|
||||
expect(body).toEqual({
|
||||
model: "my-special-model",
|
||||
messages: [{ role: "user", content: "hi" }],
|
||||
max_tokens: 1,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("cmdSetup with validation", () => {
|
||||
let storageRoot: string;
|
||||
|
||||
beforeEach(async () => {
|
||||
storageRoot = await mkdtemp(join(tmpdir(), "uwf-setup-validate-"));
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
vi.restoreAllMocks();
|
||||
await rm(storageRoot, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
const setupArgs = () => ({
|
||||
provider: "testprovider",
|
||||
baseUrl: "https://api.test.com/v1",
|
||||
apiKey: "sk-test",
|
||||
model: "test-model",
|
||||
storageRoot,
|
||||
});
|
||||
|
||||
test("includes validation result on success", async () => {
|
||||
vi.spyOn(globalThis, "fetch").mockResolvedValue(
|
||||
new Response(JSON.stringify({}), { status: 200 }),
|
||||
);
|
||||
|
||||
const result = await cmdSetup(setupArgs());
|
||||
|
||||
expect(result.validation).toEqual({ ok: true, value: undefined });
|
||||
// Config files should still be written
|
||||
expect(result.configPath).toBeTruthy();
|
||||
expect(result.envPath).toBeTruthy();
|
||||
});
|
||||
|
||||
test("includes validation failure — config still saved", async () => {
|
||||
vi.spyOn(globalThis, "fetch").mockResolvedValue(
|
||||
new Response("Unauthorized", { status: 401, statusText: "Unauthorized" }),
|
||||
);
|
||||
|
||||
const result = await cmdSetup(setupArgs());
|
||||
|
||||
expect(result.validation).toBeDefined();
|
||||
expect((result.validation as { ok: boolean }).ok).toBe(false);
|
||||
// Config files should still be written despite validation failure
|
||||
expect(result.configPath).toBeTruthy();
|
||||
expect(result.envPath).toBeTruthy();
|
||||
});
|
||||
});
|
||||
@@ -2,44 +2,8 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { stdin as input, stdout as output } from "node:process";
|
||||
import { createInterface } from "node:readline/promises";
|
||||
import type { Result } from "@uncaged/workflow-util";
|
||||
import { parse, stringify } from "yaml";
|
||||
|
||||
/**
|
||||
* Send a minimal chat completion request to verify the model is reachable.
|
||||
* Returns ok on 2xx, error with reason string otherwise.
|
||||
*/
|
||||
export async function validateModel(
|
||||
baseUrl: string,
|
||||
apiKey: string,
|
||||
model: string,
|
||||
): Promise<Result<void, string>> {
|
||||
try {
|
||||
const url = `${baseUrl.replace(/\/+$/, "")}/chat/completions`;
|
||||
const res = await fetch(url, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
Authorization: `Bearer ${apiKey}`,
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model,
|
||||
messages: [{ role: "user", content: "hi" }],
|
||||
max_tokens: 1,
|
||||
}),
|
||||
signal: AbortSignal.timeout(15_000),
|
||||
});
|
||||
if (!res.ok) {
|
||||
return { ok: false, error: `HTTP ${res.status} ${res.statusText}` };
|
||||
}
|
||||
return { ok: true, value: undefined };
|
||||
} catch (err: unknown) {
|
||||
if (err instanceof DOMException && err.name === "AbortError") {
|
||||
return { ok: false, error: "Request timed out — model endpoint unreachable" };
|
||||
}
|
||||
return { ok: false, error: `Network error — could not reach endpoint (${String(err)})` };
|
||||
}
|
||||
}
|
||||
import { parse, stringify } from "yaml";
|
||||
|
||||
/**
|
||||
* Preset provider list — embedded to avoid runtime YAML loading dependency.
|
||||
@@ -199,16 +163,12 @@ export async function cmdSetup(args: SetupArgs): Promise<Record<string, unknown>
|
||||
envData[envName] = args.apiKey;
|
||||
saveEnvFile(envPath, envData);
|
||||
|
||||
// Validate model connectivity
|
||||
const validation = await validateModel(args.baseUrl, args.apiKey, args.model);
|
||||
|
||||
return {
|
||||
configPath,
|
||||
envPath,
|
||||
provider: args.provider,
|
||||
model: args.model,
|
||||
defaultAgent: merged.defaultAgent,
|
||||
validation,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -368,7 +328,7 @@ export async function cmdSetupInteractive(storageRoot: string): Promise<Record<s
|
||||
|
||||
console.log(` → ${providerName}/${model}\n`);
|
||||
|
||||
const setupResult = await cmdSetup({
|
||||
await cmdSetup({
|
||||
provider: providerName,
|
||||
baseUrl,
|
||||
apiKey,
|
||||
@@ -376,19 +336,6 @@ export async function cmdSetupInteractive(storageRoot: string): Promise<Record<s
|
||||
storageRoot,
|
||||
});
|
||||
|
||||
// Show validation result
|
||||
if (setupResult.validation && typeof setupResult.validation === "object") {
|
||||
const v = setupResult.validation as { ok: boolean; error?: string };
|
||||
if (v.ok) {
|
||||
console.log("✓ Model verified — connection successful.\n");
|
||||
} else {
|
||||
console.log(`\n⚠ Warning: Could not reach model — ${v.error}`);
|
||||
console.log(
|
||||
" Config saved, but you may want to try a different model or check your API key.\n",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
console.log("Setup complete! Get started:\n");
|
||||
console.log(" uwf workflow put <workflow.yaml> Register a workflow");
|
||||
console.log(' uwf thread start <name> -p "..." Start a thread');
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import type { AgentContext } from "@uncaged/workflow-agent-kit";
|
||||
import { buildClaudeCodePrompt } from "../src/claude-code.js";
|
||||
|
||||
function makeCtx(overrides: Partial<AgentContext> = {}): AgentContext {
|
||||
return {
|
||||
workflow: {
|
||||
roles: {
|
||||
developer: {
|
||||
description: "TDD implementation per test spec",
|
||||
goal: "Write code",
|
||||
capabilities: ["coding"],
|
||||
procedure: "1. Read spec\n2. Write code",
|
||||
output: "List files changed",
|
||||
frontmatter: "",
|
||||
},
|
||||
},
|
||||
conditions: {},
|
||||
graph: {},
|
||||
},
|
||||
role: "developer",
|
||||
start: { prompt: "Fix the bug", workflowHash: "abc123", threadId: "t1" },
|
||||
steps: [],
|
||||
store: {} as AgentContext["store"],
|
||||
outputFormatInstruction: "Use YAML frontmatter",
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
describe("buildClaudeCodePrompt", () => {
|
||||
test("assembles outputFormatInstruction + role prompt + task prompt", () => {
|
||||
const result = buildClaudeCodePrompt(makeCtx());
|
||||
expect(result).toMatch(/^Use YAML frontmatter/);
|
||||
expect(result).toContain("Write code");
|
||||
expect(result).toContain("## Task\nFix the bug");
|
||||
});
|
||||
|
||||
test("includes previous steps as history summary", () => {
|
||||
const ctx = makeCtx({
|
||||
steps: [{ role: "planner", output: '{"plan":"do X"}', agent: "hermes" }],
|
||||
});
|
||||
const result = buildClaudeCodePrompt(ctx);
|
||||
expect(result).toContain("## Previous Steps");
|
||||
expect(result).toContain("Step 1: planner");
|
||||
expect(result).toContain("do X");
|
||||
});
|
||||
|
||||
test("omits history section when steps array is empty", () => {
|
||||
const result = buildClaudeCodePrompt(makeCtx({ steps: [] }));
|
||||
expect(result).not.toContain("## Previous Steps");
|
||||
});
|
||||
|
||||
test("works without outputFormatInstruction", () => {
|
||||
const result = buildClaudeCodePrompt(makeCtx({ outputFormatInstruction: "" }));
|
||||
expect(result).not.toMatch(/^\s*\n/);
|
||||
expect(result).toContain("Write code");
|
||||
expect(result).toContain("## Task");
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,115 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import { createMemoryStore, walk } from "@uncaged/json-cas";
|
||||
import {
|
||||
parseClaudeCodeJsonOutput,
|
||||
storeClaudeCodeDetail,
|
||||
storeClaudeCodeRawOutput,
|
||||
} from "../src/session-detail.js";
|
||||
import type { ClaudeCodeParsedResult } from "../src/types.js";
|
||||
|
||||
describe("parseClaudeCodeJsonOutput", () => {
|
||||
test("parses valid claude -p --output-format json output", () => {
|
||||
const stdout = JSON.stringify({
|
||||
type: "result",
|
||||
subtype: "success",
|
||||
result: "Done fixing bug",
|
||||
session_id: "75e2167f-abc",
|
||||
num_turns: 3,
|
||||
total_cost_usd: 0.08,
|
||||
duration_ms: 10276,
|
||||
});
|
||||
const parsed = parseClaudeCodeJsonOutput(stdout);
|
||||
expect(parsed).not.toBeNull();
|
||||
expect(parsed!.type).toBe("result");
|
||||
expect(parsed!.subtype).toBe("success");
|
||||
expect(parsed!.result).toBe("Done fixing bug");
|
||||
expect(parsed!.sessionId).toBe("75e2167f-abc");
|
||||
expect(parsed!.numTurns).toBe(3);
|
||||
expect(parsed!.totalCostUsd).toBe(0.08);
|
||||
expect(parsed!.durationMs).toBe(10276);
|
||||
});
|
||||
|
||||
test("parses error_max_turns result", () => {
|
||||
const stdout = JSON.stringify({
|
||||
type: "result",
|
||||
subtype: "error_max_turns",
|
||||
result: "Ran out of turns",
|
||||
session_id: "abc-def",
|
||||
num_turns: 90,
|
||||
total_cost_usd: 1.5,
|
||||
duration_ms: 50000,
|
||||
});
|
||||
const parsed = parseClaudeCodeJsonOutput(stdout);
|
||||
expect(parsed).not.toBeNull();
|
||||
expect(parsed!.subtype).toBe("error_max_turns");
|
||||
expect(parsed!.result).toBe("Ran out of turns");
|
||||
});
|
||||
|
||||
test("returns null for non-JSON output", () => {
|
||||
const parsed = parseClaudeCodeJsonOutput("Some random text\nwithout JSON");
|
||||
expect(parsed).toBeNull();
|
||||
});
|
||||
|
||||
test("returns null when session_id is missing", () => {
|
||||
const stdout = JSON.stringify({ type: "result", result: "hi", subtype: "success" });
|
||||
const parsed = parseClaudeCodeJsonOutput(stdout);
|
||||
expect(parsed).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe("storeClaudeCodeDetail", () => {
|
||||
test("stores claude-code-detail CAS node and returns output + detailHash", async () => {
|
||||
const store = createMemoryStore();
|
||||
const parsed: ClaudeCodeParsedResult = {
|
||||
type: "result",
|
||||
subtype: "success",
|
||||
result: "The answer",
|
||||
sessionId: "abc-123",
|
||||
numTurns: 5,
|
||||
totalCostUsd: 0.12,
|
||||
durationMs: 15000,
|
||||
};
|
||||
|
||||
const { detailHash, output, sessionId } = await storeClaudeCodeDetail(store, parsed);
|
||||
expect(detailHash).toHaveLength(13);
|
||||
expect(output).toBe("The answer");
|
||||
expect(sessionId).toBe("abc-123");
|
||||
|
||||
const node = await store.get(detailHash);
|
||||
expect(node).not.toBeNull();
|
||||
expect(node!.payload.sessionId).toBe("abc-123");
|
||||
expect(node!.payload.numTurns).toBe(5);
|
||||
expect(node!.payload.totalCostUsd).toBe(0.12);
|
||||
expect(node!.payload.durationMs).toBe(15000);
|
||||
});
|
||||
|
||||
test("detail node is walkable from root", async () => {
|
||||
const store = createMemoryStore();
|
||||
const parsed: ClaudeCodeParsedResult = {
|
||||
type: "result",
|
||||
subtype: "success",
|
||||
result: "walkable test",
|
||||
sessionId: "walk-123",
|
||||
numTurns: 1,
|
||||
totalCostUsd: 0.01,
|
||||
durationMs: 1000,
|
||||
};
|
||||
|
||||
const { detailHash } = await storeClaudeCodeDetail(store, parsed);
|
||||
const visited: string[] = [];
|
||||
walk(store, detailHash, (hash) => visited.push(hash));
|
||||
expect(visited.length).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("storeClaudeCodeRawOutput", () => {
|
||||
test("stores raw text when JSON parsing fails", async () => {
|
||||
const store = createMemoryStore();
|
||||
const rawText = "Claude produced plain text without JSON";
|
||||
const hash = await storeClaudeCodeRawOutput(store, rawText);
|
||||
expect(hash).toHaveLength(13);
|
||||
const node = await store.get(hash);
|
||||
expect(node).not.toBeNull();
|
||||
expect(node!.payload.text).toBe(rawText);
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,33 @@
|
||||
{
|
||||
"name": "@uncaged/workflow-agent-claude-code",
|
||||
"version": "0.1.0",
|
||||
"files": [
|
||||
"src",
|
||||
"dist",
|
||||
"package.json"
|
||||
],
|
||||
"type": "module",
|
||||
"bin": {
|
||||
"uwf-claude-code": "./src/cli.ts"
|
||||
},
|
||||
"exports": {
|
||||
".": {
|
||||
"bun": "./src/index.ts",
|
||||
"types": "./dist/index.d.ts",
|
||||
"import": "./dist/index.js"
|
||||
}
|
||||
},
|
||||
"scripts": {
|
||||
"test": "bun test"
|
||||
},
|
||||
"dependencies": {
|
||||
"@uncaged/json-cas": "^0.4.0",
|
||||
"@uncaged/workflow-agent-kit": "workspace:^"
|
||||
},
|
||||
"devDependencies": {
|
||||
"typescript": "^5.8.3"
|
||||
},
|
||||
"publishConfig": {
|
||||
"access": "public"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,148 @@
|
||||
import { spawn } from "node:child_process";
|
||||
import type { Store } from "@uncaged/json-cas";
|
||||
|
||||
import {
|
||||
type AgentContext,
|
||||
type AgentRunResult,
|
||||
buildRolePrompt,
|
||||
createAgent,
|
||||
} from "@uncaged/workflow-agent-kit";
|
||||
|
||||
import { parseClaudeCodeJsonOutput, storeClaudeCodeDetail } from "./session-detail.js";
|
||||
|
||||
const CLAUDE_COMMAND = "claude";
|
||||
const CLAUDE_MAX_TURNS = 90;
|
||||
|
||||
function buildHistorySummary(steps: AgentContext["steps"]): string {
|
||||
if (steps.length === 0) {
|
||||
return "";
|
||||
}
|
||||
|
||||
const lines: string[] = ["## Previous Steps"];
|
||||
for (let i = 0; i < steps.length; i++) {
|
||||
const step = steps[i];
|
||||
if (step === undefined) {
|
||||
continue;
|
||||
}
|
||||
lines.push("");
|
||||
lines.push(`### Step ${i + 1}: ${step.role}`);
|
||||
lines.push(`Output: ${JSON.stringify(step.output)}`);
|
||||
lines.push(`Agent: ${step.agent}`);
|
||||
}
|
||||
return lines.join("\n");
|
||||
}
|
||||
|
||||
/** Assemble system prompt, task, and prior step outputs for Claude Code. */
|
||||
export function buildClaudeCodePrompt(ctx: AgentContext): string {
|
||||
const roleDef = ctx.workflow.roles[ctx.role];
|
||||
const rolePrompt = roleDef !== undefined ? buildRolePrompt(roleDef) : "";
|
||||
const parts: string[] = [];
|
||||
if (ctx.outputFormatInstruction !== undefined && ctx.outputFormatInstruction !== "") {
|
||||
parts.push(ctx.outputFormatInstruction, "");
|
||||
}
|
||||
parts.push(rolePrompt, "", "## Task", ctx.start.prompt);
|
||||
const historyBlock = buildHistorySummary(ctx.steps);
|
||||
if (historyBlock !== "") {
|
||||
parts.push("", historyBlock);
|
||||
}
|
||||
return parts.join("\n");
|
||||
}
|
||||
|
||||
function spawnClaude(args: string[]): Promise<{ stdout: string; stderr: string }> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const child = spawn(CLAUDE_COMMAND, args, {
|
||||
env: process.env,
|
||||
shell: false,
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
});
|
||||
|
||||
let stdout = "";
|
||||
let stderr = "";
|
||||
child.stdout?.on("data", (chunk: Buffer) => {
|
||||
stdout += chunk.toString();
|
||||
});
|
||||
child.stderr?.on("data", (chunk: Buffer) => {
|
||||
stderr += chunk.toString();
|
||||
});
|
||||
|
||||
child.on("error", (cause) => {
|
||||
const message = cause instanceof Error ? cause.message : String(cause);
|
||||
reject(new Error(`claude spawn failed: ${message}`));
|
||||
});
|
||||
|
||||
child.on("close", (code) => {
|
||||
if (code === 0) {
|
||||
resolve({ stdout, stderr });
|
||||
return;
|
||||
}
|
||||
const detail = stderr.trim() !== "" ? ` stderr=${stderr.trim()}` : "";
|
||||
reject(new Error(`claude exited with code ${code ?? "null"}${detail}`));
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
function spawnClaudeRun(prompt: string): Promise<{ stdout: string; stderr: string }> {
|
||||
return spawnClaude([
|
||||
"-p",
|
||||
prompt,
|
||||
"--output-format",
|
||||
"json",
|
||||
"--dangerously-skip-permissions",
|
||||
"--max-turns",
|
||||
String(CLAUDE_MAX_TURNS),
|
||||
]);
|
||||
}
|
||||
|
||||
function spawnClaudeResume(
|
||||
sessionId: string,
|
||||
message: string,
|
||||
): Promise<{ stdout: string; stderr: string }> {
|
||||
return spawnClaude([
|
||||
"-p",
|
||||
message,
|
||||
"--resume",
|
||||
sessionId,
|
||||
"--output-format",
|
||||
"json",
|
||||
"--dangerously-skip-permissions",
|
||||
"--max-turns",
|
||||
String(CLAUDE_MAX_TURNS),
|
||||
]);
|
||||
}
|
||||
|
||||
async function processClaudeOutput(stdout: string, store: Store): Promise<AgentRunResult> {
|
||||
const parsed = parseClaudeCodeJsonOutput(stdout);
|
||||
|
||||
if (parsed !== null) {
|
||||
const { detailHash, output, sessionId } = await storeClaudeCodeDetail(store, parsed);
|
||||
return { output, detailHash, sessionId };
|
||||
}
|
||||
|
||||
throw new Error(
|
||||
`Claude Code returned non-JSON output (first 200 chars): ${stdout.slice(0, 200)}`,
|
||||
);
|
||||
}
|
||||
|
||||
async function runClaudeCode(ctx: AgentContext): Promise<AgentRunResult> {
|
||||
const fullPrompt = buildClaudeCodePrompt(ctx);
|
||||
const { stdout } = await spawnClaudeRun(fullPrompt);
|
||||
return processClaudeOutput(stdout, ctx.store);
|
||||
}
|
||||
|
||||
async function continueClaudeCode(
|
||||
sessionId: string,
|
||||
message: string,
|
||||
store: Store,
|
||||
): Promise<AgentRunResult> {
|
||||
const { stdout } = await spawnClaudeResume(sessionId, message);
|
||||
return processClaudeOutput(stdout, store);
|
||||
}
|
||||
|
||||
/** Agent CLI factory: parses argv, runs Claude Code, extracts output, writes StepNode. */
|
||||
export function createClaudeCodeAgent(): () => Promise<void> {
|
||||
return createAgent({
|
||||
name: "claude-code",
|
||||
run: runClaudeCode,
|
||||
continue: continueClaudeCode,
|
||||
});
|
||||
}
|
||||
@@ -0,0 +1,6 @@
|
||||
#!/usr/bin/env bun
|
||||
|
||||
import { createClaudeCodeAgent } from "./claude-code.js";
|
||||
|
||||
const main = createClaudeCodeAgent();
|
||||
void main();
|
||||
@@ -0,0 +1,6 @@
|
||||
export { buildClaudeCodePrompt, createClaudeCodeAgent } from "./claude-code.js";
|
||||
export {
|
||||
parseClaudeCodeJsonOutput,
|
||||
storeClaudeCodeDetail,
|
||||
storeClaudeCodeRawOutput,
|
||||
} from "./session-detail.js";
|
||||
@@ -0,0 +1,25 @@
|
||||
import type { JSONSchema } from "@uncaged/json-cas";
|
||||
|
||||
export const CLAUDE_CODE_DETAIL_SCHEMA: JSONSchema = {
|
||||
title: "claude-code-detail",
|
||||
type: "object",
|
||||
required: ["sessionId", "numTurns", "totalCostUsd", "durationMs", "subtype"],
|
||||
properties: {
|
||||
sessionId: { type: "string" },
|
||||
numTurns: { type: "integer" },
|
||||
totalCostUsd: { type: "number" },
|
||||
durationMs: { type: "integer" },
|
||||
subtype: { type: "string" },
|
||||
},
|
||||
additionalProperties: false,
|
||||
};
|
||||
|
||||
export const CLAUDE_CODE_RAW_OUTPUT_SCHEMA: JSONSchema = {
|
||||
title: "claude-code-raw-output",
|
||||
type: "object",
|
||||
required: ["text"],
|
||||
properties: {
|
||||
text: { type: "string" },
|
||||
},
|
||||
additionalProperties: false,
|
||||
};
|
||||
@@ -0,0 +1,79 @@
|
||||
import { bootstrap, putSchema, type Store } from "@uncaged/json-cas";
|
||||
|
||||
import { CLAUDE_CODE_DETAIL_SCHEMA, CLAUDE_CODE_RAW_OUTPUT_SCHEMA } from "./schemas.js";
|
||||
import type { ClaudeCodeDetailPayload, ClaudeCodeParsedResult } from "./types.js";
|
||||
|
||||
function isRecord(value: unknown): value is Record<string, unknown> {
|
||||
return typeof value === "object" && value !== null && !Array.isArray(value);
|
||||
}
|
||||
|
||||
/** Parse Claude Code JSON stdout (`claude -p --output-format json`). */
|
||||
export function parseClaudeCodeJsonOutput(stdout: string): ClaudeCodeParsedResult | null {
|
||||
let parsed: unknown;
|
||||
try {
|
||||
parsed = JSON.parse(stdout.trim());
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (!isRecord(parsed)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const sessionId = parsed.session_id;
|
||||
const result = parsed.result;
|
||||
const subtype = parsed.subtype;
|
||||
|
||||
if (typeof sessionId !== "string" || typeof result !== "string" || typeof subtype !== "string") {
|
||||
return null;
|
||||
}
|
||||
|
||||
return {
|
||||
type: typeof parsed.type === "string" ? parsed.type : "result",
|
||||
subtype: subtype as ClaudeCodeParsedResult["subtype"],
|
||||
result,
|
||||
sessionId,
|
||||
numTurns: typeof parsed.num_turns === "number" ? parsed.num_turns : 0,
|
||||
totalCostUsd: typeof parsed.total_cost_usd === "number" ? parsed.total_cost_usd : 0,
|
||||
durationMs: typeof parsed.duration_ms === "number" ? parsed.duration_ms : 0,
|
||||
};
|
||||
}
|
||||
|
||||
type ClaudeCodeSchemaHashes = {
|
||||
detail: string;
|
||||
rawOutput: string;
|
||||
};
|
||||
|
||||
async function registerSchemas(store: Store): Promise<ClaudeCodeSchemaHashes> {
|
||||
await bootstrap(store);
|
||||
const [detail, rawOutput] = await Promise.all([
|
||||
putSchema(store, CLAUDE_CODE_DETAIL_SCHEMA),
|
||||
putSchema(store, CLAUDE_CODE_RAW_OUTPUT_SCHEMA),
|
||||
]);
|
||||
return { detail, rawOutput };
|
||||
}
|
||||
|
||||
/** Store parsed Claude Code result as a CAS detail node. */
|
||||
export async function storeClaudeCodeDetail(
|
||||
store: Store,
|
||||
parsed: ClaudeCodeParsedResult,
|
||||
): Promise<{ detailHash: string; output: string; sessionId: string }> {
|
||||
const schemas = await registerSchemas(store);
|
||||
|
||||
const detail: ClaudeCodeDetailPayload = {
|
||||
sessionId: parsed.sessionId,
|
||||
numTurns: parsed.numTurns,
|
||||
totalCostUsd: parsed.totalCostUsd,
|
||||
durationMs: parsed.durationMs,
|
||||
subtype: parsed.subtype,
|
||||
};
|
||||
|
||||
const detailHash = await store.put(schemas.detail, detail);
|
||||
return { detailHash, output: parsed.result, sessionId: parsed.sessionId };
|
||||
}
|
||||
|
||||
/** Fallback: store raw text output when JSON parsing fails. */
|
||||
export async function storeClaudeCodeRawOutput(store: Store, rawOutput: string): Promise<string> {
|
||||
const schemas = await registerSchemas(store);
|
||||
return store.put(schemas.rawOutput, { text: rawOutput });
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
export type ClaudeCodeResultSubtype = "success" | "error_max_turns" | "error_budget";
|
||||
|
||||
export type ClaudeCodeParsedResult = {
|
||||
type: string;
|
||||
subtype: ClaudeCodeResultSubtype;
|
||||
result: string;
|
||||
sessionId: string;
|
||||
numTurns: number;
|
||||
totalCostUsd: number;
|
||||
durationMs: number;
|
||||
};
|
||||
|
||||
export type ClaudeCodeDetailPayload = {
|
||||
sessionId: string;
|
||||
numTurns: number;
|
||||
totalCostUsd: number;
|
||||
durationMs: number;
|
||||
subtype: string;
|
||||
};
|
||||
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"extends": "../../tsconfig.json",
|
||||
"compilerOptions": { "rootDir": "src", "outDir": "dist" },
|
||||
"include": ["src"],
|
||||
"references": [{ "path": "../workflow-agent-kit" }]
|
||||
}
|
||||
@@ -141,28 +141,6 @@ describe("buildOutputFormatInstruction", () => {
|
||||
expect(result).toContain("shared: <string> # required");
|
||||
});
|
||||
|
||||
test("explicitly forbids extra frontmatter fields", () => {
|
||||
const result = buildOutputFormatInstruction(PLANNER_SCHEMA);
|
||||
expect(result).toMatch(/\b(only|exclusively)\b.*fields/i);
|
||||
expect(result).toMatch(/do not add (extra|additional|other) fields/i);
|
||||
});
|
||||
|
||||
test("forbids extra fields even for empty schema", () => {
|
||||
const result = buildOutputFormatInstruction({});
|
||||
expect(result).toMatch(/do not add (extra|additional|other) fields/i);
|
||||
});
|
||||
|
||||
test("forbids extra fields for anyOf/oneOf schemas", () => {
|
||||
const schema = {
|
||||
anyOf: [
|
||||
{ type: "object", properties: { alpha: { type: "string" } } },
|
||||
{ type: "object", properties: { beta: { type: "number" } } },
|
||||
],
|
||||
};
|
||||
const result = buildOutputFormatInstruction(schema);
|
||||
expect(result).toMatch(/do not add (extra|additional|other) fields/i);
|
||||
});
|
||||
|
||||
test("includes focus reminder about role scope", () => {
|
||||
const result = buildOutputFormatInstruction({});
|
||||
expect(result).toContain("Focus exclusively on YOUR role");
|
||||
|
||||
@@ -191,7 +191,5 @@ Your meta output must satisfy these fields:
|
||||
|
||||
${fieldList}
|
||||
|
||||
Output ONLY the fields listed above. Do not add extra fields that are not specified in the schema.
|
||||
|
||||
Focus exclusively on YOUR role's deliverable. Do not perform actions outside your role's scope.`;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user