Compare commits
30 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 7b6f113113 | |||
| cfa890f83c | |||
| 81c08ac7e2 | |||
| fdcfcc7eba | |||
| 4972f99ca0 | |||
| 48bf701281 | |||
| d8cba5eea0 | |||
| d9f7648fdd | |||
| a2e9dd9785 | |||
| 3b498069b6 | |||
| 984d93a6f5 | |||
| 2274de29c3 | |||
| 911cbf2a8a | |||
| 09a5da2df2 | |||
| e4c228d36e | |||
| f8de0e913b | |||
| cb97507e9a | |||
| 4b442bb251 | |||
| ac53128ff7 | |||
| 607366c469 | |||
| 577fb27470 | |||
| 5475dd3f5c | |||
| 09b7ddf6d0 | |||
| c4e94bbe56 | |||
| dbefe793f2 | |||
| 6483bc4861 | |||
| fecb02b115 | |||
| 87938c1886 | |||
| aba5642908 | |||
| 168e604602 |
@@ -22,4 +22,4 @@ jobs:
|
||||
run: bun run check
|
||||
|
||||
- name: Test
|
||||
run: bun run test
|
||||
run: bun run test:ci
|
||||
|
||||
@@ -143,6 +143,44 @@ defaultModel: default
|
||||
const masked = maskApiKeys(config);
|
||||
expect(masked).toEqual(config);
|
||||
});
|
||||
|
||||
// Masking is scoped to `providers.<name>.apiKey`: an `apiKey` field at the
// config root must come back unchanged while the provider one is replaced.
test("does not mask non-provider apiKey fields", () => {
  const config = {
    apiKey: "root-level-key",
    providers: {
      dashscope: { apiKey: "sk-secret" },
    },
    models: {
      default: { provider: "dashscope" },
    },
  };

  const masked = maskApiKeys(config);

  // Root-level apiKey should NOT be masked
  expect(masked.apiKey).toBe("root-level-key");

  // Provider apiKey SHOULD be masked
  const providers = masked.providers as Record<string, Record<string, unknown>>;
  expect(providers.dashscope.apiKey).toBe("***MASKED***");
});
|
||||
|
||||
// A provider entry without any fields must pass through untouched
// (no `apiKey` property is invented for it).
test("handles empty provider object", () => {
  const config = {
    providers: { dashscope: {} },
  };

  const masked = maskApiKeys(config);

  expect(masked).toEqual({ providers: { dashscope: {} } });
});
|
||||
|
||||
// A provider `apiKey` that is present but `null` is still reported as masked,
// and sibling fields such as `baseUrl` are preserved as-is.
test("handles provider with null apiKey", () => {
  const config = {
    providers: {
      dashscope: { apiKey: null, baseUrl: "https://example.com" },
    },
  };

  const masked = maskApiKeys(config);
  const providers = masked.providers as Record<string, Record<string, unknown>>;

  expect(providers.dashscope.apiKey).toBe("***MASKED***");
  expect(providers.dashscope.baseUrl).toBe("https://example.com");
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -618,5 +656,82 @@ defaultModel: default
|
||||
rmSync(tempDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
// Setting `agentOverrides.<workflow>.<role>` must succeed and be readable
// back through `cmdConfigGet` with the same dotted path.
test("agentOverrides — accepts valid 3-segment path", async () => {
  const tempDir = mkdtempSync(join(tmpdir(), "test-config-"));
  try {
    createTestConfig(tempDir, sampleConfig);

    await cmdConfigSet(tempDir, "agentOverrides.solve-issue.planner", "claude-code");
    const value = await cmdConfigGet(tempDir, "agentOverrides.solve-issue.planner");

    expect(value).toBe("claude-code");
  } finally {
    // Always remove the scratch directory, even when an assertion fails.
    rmSync(tempDir, { recursive: true, force: true });
  }
});
|
||||
|
||||
// A 2-segment `agentOverrides` path names a workflow but no role, so
// `cmdConfigSet` must reject it with an "incomplete path"-style error.
test("agentOverrides — rejects incomplete path (2 segments)", async () => {
  const tempDir = mkdtempSync(join(tmpdir(), "test-config-"));
  try {
    createTestConfig(tempDir, sampleConfig);

    await expect(cmdConfigSet(tempDir, "agentOverrides.solve-issue", "hermes")).rejects.toThrow(
      /incomplete path|must specify a field/i,
    );
  } finally {
    // Always remove the scratch directory, even when an assertion fails.
    rmSync(tempDir, { recursive: true, force: true });
  }
});
|
||||
|
||||
// Setting `modelOverrides.<name>` must succeed and be readable back through
// `cmdConfigGet` with the same dotted path.
test("modelOverrides — accepts valid 2-segment path", async () => {
  const tempDir = mkdtempSync(join(tmpdir(), "test-config-"));
  try {
    createTestConfig(tempDir, sampleConfig);

    await cmdConfigSet(tempDir, "modelOverrides.extract", "gpt4");
    const value = await cmdConfigGet(tempDir, "modelOverrides.extract");

    expect(value).toBe("gpt4");
  } finally {
    // Always remove the scratch directory, even when an assertion fails.
    rmSync(tempDir, { recursive: true, force: true });
  }
});
|
||||
|
||||
// The bare `modelOverrides` key names no entry to set, so `cmdConfigSet`
// must reject it with an "incomplete path"-style error.
test("modelOverrides — rejects incomplete path (1 segment only)", async () => {
  const tempDir = mkdtempSync(join(tmpdir(), "test-config-"));
  try {
    createTestConfig(tempDir, sampleConfig);

    await expect(cmdConfigSet(tempDir, "modelOverrides", "gpt4")).rejects.toThrow(
      /incomplete path|must specify a field/i,
    );
  } finally {
    // Always remove the scratch directory, even when an assertion fails.
    rmSync(tempDir, { recursive: true, force: true });
  }
});
|
||||
|
||||
// Regression guard: a top-level key that is not part of the config must
// still be refused with the "Unknown config key" error.
test("rejects unknown top-level key (regression)", async () => {
  const tempDir = mkdtempSync(join(tmpdir(), "test-config-"));
  try {
    createTestConfig(tempDir, sampleConfig);

    await expect(cmdConfigSet(tempDir, "randomKey", "value")).rejects.toThrow(
      /Unknown config key/,
    );
  } finally {
    // Always remove the scratch directory, even when an assertion fails.
    rmSync(tempDir, { recursive: true, force: true });
  }
});
|
||||
});
|
||||
|
||||
describe("no legacy apiKeyEnv references", () => {
  // Source-level guard: neither the config command nor this test file may
  // mention the legacy key name.
  //
  // IMPORTANT: the self-check below strips everything from the `describe(`
  // line above to the end of the file before searching. Any mention of the
  // legacy name placed ABOVE that line (including in a comment) makes the
  // second test fail, and anything placed AFTER this block is never checked.

  test("config.ts has no references to apiKeyEnv", () => {
    const configSource = readFileSync(join(__dirname, "..", "commands", "config.ts"), "utf8");
    expect(configSource).not.toContain("apiKeyEnv");
  });

  test("config.test.ts has no references to apiKeyEnv (except this test)", () => {
    const testSource = readFileSync(__filename, "utf8");

    // Remove this test block's own mentions before checking
    const withoutThisTest = testSource.replace(
      /describe\("no legacy apiKeyEnv references"[\s\S]*$/,
      "",
    );

    expect(withoutThisTest).not.toContain("apiKeyEnv");
  });
});
|
||||
});
|
||||
|
||||
@@ -5,17 +5,17 @@ import { evaluate } from "../moderator/evaluate.js";
|
||||
|
||||
const solveIssueGraph: WorkflowPayload["graph"] = {
|
||||
$START: {
|
||||
_: { role: "planner", prompt: "Start planning from the issue in the task." },
|
||||
_: { role: "planner", prompt: "Start planning from the issue in the task.", location: null },
|
||||
},
|
||||
planner: {
|
||||
_: { role: "developer", prompt: "Implement the plan: {{plan}}" },
|
||||
_: { role: "developer", prompt: "Implement the plan: {{plan}}", location: null },
|
||||
},
|
||||
developer: {
|
||||
_: { role: "reviewer", prompt: "Review the changes: {{summary}}" },
|
||||
_: { role: "reviewer", prompt: "Review the changes: {{summary}}", location: null },
|
||||
},
|
||||
reviewer: {
|
||||
approved: { role: "$END", prompt: "Done." },
|
||||
rejected: { role: "developer", prompt: "Fix: {{comments}}" },
|
||||
approved: { role: "$END", prompt: "Done.", location: null },
|
||||
rejected: { role: "developer", prompt: "Fix: {{comments}}", location: null },
|
||||
},
|
||||
};
|
||||
|
||||
@@ -24,7 +24,11 @@ describe("evaluate", () => {
|
||||
const result = evaluate(solveIssueGraph, "$START", { $status: "_" });
|
||||
expect(result).toEqual({
|
||||
ok: true,
|
||||
value: { role: "planner", prompt: "Start planning from the issue in the task." },
|
||||
value: {
|
||||
role: "planner",
|
||||
prompt: "Start planning from the issue in the task.",
|
||||
location: null,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
@@ -35,7 +39,7 @@ describe("evaluate", () => {
|
||||
});
|
||||
expect(result).toEqual({
|
||||
ok: true,
|
||||
value: { role: "developer", prompt: "Fix: missing tests" },
|
||||
value: { role: "developer", prompt: "Fix: missing tests", location: null },
|
||||
});
|
||||
});
|
||||
|
||||
@@ -43,7 +47,7 @@ describe("evaluate", () => {
|
||||
const result = evaluate(solveIssueGraph, "reviewer", { $status: "approved" });
|
||||
expect(result).toEqual({
|
||||
ok: true,
|
||||
value: { role: "$END", prompt: "Done." },
|
||||
value: { role: "$END", prompt: "Done.", location: null },
|
||||
});
|
||||
});
|
||||
|
||||
@@ -70,7 +74,11 @@ describe("evaluate", () => {
|
||||
});
|
||||
expect(result).toEqual({
|
||||
ok: true,
|
||||
value: { role: "developer", prompt: "Implement the plan: Add auth middleware" },
|
||||
value: {
|
||||
role: "developer",
|
||||
prompt: "Implement the plan: Add auth middleware",
|
||||
location: null,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
@@ -81,14 +89,14 @@ describe("evaluate", () => {
|
||||
});
|
||||
expect(result).toEqual({
|
||||
ok: true,
|
||||
value: { role: "developer", prompt: 'Fix: use <T> & "Result<T, E>" types' },
|
||||
value: { role: "developer", prompt: 'Fix: use <T> & "Result<T, E>" types', location: null },
|
||||
});
|
||||
});
|
||||
|
||||
test("triple mustache also works for unescaped output", () => {
|
||||
const graph: Record<string, Record<string, Target>> = {
|
||||
reviewer: {
|
||||
_: { role: "developer", prompt: "Fix: {{{comments}}}" },
|
||||
_: { role: "developer", prompt: "Fix: {{{comments}}}", location: null },
|
||||
},
|
||||
};
|
||||
const result = evaluate(graph, "reviewer", {
|
||||
@@ -97,7 +105,7 @@ describe("evaluate", () => {
|
||||
});
|
||||
expect(result).toEqual({
|
||||
ok: true,
|
||||
value: { role: "developer", prompt: "Fix: <script>alert(1)</script>" },
|
||||
value: { role: "developer", prompt: "Fix: <script>alert(1)</script>", location: null },
|
||||
});
|
||||
});
|
||||
|
||||
@@ -107,7 +115,11 @@ describe("evaluate", () => {
|
||||
});
|
||||
expect(result).toEqual({
|
||||
ok: true,
|
||||
value: { role: "developer", prompt: "Implement the plan: Add auth middleware" },
|
||||
value: {
|
||||
role: "developer",
|
||||
prompt: "Implement the plan: Add auth middleware",
|
||||
location: null,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
@@ -117,6 +129,7 @@ describe("evaluate", () => {
|
||||
_: {
|
||||
role: "developer",
|
||||
prompt: "Address: {{review.comments}}",
|
||||
location: null,
|
||||
},
|
||||
},
|
||||
};
|
||||
@@ -126,7 +139,7 @@ describe("evaluate", () => {
|
||||
});
|
||||
expect(result).toEqual({
|
||||
ok: true,
|
||||
value: { role: "developer", prompt: "Address: refactor the handler" },
|
||||
value: { role: "developer", prompt: "Address: refactor the handler", location: null },
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -7,10 +7,14 @@ const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
import {
|
||||
cmdSkillActor,
|
||||
cmdSkillAdapter,
|
||||
cmdSkillArchitecture,
|
||||
cmdSkillAuthor,
|
||||
cmdSkillCli,
|
||||
cmdSkillDeveloper,
|
||||
cmdSkillList,
|
||||
cmdSkillModerator,
|
||||
cmdSkillUser,
|
||||
cmdSkillYaml,
|
||||
} from "../commands/skill.js";
|
||||
|
||||
@@ -23,6 +27,10 @@ describe("skill commands", () => {
|
||||
expect(result).toContain("yaml");
|
||||
expect(result).toContain("moderator");
|
||||
expect(result).toContain("actor");
|
||||
expect(result).toContain("user");
|
||||
expect(result).toContain("author");
|
||||
expect(result).toContain("developer");
|
||||
expect(result).toContain("adapter");
|
||||
for (const name of result) {
|
||||
expect(name).toMatch(/^\S+$/);
|
||||
}
|
||||
@@ -72,6 +80,45 @@ describe("skill commands", () => {
|
||||
expect(result.length).toBeGreaterThan(200);
|
||||
});
|
||||
|
||||
// Smoke test for the text returned by `cmdSkillUser`: it must be a string
// that mentions the key user-facing terms and is longer than 500 characters.
test("skill user returns non-empty markdown string", () => {
  const result = cmdSkillUser();

  expect(typeof result).toBe("string");
  expect(result).toContain("uwf");
  expect(result).toContain("thread");
  expect(result).toContain("workflow");
  expect(result).toContain("Quick Start");
  expect(result.length).toBeGreaterThan(500);
});
|
||||
|
||||
// Smoke test for the text returned by `cmdSkillAuthor`: it must be a string
// that mentions the workflow-authoring vocabulary and is longer than 500
// characters.
test("skill author returns non-empty markdown string", () => {
  const result = cmdSkillAuthor();

  expect(typeof result).toBe("string");
  expect(result).toContain("frontmatter");
  expect(result).toContain("graph");
  expect(result).toContain("$START");
  expect(result).toContain("$END");
  expect(result).toContain("$status");
  expect(result.length).toBeGreaterThan(500);
});
|
||||
|
||||
// Smoke test for the text returned by `cmdSkillDeveloper`: it must be a
// string that mentions "Monorepo", "CAS" and "Biome" and is longer than 500
// characters.
test("skill developer returns non-empty markdown string", () => {
  const result = cmdSkillDeveloper();

  expect(typeof result).toBe("string");
  expect(result).toContain("Monorepo");
  expect(result).toContain("CAS");
  expect(result).toContain("Biome");
  expect(result.length).toBeGreaterThan(500);
});
|
||||
|
||||
// Smoke test for the text returned by `cmdSkillAdapter`: it must be a string
// that mentions "createAgent", "AgentContext" and "frontmatter" and is longer
// than 500 characters.
test("skill adapter returns non-empty markdown string", () => {
  const result = cmdSkillAdapter();

  expect(typeof result).toBe("string");
  expect(result).toContain("createAgent");
  expect(result).toContain("AgentContext");
  expect(result).toContain("frontmatter");
  expect(result.length).toBeGreaterThan(500);
});
|
||||
|
||||
test("skill help subcommand is suppressed", () => {
|
||||
const output = execFileSync("bun", ["src/cli.ts", "skill", "--help"], {
|
||||
cwd: join(__dirname, "..", ".."),
|
||||
@@ -84,6 +131,10 @@ describe("skill commands", () => {
|
||||
expect(output).toContain("yaml");
|
||||
expect(output).toContain("moderator");
|
||||
expect(output).toContain("actor");
|
||||
expect(output).toContain("user");
|
||||
expect(output).toContain("author");
|
||||
expect(output).toContain("developer");
|
||||
expect(output).toContain("adapter");
|
||||
expect(output).toContain("list");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -0,0 +1,363 @@
|
||||
import { mkdir, mkdtemp, rm } from "node:fs/promises";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { bootstrap, type Hash, type JSONSchema, putSchema } from "@uncaged/json-cas";
|
||||
import { createFsStore } from "@uncaged/json-cas-fs";
|
||||
import type { CasRef, StepNodePayload } from "@uncaged/workflow-protocol";
|
||||
import { afterEach, beforeEach, describe, expect, test } from "vitest";
|
||||
import { cmdStepShow } from "../commands/step.js";
|
||||
import { formatOutput } from "../format.js";
|
||||
import { registerUwfSchemas } from "../schemas.js";
|
||||
|
||||
/**
 * Test-only schema for one conversation turn stored in the CAS.
 *
 * `index`, `role` and `content` are required; `toolCalls` is optional and is
 * either `null` or an array of `{ name, args }` objects where `args` is a
 * plain string. Unknown properties are rejected at both levels.
 */
const TURN_SCHEMA: JSONSchema = {
  title: "test-turn",
  type: "object",
  required: ["index", "role", "content"],
  properties: {
    index: { type: "integer" },
    role: { type: "string", enum: ["assistant", "tool"] },
    content: { type: "string" },
    toolCalls: {
      anyOf: [
        {
          type: "array",
          items: {
            type: "object",
            required: ["name", "args"],
            properties: {
              name: { type: "string" },
              args: { type: "string" },
            },
            additionalProperties: false,
          },
        },
        { type: "null" },
      ],
    },
  },
  additionalProperties: false,
};
|
||||
|
||||
/**
 * Test-only schema for a step's detail node: a required `turns` array whose
 * items are strings tagged with the `cas_ref` format (references to turn
 * nodes). Unknown properties are rejected.
 */
const DETAIL_SCHEMA: JSONSchema = {
  title: "test-detail",
  type: "object",
  required: ["turns"],
  properties: {
    turns: {
      type: "array",
      items: { type: "string", format: "cas_ref" },
    },
  },
  additionalProperties: false,
};
|
||||
|
||||
/** Everything a test needs to write nodes into a freshly prepared store. */
type TestSetup = {
  /** Filesystem-backed store created by `createFsStore`. */
  store: ReturnType<typeof createFsStore>;
  /** Type hashes of the registered UWF schemas used by these tests. */
  schemas: {
    workflow: Hash;
    startNode: Hash;
    stepNode: Hash;
    text: Hash;
  };
  /** Type hash under which turn payloads are stored. */
  turnType: Hash;
  /** Type hash under which the detail payload is stored. */
  detailType: Hash;
};
|
||||
|
||||
/**
 * Prepares a store rooted at `casDir` for a test.
 *
 * Creates the filesystem store, bootstraps it, registers the UWF schemas and
 * then stores the two test-only schemas (`TURN_SCHEMA`, `DETAIL_SCHEMA`).
 *
 * @param casDir Directory the store reads from and writes to.
 * @returns The store together with the schema hashes needed to put nodes.
 */
async function setupTest(casDir: string): Promise<TestSetup> {
  const store = createFsStore(casDir);
  await bootstrap(store);
  const schemas = await registerUwfSchemas(store);

  // The two test schemas are independent of each other, so store them together.
  const [turnType, detailType] = await Promise.all([
    putSchema(store, TURN_SCHEMA),
    putSchema(store, DETAIL_SCHEMA),
  ]);

  return { store, schemas, turnType, detailType };
}
|
||||
|
||||
/**
 * Stores a complete step in the test store and returns its reference.
 *
 * Writes, in order: one node per turn payload, a detail node listing those
 * turns, a placeholder start node, a placeholder output node, and finally the
 * step node that links them together.
 *
 * @param setup Store and schema hashes produced by `setupTest`.
 * @param turnPayloads Turn payloads to store; their order is preserved in the
 *   detail node's `turns` list.
 * @returns Reference of the stored step node.
 */
async function createTestStep(
  setup: TestSetup,
  turnPayloads: Array<{
    index: number;
    role: string;
    content: string;
    toolCalls: Array<{ name: string; args: string }> | null;
  }>,
): Promise<CasRef> {
  const { store, schemas, turnType, detailType } = setup;

  // Create turn nodes (one at a time, so the list keeps the input order)
  const turnHashes: CasRef[] = [];
  for (const payload of turnPayloads) {
    turnHashes.push(await store.put(turnType, payload));
  }

  // Create detail node
  const detailHash = await store.put(detailType, { turns: turnHashes });

  // Create dummy start node; the workflow ref is a placeholder value.
  const startHash = await store.put(schemas.startNode, {
    workflow: "0000000000000" as CasRef,
    prompt: "test prompt",
    cwd: "/tmp",
  });

  // Create dummy output node
  const outputHash = await store.put(schemas.text, { $status: "done" });

  // Read the clock once so the recorded duration is exactly one second.
  const startedAtMs = Date.now();

  // Create step node
  const stepPayload: StepNodePayload = {
    prev: null,
    start: startHash,
    role: "test-role",
    agent: "test-agent",
    output: outputHash,
    detail: detailHash,
    edgePrompt: "",
    startedAtMs,
    completedAtMs: startedAtMs + 1000,
    cwd: "/tmp",
  };

  return store.put(schemas.stepNode, stepPayload);
}
|
||||
|
||||
/**
 * Checks that the result of `cmdStepShow`, once passed through
 * `formatOutput`, stays well-formed when turn content and tool-call arguments
 * contain control characters, quotes or backslashes.
 *
 * Each test gets its own temporary directory (with a `cas` sub-directory for
 * the store) that is removed afterwards.
 */
describe("cmdStepShow JSON serialization", () => {
  let testDir: string;
  let casDir: string;

  beforeEach(async () => {
    testDir = await mkdtemp(join(tmpdir(), "uwf-test-"));
    casDir = join(testDir, "cas");
    await mkdir(casDir, { recursive: true });
  });

  afterEach(async () => {
    await rm(testDir, { recursive: true, force: true });
  });

  /**
   * Stores `turns` as a step in a freshly prepared store under `casDir`,
   * shows that step and returns the formatted output.
   */
  async function renderStep(
    turns: Parameters<typeof createTestStep>[1],
    format: Parameters<typeof formatOutput>[1],
  ): Promise<string> {
    const setup = await setupTest(casDir);
    const stepHash = await createTestStep(setup, turns);
    const result = await cmdStepShow(testDir, stepHash);
    return formatOutput(result, format);
  }

  test("escapes newlines in tool call args", async () => {
    const jsonOutput = await renderStep(
      [
        {
          index: 0,
          role: "assistant",
          content: "Running command",
          toolCalls: [{ name: "Bash", args: "echo 'line1'\necho 'line2'" }],
        },
      ],
      "json",
    );

    expect(() => JSON.parse(jsonOutput)).not.toThrow();
    // The raw text must carry the two-character escape, not a real newline.
    expect(jsonOutput).toContain("\\n");

    const parsed = JSON.parse(jsonOutput);
    expect(parsed.turns[0].toolCalls[0].args).toContain("\n");
  });

  test("escapes tabs in tool call args", async () => {
    const jsonOutput = await renderStep(
      [
        {
          index: 0,
          role: "assistant",
          content: "",
          toolCalls: [{ name: "Bash", args: "cat <<EOF\nfield1\tfield2\tfield3\nEOF" }],
        },
      ],
      "json",
    );

    expect(() => JSON.parse(jsonOutput)).not.toThrow();
    expect(jsonOutput).toContain("\\t");
  });

  test("escapes carriage returns", async () => {
    const jsonOutput = await renderStep(
      [
        {
          index: 0,
          role: "assistant",
          content: "Committing changes",
          toolCalls: [{ name: "Bash", args: 'git commit -m "First line\r\nSecond line"' }],
        },
      ],
      "json",
    );

    expect(() => JSON.parse(jsonOutput)).not.toThrow();
    expect(jsonOutput).toContain("\\r\\n");
  });

  test("escapes backslashes and quotes", async () => {
    const jsonOutput = await renderStep(
      [
        {
          index: 0,
          role: "assistant",
          content: "",
          toolCalls: [{ name: "Bash", args: 'echo "He said \\"hello\\""' }],
        },
      ],
      "json",
    );

    expect(() => JSON.parse(jsonOutput)).not.toThrow();
    const parsed = JSON.parse(jsonOutput);
    expect(parsed.turns).toBeDefined();
  });

  test("handles Unicode control characters", async () => {
    const jsonOutput = await renderStep(
      [
        {
          index: 0,
          role: "assistant",
          content: "",
          toolCalls: [{ name: "Bash", args: "echo '\u0001\u001F'" }],
        },
      ],
      "json",
    );

    expect(() => JSON.parse(jsonOutput)).not.toThrow();
  });

  test("handles nested CAS refs with control characters", async () => {
    const jsonOutput = await renderStep(
      [
        {
          index: 0,
          role: "assistant",
          content: "First turn\nwith newline",
          toolCalls: [{ name: "Bash", args: "cmd1\nline2" }],
        },
        {
          index: 1,
          role: "assistant",
          content: "Second turn\twith tab",
          toolCalls: null,
        },
      ],
      "json",
    );

    expect(() => JSON.parse(jsonOutput)).not.toThrow();
    const parsed = JSON.parse(jsonOutput);
    expect(parsed.turns).toHaveLength(2);
  });

  test("YAML output format is unaffected", async () => {
    const yamlOutput = await renderStep(
      [
        {
          index: 0,
          role: "assistant",
          content: "Running command",
          toolCalls: [{ name: "Bash", args: "echo 'line1'\necho 'line2'" }],
        },
      ],
      "yaml",
    );

    expect(yamlOutput).toContain("turns:");
    expect(yamlOutput.length).toBeGreaterThan(0);
  });

  test("handles empty and null values", async () => {
    const jsonOutput = await renderStep(
      [
        {
          index: 0,
          role: "assistant",
          content: "",
          toolCalls: null,
        },
      ],
      "json",
    );

    expect(() => JSON.parse(jsonOutput)).not.toThrow();
    const parsed = JSON.parse(jsonOutput);
    expect(parsed.turns).toBeDefined();
  });

  test("handles large step with multiple tool calls", async () => {
    const setup = await setupTest(casDir);

    const turns = Array.from({ length: 25 }, (_, i) => ({
      index: i,
      role: "assistant" as const,
      content: `Turn ${i}\nwith newline`,
      toolCalls: [
        { name: "Bash", args: `command${i}\nline2\tfield${i}` },
        { name: "Read", args: `/path/to/file${i}` },
      ],
    }));

    const stepHash = await createTestStep(setup, turns);

    // Only showing and formatting are timed; store setup is excluded.
    const startTime = Date.now();
    const result = await cmdStepShow(testDir, stepHash);
    const jsonOutput = formatOutput(result, "json");
    const duration = Date.now() - startTime;

    // NOTE(review): wall-clock bound — generous, but may still be sensitive
    // to a heavily loaded CI machine.
    expect(duration).toBeLessThan(2000);
    expect(() => JSON.parse(jsonOutput)).not.toThrow();

    const parsed = JSON.parse(jsonOutput);
    expect(parsed.turns).toHaveLength(25);
  });
});
|
||||
@@ -85,6 +85,7 @@ describe("protocol types", () => {
|
||||
edgePrompt: "",
|
||||
startedAtMs: 1000,
|
||||
completedAtMs: 2000,
|
||||
cwd: "/test/path",
|
||||
};
|
||||
expect(record.startedAtMs).toBe(1000);
|
||||
expect(record.completedAtMs).toBe(2000);
|
||||
@@ -239,8 +240,8 @@ describe("thread read timing", () => {
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: { _: { role: "worker", prompt: "go" } },
|
||||
worker: { _: { role: "$END", prompt: "" } },
|
||||
$START: { _: { role: "worker", prompt: "go", location: null } },
|
||||
worker: { _: { role: "$END", prompt: "", location: null } },
|
||||
},
|
||||
});
|
||||
|
||||
@@ -305,8 +306,8 @@ describe("thread read timing", () => {
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: { _: { role: "worker", prompt: "go" } },
|
||||
worker: { _: { role: "$END", prompt: "" } },
|
||||
$START: { _: { role: "worker", prompt: "go", location: null } },
|
||||
worker: { _: { role: "$END", prompt: "", location: null } },
|
||||
},
|
||||
});
|
||||
|
||||
|
||||
@@ -0,0 +1,174 @@
|
||||
import { mkdir, rm, writeFile } from "node:fs/promises";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import type { CasRef, StartNodePayload, ThreadId } from "@uncaged/workflow-protocol";
|
||||
import { describe, expect, test } from "vitest";
|
||||
import { cmdThreadStart } from "../commands/thread.js";
|
||||
import { createUwfStore } from "../store.js";
|
||||
|
||||
/**
 * Integration tests for the working directory recorded when a thread starts:
 * an explicit `cwd` is stored on the start node, a relative `cwd` is
 * rejected, and omitting it falls back to `process.cwd()`.
 */
describe("Thread and edge location integration", () => {
  let tmpDir: string;
  let storageRoot: string;

  /** Wording that differs between the otherwise identical workflow fixtures. */
  type WorkflowText = {
    name: string;
    description: string;
    roleDescription: string;
    goal: string;
    startPrompt: string;
  };

  /** Creates a fresh scratch directory with a `storage` sub-directory. */
  async function setupTestEnv() {
    // Timestamp alone can repeat when runs overlap; add pid and a random part.
    const suffix = `${Date.now()}-${process.pid}-${Math.random().toString(36).slice(2)}`;
    tmpDir = join(tmpdir(), `uwf-test-location-${suffix}`);
    storageRoot = join(tmpDir, "storage");
    await mkdir(storageRoot, { recursive: true });
  }

  /** Removes the scratch directory created by `setupTestEnv`, if any. */
  async function teardown() {
    if (tmpDir) {
      await rm(tmpDir, { recursive: true, force: true });
    }
  }

  /**
   * Writes a minimal one-role workflow ($START -> planner -> $END, every edge
   * with `location: null`) into `tmpDir` and returns the file path.
   */
  async function writeWorkflow(fileName: string, text: WorkflowText): Promise<string> {
    const workflowYaml = `
name: ${text.name}
description: ${text.description}
roles:
  planner:
    description: ${text.roleDescription}
    goal: ${text.goal}
    capabilities: ["planning"]
    procedure: Plan
    output: |
      $status: "ready"
    frontmatter:
      type: object
      required: ["$status"]
      properties:
        $status: { type: string }
graph:
  $START:
    _:
      role: planner
      prompt: "${text.startPrompt}"
      location: null
  planner:
    _:
      role: $END
      prompt: "Done"
      location: null
`;
    const workflowPath = join(tmpDir, fileName);
    await writeFile(workflowPath, workflowYaml, "utf8");
    return workflowPath;
  }

  test("thread start captures cwd in StartNode", async () => {
    await setupTestEnv();
    try {
      const workflowPath = await writeWorkflow("test-location.yaml", {
        name: "test-location",
        description: "Test workflow for location feature",
        roleDescription: "Plans the work",
        goal: "Plan implementation",
        startPrompt: "Plan the work",
      });

      const testCwd = "/test/project/path";
      const result = await cmdThreadStart(storageRoot, workflowPath, "test prompt", tmpDir, testCwd);

      expect(result.thread).toBeDefined();
      expect(result.workflow).toBeDefined();

      // Verify StartNode has the cwd field
      const uwf = await createUwfStore(storageRoot);
      const { loadThreadsIndex } = await import("../store.js");
      const index = await loadThreadsIndex(storageRoot);
      const headHash = index[result.thread as ThreadId];
      expect(headHash).toBeDefined();

      const startNode = uwf.store.get(headHash as CasRef);
      expect(startNode).not.toBe(null);
      expect(startNode?.type).toBe(uwf.schemas.startNode);

      const startPayload = startNode?.payload as StartNodePayload;
      expect(startPayload.cwd).toBe(testCwd);
    } finally {
      // Clean up even when an assertion above throws.
      await teardown();
    }
  });

  test("thread start validates cwd is absolute path", async () => {
    await setupTestEnv();
    try {
      const workflowPath = await writeWorkflow("test-location.yaml", {
        name: "test-location",
        description: "Test workflow",
        roleDescription: "Plans",
        goal: "Plan",
        startPrompt: "Plan",
      });

      // Relative path should fail (process.exit is wrapped by vitest)
      await expect(
        cmdThreadStart(storageRoot, workflowPath, "test", tmpDir, "relative/path"),
      ).rejects.toThrow();
    } finally {
      // Clean up even when an assertion above throws.
      await teardown();
    }
  });

  test("thread start uses process.cwd() as default", async () => {
    await setupTestEnv();
    try {
      const workflowPath = await writeWorkflow("test-default-cwd.yaml", {
        name: "test-default-cwd",
        description: "Test default cwd",
        roleDescription: "Plans",
        goal: "Plan",
        startPrompt: "Plan",
      });

      const result = await cmdThreadStart(storageRoot, workflowPath, "test", tmpDir);

      const uwf = await createUwfStore(storageRoot);
      const { loadThreadsIndex } = await import("../store.js");
      const index = await loadThreadsIndex(storageRoot);
      const headHash = index[result.thread as ThreadId];

      const startNode = uwf.store.get(headHash as CasRef);
      const startPayload = startNode?.payload as StartNodePayload;

      // Should default to process.cwd()
      expect(startPayload.cwd).toBe(process.cwd());
    } finally {
      // Clean up even when an assertion above throws.
      await teardown();
    }
  });
});
|
||||
@@ -0,0 +1,227 @@
|
||||
import { mkdir, rm, writeFile } from "node:fs/promises";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import type { ThreadId } from "@uncaged/workflow-protocol";
|
||||
import { describe, expect, test } from "vitest";
|
||||
import { createMarker, deleteMarker } from "../background/index.js";
|
||||
import { cmdThreadShow, cmdThreadStart } from "../commands/thread.js";
|
||||
import { appendThreadHistory, loadThreadsIndex } from "../store.js";
|
||||
|
||||
/**
 * Minimal workflow definition shared by the status tests: a single `planner`
 * role and a graph that goes $START -> planner -> $END, with every edge
 * carrying `location: null`.
 *
 * The nesting is significant YAML structure: role fields sit under
 * `roles.planner`, and each edge target sits under `graph.<node>.<status>`.
 */
const TEST_WORKFLOW_YAML = `
name: test-status
description: Test workflow for status field
roles:
  planner:
    description: Plans the work
    goal: Plan implementation
    capabilities: ["planning"]
    procedure: Plan
    output: |
      $status: "ready"
    frontmatter:
      type: object
      required: ["$status"]
      properties:
        $status: { type: string }
graph:
  $START:
    _:
      role: planner
      prompt: "Plan the work"
      location: null
  planner:
    _:
      role: $END
      prompt: "Done"
      location: null
`;
|
||||
|
||||
describe("thread show status field", () => {
  /** Paths handed to a test body by `withTestEnv`. */
  type TestEnv = { tmpDir: string; storageRoot: string; workflowPath: string };
  type StartResult = Awaited<ReturnType<typeof cmdThreadStart>>;
  type StartedThread = { threadId: ThreadId; workflow: StartResult["workflow"] };

  /**
   * Creates an isolated temp directory with a storage root and the test
   * workflow file, runs `run`, and always removes the directory afterwards.
   *
   * Cleanup lives in `finally` so a failed assertion or a throwing setup step
   * does not leak the directory. Paths are local (not shared suite state), and
   * the directory name carries pid + random suffix so two runs started in the
   * same millisecond cannot collide.
   */
  async function withTestEnv(run: (env: TestEnv) => Promise<void>): Promise<void> {
    const suffix = `${Date.now()}-${process.pid}-${Math.random().toString(36).slice(2)}`;
    const tmpDir = join(tmpdir(), `uwf-test-status-${suffix}`);
    const storageRoot = join(tmpDir, "storage");
    try {
      await mkdir(storageRoot, { recursive: true });
      const workflowPath = join(tmpDir, "test-status.yaml");
      await writeFile(workflowPath, TEST_WORKFLOW_YAML, "utf8");
      await run({ tmpDir, storageRoot, workflowPath });
    } finally {
      await rm(tmpDir, { recursive: true, force: true });
    }
  }

  /** Starts a thread from the test workflow and returns its id and workflow ref. */
  async function startThread(env: TestEnv): Promise<StartedThread> {
    const startResult = await cmdThreadStart(
      env.storageRoot,
      env.workflowPath,
      "test prompt",
      env.tmpDir,
    );
    return { threadId: startResult.thread as ThreadId, workflow: startResult.workflow };
  }

  /**
   * Simulates archiving: removes the thread from the active index and appends
   * a history entry with the given `reason` (`null` mimics the legacy format).
   *
   * @throws Error when the thread is not present in the active index.
   */
  async function moveToHistory(
    storageRoot: string,
    started: StartedThread,
    reason: "completed" | "cancelled" | null,
  ): Promise<void> {
    const index = await loadThreadsIndex(storageRoot);
    // Capture the head before the entry is dropped from the index.
    const head = index[started.threadId];
    if (!head) throw new Error("Thread not found in index");

    // Resolved dynamically, as in the original tests, so the file's static
    // import list stays untouched.
    const { saveThreadsIndex } = await import("../store.js");
    const newIndex = { ...index };
    delete newIndex[started.threadId];
    await saveThreadsIndex(storageRoot, newIndex);

    await appendThreadHistory(storageRoot, {
      thread: started.threadId,
      workflow: started.workflow,
      head,
      completedAt: Date.now(),
      reason,
    });
  }

  /** Archives a fresh thread with `reason` and checks what `cmdThreadShow` reports. */
  async function expectArchivedStatus(
    reason: "completed" | "cancelled" | null,
    expectedStatus: "completed" | "cancelled",
  ): Promise<void> {
    await withTestEnv(async (env) => {
      const started = await startThread(env);
      await moveToHistory(env.storageRoot, started, reason);

      const result = await cmdThreadShow(env.storageRoot, started.threadId);

      expect(result.status).toBe(expectedStatus);
      expect(result.done).toBe(true);
      expect(result.background).toBe(null);
      expect(result.thread).toBe(started.threadId);
    });
  }

  test("active idle thread shows status 'idle'", async () => {
    await withTestEnv(async (env) => {
      const { threadId } = await startThread(env);

      // No running marker exists, so the thread should be idle.
      const result = await cmdThreadShow(env.storageRoot, threadId);

      expect(result.status).toBe("idle");
      expect(result.done).toBe(false);
      expect(result.background).toBe(null);
      expect(result.thread).toBe(threadId);
    });
  });

  test("active running thread shows status 'running'", async () => {
    await withTestEnv(async (env) => {
      const { threadId, workflow } = await startThread(env);

      // A marker owned by this (live) process makes the thread look running.
      await createMarker(env.storageRoot, {
        thread: threadId,
        workflow,
        pid: process.pid,
        startedAt: Date.now(),
      });

      try {
        const result = await cmdThreadShow(env.storageRoot, threadId);

        expect(result.status).toBe("running");
        expect(result.done).toBe(false);
        expect(result.background).toBe(null);
        expect(result.thread).toBe(threadId);
      } finally {
        // Remove the marker even when an assertion fails.
        await deleteMarker(env.storageRoot, threadId);
      }
    });
  });

  test("completed thread shows status 'completed'", async () => {
    await expectArchivedStatus("completed", "completed");
  });

  test("cancelled thread shows status 'cancelled'", async () => {
    await expectArchivedStatus("cancelled", "cancelled");
  });

  test("legacy completed thread without reason shows status 'completed'", async () => {
    // History entries written before `reason` existed carry `reason: null`.
    await expectArchivedStatus(null, "completed");
  });
});
|
||||
@@ -0,0 +1,148 @@
|
||||
import { execFileSync } from "node:child_process";
|
||||
import { mkdir, rm, writeFile } from "node:fs/promises";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import type { CasRef, StartNodePayload, ThreadId } from "@uncaged/workflow-protocol";
|
||||
import { describe, expect, test } from "vitest";
|
||||
import { cmdThreadStart } from "../commands/thread.js";
|
||||
import { createUwfStore, loadThreadsIndex } from "../store.js";
|
||||
|
||||
describe("thread start --cwd CLI option", () => {
  /** Paths handed to a test body by `withTestEnv`. */
  type TestEnv = { tmpDir: string; storageRoot: string; workflowPath: string };

  /** Minimal `$START -> planner -> $END` workflow; nesting is significant. */
  const WORKFLOW_YAML = `
name: test-cwd-cli
description: Test workflow for CLI cwd option
roles:
  planner:
    description: Plans the work
    goal: Plan implementation
    capabilities: ["planning"]
    procedure: Plan
    output: |
      $status: "ready"
    frontmatter:
      type: object
      required: ["$status"]
      properties:
        $status: { type: string }
graph:
  $START:
    _:
      role: planner
      prompt: "Plan the work"
      location: null
  planner:
    _:
      role: $END
      prompt: "Done"
      location: null
`;

  /** Writes the test workflow into `tmpDir` and returns its path. */
  async function createTestWorkflow(tmpDir: string): Promise<string> {
    const workflowPath = join(tmpDir, "test-cwd-cli.yaml");
    await writeFile(workflowPath, WORKFLOW_YAML, "utf8");
    return workflowPath;
  }

  /**
   * Creates an isolated temp directory (storage root + workflow file), runs
   * `run`, and always removes the directory afterwards.
   *
   * Cleanup lives in `finally` so a failing assertion or a throwing child
   * process does not leak the directory; paths are local rather than shared
   * suite state.
   */
  async function withTestEnv(run: (env: TestEnv) => Promise<void>): Promise<void> {
    const suffix = `${Date.now()}-${process.pid}-${Math.random().toString(36).slice(2)}`;
    const tmpDir = join(tmpdir(), `uwf-test-cwd-cli-${suffix}`);
    const storageRoot = join(tmpDir, "storage");
    try {
      await mkdir(storageRoot, { recursive: true });
      const workflowPath = await createTestWorkflow(tmpDir);
      await run({ tmpDir, storageRoot, workflowPath });
    } finally {
      await rm(tmpDir, { recursive: true, force: true });
    }
  }

  /**
   * Reads the `cwd` recorded in the thread's start node.
   *
   * Looks up the thread's head in the active index, loads that node from the
   * store, and asserts it is a start node before returning its payload's `cwd`.
   */
  async function getStartNodeCwd(storageRoot: string, threadId: string): Promise<string> {
    const uwf = await createUwfStore(storageRoot);
    const index = await loadThreadsIndex(storageRoot);
    const headHash = index[threadId as ThreadId];
    expect(headHash).toBeDefined();

    const startNode = uwf.store.get(headHash as CasRef);
    expect(startNode).not.toBe(null);
    expect(startNode?.type).toBe(uwf.schemas.startNode);

    const startPayload = startNode?.payload as StartNodePayload;
    return startPayload.cwd;
  }

  test("thread start with custom cwd via cmdThreadStart", async () => {
    await withTestEnv(async ({ tmpDir, storageRoot, workflowPath }) => {
      const testCwd = "/test/custom/path";

      const result = await cmdThreadStart(
        storageRoot,
        workflowPath,
        "test prompt",
        tmpDir,
        testCwd,
      );

      expect(result.thread).toBeDefined();
      expect(await getStartNodeCwd(storageRoot, result.thread)).toBe(testCwd);
    });
  });

  test("thread start without cwd defaults to process.cwd()", async () => {
    await withTestEnv(async ({ tmpDir, storageRoot, workflowPath }) => {
      // The cwd argument is omitted on purpose to exercise the default.
      const result = await cmdThreadStart(storageRoot, workflowPath, "test prompt", tmpDir);

      expect(result.thread).toBeDefined();
      expect(await getStartNodeCwd(storageRoot, result.thread)).toBe(process.cwd());
    });
  });

  test("thread start with relative path fails", async () => {
    await withTestEnv(async ({ tmpDir, storageRoot, workflowPath }) => {
      await expect(
        cmdThreadStart(storageRoot, workflowPath, "test", tmpDir, "relative/path"),
      ).rejects.toThrow();
    });
  });

  test("CLI accepts --cwd option without error", async () => {
    await withTestEnv(async ({ storageRoot, workflowPath }) => {
      const testCwd = "/test/cli/path";
      // Runs the built CLI entry point, resolved relative to the directory
      // the test runner was started from.
      const uwfBin = join(process.cwd(), "dist", "cli.js");
      const childOptions = {
        env: { ...process.env, UWF_STORAGE_ROOT: storageRoot },
        encoding: "utf8" as const,
      };

      // Register the workflow so it can be started by name.
      execFileSync("node", [uwfBin, "workflow", "add", workflowPath], childOptions);

      // execFileSync throws on a non-zero exit, so reaching the assertions
      // below already means the CLI accepted --cwd.
      const output = execFileSync(
        "node",
        [uwfBin, "thread", "start", "test-cwd-cli", "-p", "test prompt", "--cwd", testCwd],
        childOptions,
      );

      const result: { thread?: unknown; workflow?: unknown } = JSON.parse(output);
      expect(result.thread).toBeDefined();
      expect(result.workflow).toBeDefined();
      // The stored cwd value itself is covered by the cmdThreadStart tests above.
    });
  });
});
|
||||
@@ -51,11 +51,11 @@ function makeWorkflow(overrides?: Partial<WorkflowPayload>): WorkflowPayload {
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: { _: { role: "writer", prompt: "Begin writing" } },
|
||||
writer: { _: { role: "reviewer", prompt: "Review this: {{{plan}}}" } },
|
||||
$START: { _: { role: "writer", prompt: "Begin writing", location: null } },
|
||||
writer: { _: { role: "reviewer", prompt: "Review this: {{{plan}}}", location: null } },
|
||||
reviewer: {
|
||||
approved: { role: "$END", prompt: "Done: {{{summary}}}" },
|
||||
rejected: { role: "writer", prompt: "Fix: {{{reason}}}" },
|
||||
approved: { role: "$END", prompt: "Done: {{{summary}}}", location: null },
|
||||
rejected: { role: "writer", prompt: "Fix: {{{reason}}}", location: null },
|
||||
},
|
||||
},
|
||||
};
|
||||
@@ -67,7 +67,7 @@ function makeWorkflow(overrides?: Partial<WorkflowPayload>): WorkflowPayload {
|
||||
describe("Suite 1: Role Reference Integrity", () => {
|
||||
test("1.1 graph references unknown role", () => {
|
||||
const wf = makeWorkflow();
|
||||
wf.graph.nonexistent = { _: { role: "$END", prompt: "done" } };
|
||||
wf.graph.nonexistent = { _: { role: "$END", prompt: "done", location: null } };
|
||||
const errors = validateWorkflow(wf);
|
||||
expect(errors.some((e) => e.includes('unknown role "nonexistent"'))).toBe(true);
|
||||
});
|
||||
@@ -138,8 +138,8 @@ describe("Suite 2: Graph Structure", () => {
|
||||
test("2.2 $START has multiple status keys", () => {
|
||||
const wf = makeWorkflow();
|
||||
wf.graph.$START = {
|
||||
_: { role: "writer", prompt: "Begin" },
|
||||
other: { role: "reviewer", prompt: "Also" },
|
||||
_: { role: "writer", prompt: "Begin", location: null },
|
||||
other: { role: "reviewer", prompt: "Also", location: null },
|
||||
};
|
||||
const errors = validateWorkflow(wf);
|
||||
expect(
|
||||
@@ -149,7 +149,7 @@ describe("Suite 2: Graph Structure", () => {
|
||||
|
||||
test("2.3 $START edge uses non-_ status", () => {
|
||||
const wf = makeWorkflow();
|
||||
wf.graph.$START = { ready: { role: "writer", prompt: "Begin" } };
|
||||
wf.graph.$START = { ready: { role: "writer", prompt: "Begin", location: null } };
|
||||
const errors = validateWorkflow(wf);
|
||||
expect(
|
||||
errors.some((e) => e.includes('$START must have exactly one edge with status "_"')),
|
||||
@@ -158,7 +158,7 @@ describe("Suite 2: Graph Structure", () => {
|
||||
|
||||
test("2.4 $END has outgoing edges", () => {
|
||||
const wf = makeWorkflow();
|
||||
wf.graph.$END = { _: { role: "writer", prompt: "Loop" } };
|
||||
wf.graph.$END = { _: { role: "writer", prompt: "Loop", location: null } };
|
||||
const errors = validateWorkflow(wf);
|
||||
expect(errors.some((e) => e.includes("$END must not have outgoing edges"))).toBe(true);
|
||||
});
|
||||
@@ -177,7 +177,7 @@ describe("Suite 2: Graph Structure", () => {
|
||||
required: ["$status"],
|
||||
} as unknown as string,
|
||||
};
|
||||
wf.graph.isolated = { _: { role: "$END", prompt: "done" } };
|
||||
wf.graph.isolated = { _: { role: "$END", prompt: "done", location: null } };
|
||||
const errors = validateWorkflow(wf);
|
||||
expect(errors.some((e) => e.includes('role "isolated" is not reachable from $START'))).toBe(
|
||||
true,
|
||||
@@ -186,7 +186,7 @@ describe("Suite 2: Graph Structure", () => {
|
||||
|
||||
test("2.6 edge target references invalid role", () => {
|
||||
const wf = makeWorkflow();
|
||||
wf.graph.writer = { _: { role: "ghost", prompt: "Go to ghost" } };
|
||||
wf.graph.writer = { _: { role: "ghost", prompt: "Go to ghost", location: null } };
|
||||
const errors = validateWorkflow(wf);
|
||||
expect(errors.some((e) => e.includes('unknown target role "ghost"'))).toBe(true);
|
||||
});
|
||||
@@ -196,8 +196,8 @@ describe("Suite 3: Status-Edge Consistency", () => {
|
||||
test("3.1 single-exit role with multiple graph keys", () => {
|
||||
const wf = makeWorkflow();
|
||||
wf.graph.writer = {
|
||||
_: { role: "reviewer", prompt: "Review" },
|
||||
extra: { role: "$END", prompt: "Done" },
|
||||
_: { role: "reviewer", prompt: "Review", location: null },
|
||||
extra: { role: "$END", prompt: "Done", location: null },
|
||||
};
|
||||
const errors = validateWorkflow(wf);
|
||||
expect(
|
||||
@@ -209,7 +209,7 @@ describe("Suite 3: Status-Edge Consistency", () => {
|
||||
|
||||
test("3.2 single-exit role missing _ key", () => {
|
||||
const wf = makeWorkflow();
|
||||
wf.graph.writer = { done: { role: "reviewer", prompt: "Review" } };
|
||||
wf.graph.writer = { done: { role: "reviewer", prompt: "Review", location: null } };
|
||||
const errors = validateWorkflow(wf);
|
||||
expect(
|
||||
errors.some((e) => e.includes('role "writer" is single-exit but graph has no "_" key')),
|
||||
@@ -219,9 +219,9 @@ describe("Suite 3: Status-Edge Consistency", () => {
|
||||
test("3.3 multi-exit role with extra statuses", () => {
|
||||
const wf = makeWorkflow();
|
||||
wf.graph.reviewer = {
|
||||
approved: { role: "$END", prompt: "Done" },
|
||||
rejected: { role: "writer", prompt: "Fix" },
|
||||
timeout: { role: "$END", prompt: "Timed out" },
|
||||
approved: { role: "$END", prompt: "Done", location: null },
|
||||
rejected: { role: "writer", prompt: "Fix", location: null },
|
||||
timeout: { role: "$END", prompt: "Timed out", location: null },
|
||||
};
|
||||
const errors = validateWorkflow(wf);
|
||||
expect(
|
||||
@@ -232,7 +232,7 @@ describe("Suite 3: Status-Edge Consistency", () => {
|
||||
test("3.4 multi-exit role missing a status", () => {
|
||||
const wf = makeWorkflow();
|
||||
wf.graph.reviewer = {
|
||||
approved: { role: "$END", prompt: "Done" },
|
||||
approved: { role: "$END", prompt: "Done", location: null },
|
||||
};
|
||||
const errors = validateWorkflow(wf);
|
||||
expect(
|
||||
@@ -242,7 +242,7 @@ describe("Suite 3: Status-Edge Consistency", () => {
|
||||
|
||||
test("3.5 multi-exit role with _ key", () => {
|
||||
const wf = makeWorkflow();
|
||||
wf.graph.reviewer = { _: { role: "$END", prompt: "Done" } };
|
||||
wf.graph.reviewer = { _: { role: "$END", prompt: "Done", location: null } };
|
||||
const errors = validateWorkflow(wf);
|
||||
expect(errors.some((e) => e.includes('role "reviewer" is multi-exit but graph uses "_"'))).toBe(
|
||||
true,
|
||||
@@ -265,8 +265,8 @@ describe("Suite 3b: Enum-Based Multi-Exit", () => {
|
||||
} as unknown as string,
|
||||
};
|
||||
wf.graph.reviewer = {
|
||||
approved: { role: "$END", prompt: "Done" },
|
||||
rejected: { role: "writer", prompt: "Fix: {{{comments}}}" },
|
||||
approved: { role: "$END", prompt: "Done", location: null },
|
||||
rejected: { role: "writer", prompt: "Fix: {{{comments}}}", location: null },
|
||||
};
|
||||
const errors = validateWorkflow(wf);
|
||||
expect(errors).toEqual([]);
|
||||
@@ -286,9 +286,9 @@ describe("Suite 3b: Enum-Based Multi-Exit", () => {
|
||||
} as unknown as string,
|
||||
};
|
||||
wf.graph.reviewer = {
|
||||
approved: { role: "$END", prompt: "Done" },
|
||||
rejected: { role: "writer", prompt: "Fix" },
|
||||
timeout: { role: "$END", prompt: "Timed out" },
|
||||
approved: { role: "$END", prompt: "Done", location: null },
|
||||
rejected: { role: "writer", prompt: "Fix", location: null },
|
||||
timeout: { role: "$END", prompt: "Timed out", location: null },
|
||||
};
|
||||
const errors = validateWorkflow(wf);
|
||||
expect(errors.some((e) => e.includes("extra status keys: timeout"))).toBe(true);
|
||||
@@ -308,7 +308,7 @@ describe("Suite 3b: Enum-Based Multi-Exit", () => {
|
||||
} as unknown as string,
|
||||
};
|
||||
wf.graph.reviewer = {
|
||||
approved: { role: "$END", prompt: "Done" },
|
||||
approved: { role: "$END", prompt: "Done", location: null },
|
||||
};
|
||||
const errors = validateWorkflow(wf);
|
||||
expect(errors.some((e) => e.includes("missing status keys: rejected"))).toBe(true);
|
||||
@@ -327,7 +327,7 @@ describe("Suite 3b: Enum-Based Multi-Exit", () => {
|
||||
required: ["$status", "plan"],
|
||||
} as unknown as string,
|
||||
};
|
||||
wf.graph.writer = { _: { role: "reviewer", prompt: "Review: {{{plan}}}" } };
|
||||
wf.graph.writer = { _: { role: "reviewer", prompt: "Review: {{{plan}}}", location: null } };
|
||||
const errors = validateWorkflow(wf);
|
||||
expect(errors).toEqual([]);
|
||||
});
|
||||
@@ -346,8 +346,8 @@ describe("Suite 3b: Enum-Based Multi-Exit", () => {
|
||||
} as unknown as string,
|
||||
};
|
||||
wf.graph.reviewer = {
|
||||
approved: { role: "$END", prompt: "Done: {{{nonexistent}}}" },
|
||||
rejected: { role: "writer", prompt: "Fix: {{{comments}}}" },
|
||||
approved: { role: "$END", prompt: "Done: {{{nonexistent}}}", location: null },
|
||||
rejected: { role: "writer", prompt: "Fix: {{{comments}}}", location: null },
|
||||
};
|
||||
const errors = validateWorkflow(wf);
|
||||
expect(errors.some((e) => e.includes("nonexistent") && e.includes("not found"))).toBe(true);
|
||||
@@ -357,7 +357,7 @@ describe("Suite 3b: Enum-Based Multi-Exit", () => {
|
||||
describe("Suite 4: Mustache Template Variable Existence", () => {
|
||||
test("4.1 prompt references nonexistent variable (single-exit)", () => {
|
||||
const wf = makeWorkflow();
|
||||
wf.graph.writer = { _: { role: "reviewer", prompt: "Review: {{{branch}}}" } };
|
||||
wf.graph.writer = { _: { role: "reviewer", prompt: "Review: {{{branch}}}", location: null } };
|
||||
const errors = validateWorkflow(wf);
|
||||
expect(
|
||||
errors.some((e) =>
|
||||
@@ -369,8 +369,8 @@ describe("Suite 4: Mustache Template Variable Existence", () => {
|
||||
test("4.2 prompt references nonexistent variable (multi-exit)", () => {
|
||||
const wf = makeWorkflow();
|
||||
wf.graph.reviewer = {
|
||||
approved: { role: "$END", prompt: "Done: {{{branch}}}" },
|
||||
rejected: { role: "writer", prompt: "Fix: {{{reason}}}" },
|
||||
approved: { role: "$END", prompt: "Done: {{{branch}}}", location: null },
|
||||
rejected: { role: "writer", prompt: "Fix: {{{reason}}}", location: null },
|
||||
};
|
||||
const errors = validateWorkflow(wf);
|
||||
expect(
|
||||
@@ -388,7 +388,7 @@ describe("Suite 4: Mustache Template Variable Existence", () => {
|
||||
|
||||
test("4.4 $status variable is always valid", () => {
|
||||
const wf = makeWorkflow();
|
||||
wf.graph.writer = { _: { role: "reviewer", prompt: "Status: {{$status}}" } };
|
||||
wf.graph.writer = { _: { role: "reviewer", prompt: "Status: {{$status}}", location: null } };
|
||||
const errors = validateWorkflow(wf);
|
||||
expect(errors).toEqual([]);
|
||||
});
|
||||
@@ -461,9 +461,9 @@ describe("Suite 6: Multiple Errors Collection", () => {
|
||||
} as unknown as string,
|
||||
};
|
||||
// unknown graph reference
|
||||
wf.graph.nonexistent = { _: { role: "$END", prompt: "done" } };
|
||||
wf.graph.nonexistent = { _: { role: "$END", prompt: "done", location: null } };
|
||||
// bad mustache var
|
||||
wf.graph.writer = { _: { role: "reviewer", prompt: "{{{badvar}}}" } };
|
||||
wf.graph.writer = { _: { role: "reviewer", prompt: "{{{badvar}}}", location: null } };
|
||||
const errors = validateWorkflow(wf);
|
||||
expect(errors.length).toBeGreaterThanOrEqual(3);
|
||||
});
|
||||
|
||||
@@ -41,8 +41,8 @@ function makeMinimalPayload(name: string, description: string): WorkflowPayload
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: { _: { role: "worker", prompt: "start working" } },
|
||||
worker: { _: { role: "$END", prompt: "done" } },
|
||||
$START: { _: { role: "worker", prompt: "start working", location: null } },
|
||||
worker: { _: { role: "$END", prompt: "done", location: null } },
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import type { CasRef, ThreadId } from "@uncaged/workflow-protocol";
|
||||
import type { CasRef, ThreadId, ThreadStatus } from "@uncaged/workflow-protocol";
|
||||
import { Command } from "commander";
|
||||
import {
|
||||
cmdCasGet,
|
||||
@@ -18,10 +18,14 @@ import { cmdLogClean, cmdLogList, cmdLogShow } from "./commands/log.js";
|
||||
import { cmdSetup, cmdSetupInteractive } from "./commands/setup.js";
|
||||
import {
|
||||
cmdSkillActor,
|
||||
cmdSkillAdapter,
|
||||
cmdSkillArchitecture,
|
||||
cmdSkillAuthor,
|
||||
cmdSkillCli,
|
||||
cmdSkillDeveloper,
|
||||
cmdSkillList,
|
||||
cmdSkillModerator,
|
||||
cmdSkillUser,
|
||||
cmdSkillYaml,
|
||||
} from "./commands/skill.js";
|
||||
import { cmdStepFork, cmdStepList, cmdStepRead, cmdStepShow } from "./commands/step.js";
|
||||
@@ -34,7 +38,6 @@ import {
|
||||
cmdThreadStart,
|
||||
cmdThreadStop,
|
||||
THREAD_READ_DEFAULT_QUOTA,
|
||||
type ThreadStatus,
|
||||
} from "./commands/thread.js";
|
||||
import { parseTimeInput } from "./commands/thread-time-parser.js";
|
||||
import { cmdWorkflowAdd, cmdWorkflowList, cmdWorkflowShow } from "./commands/workflow.js";
|
||||
@@ -114,10 +117,17 @@ thread
|
||||
.description("Create a thread without executing")
|
||||
.argument("<workflow>", "Workflow name or hash")
|
||||
.requiredOption("-p, --prompt <text>", "User prompt")
|
||||
.action((workflow: string, opts: { prompt: string }) => {
|
||||
.option("--cwd <path>", "Working directory for thread execution (default: process.cwd())")
|
||||
.action((workflow: string, opts: { prompt: string; cwd: string | undefined }) => {
|
||||
const storageRoot = resolveStorageRoot();
|
||||
runAction(async () => {
|
||||
const result = await cmdThreadStart(storageRoot, workflow, opts.prompt, process.cwd());
|
||||
const result = await cmdThreadStart(
|
||||
storageRoot,
|
||||
workflow,
|
||||
opts.prompt,
|
||||
process.cwd(),
|
||||
opts.cwd ?? process.cwd(),
|
||||
);
|
||||
writeOutput(result);
|
||||
});
|
||||
});
|
||||
@@ -511,6 +521,27 @@ skill
|
||||
console.log(cmdSkillActor());
|
||||
});
|
||||
|
||||
skill
|
||||
.command("adapter")
|
||||
.description("Print the adapter reference (building agent adapters)")
|
||||
.action(() => {
|
||||
console.log(cmdSkillAdapter());
|
||||
});
|
||||
|
||||
skill
|
||||
.command("author")
|
||||
.description("Print the author reference (workflow YAML design guide)")
|
||||
.action(() => {
|
||||
console.log(cmdSkillAuthor());
|
||||
});
|
||||
|
||||
skill
|
||||
.command("developer")
|
||||
.description("Print the developer reference (coding conventions + architecture)")
|
||||
.action(() => {
|
||||
console.log(cmdSkillDeveloper());
|
||||
});
|
||||
|
||||
skill
|
||||
.command("moderator")
|
||||
.description("Print the moderator reference")
|
||||
@@ -518,6 +549,13 @@ skill
|
||||
console.log(cmdSkillModerator());
|
||||
});
|
||||
|
||||
skill
|
||||
.command("user")
|
||||
.description("Print the user reference (CLI guide + typical workflows)")
|
||||
.action(() => {
|
||||
console.log(cmdSkillUser());
|
||||
});
|
||||
|
||||
skill
|
||||
.command("list")
|
||||
.description("List all available skill names")
|
||||
@@ -532,7 +570,7 @@ program
|
||||
.option("--base-url <url>", "OpenAI-compatible API base URL")
|
||||
.option("--api-key <key>", "API key")
|
||||
.option("--model <name>", "Default model name")
|
||||
.option("--agent <name>", "Default agent alias")
|
||||
.option("--agent <name>", "Default agent adapter (e.g. hermes → uwf-hermes)")
|
||||
.action(
|
||||
(opts: {
|
||||
provider?: string;
|
||||
|
||||
@@ -5,7 +5,10 @@ import { parse, stringify } from "yaml";
|
||||
/**
|
||||
* Valid configuration key schema
|
||||
*/
|
||||
const VALID_CONFIG_KEYS: Record<string, { nested: boolean; knownFields?: string[] }> = {
|
||||
const VALID_CONFIG_KEYS: Record<
|
||||
string,
|
||||
{ nested: boolean; knownFields?: string[]; minDepth?: number }
|
||||
> = {
|
||||
providers: {
|
||||
nested: true,
|
||||
knownFields: ["baseUrl", "apiKey"],
|
||||
@@ -18,6 +21,17 @@ const VALID_CONFIG_KEYS: Record<string, { nested: boolean; knownFields?: string[
|
||||
nested: true,
|
||||
knownFields: ["command", "args"],
|
||||
},
|
||||
agentOverrides: {
|
||||
nested: true,
|
||||
// agentOverrides.<workflowName>.<roleName> = agentAlias (string value)
|
||||
// No knownFields — workflow/role names are user-defined
|
||||
},
|
||||
modelOverrides: {
|
||||
nested: true,
|
||||
minDepth: 2,
|
||||
// modelOverrides.<scenario> = modelAlias (string value)
|
||||
// No knownFields — scenarios are user-defined
|
||||
},
|
||||
defaultAgent: { nested: false },
|
||||
defaultModel: { nested: false },
|
||||
};
|
||||
@@ -43,8 +57,9 @@ function validateConfigKey(path: string[]): void {
|
||||
throw new Error(`${topLevel} is a scalar key and cannot have nested properties`);
|
||||
}
|
||||
|
||||
// Nested keys must have at least 3 segments (e.g., providers.myProvider.baseUrl)
|
||||
if (schema.nested && path.length < 3) {
|
||||
// Nested keys must have at least minDepth segments (default 3)
|
||||
const minDepth = schema.minDepth ?? 3;
|
||||
if (schema.nested && path.length < minDepth) {
|
||||
const fields = schema.knownFields?.join(", ") ?? "";
|
||||
throw new Error(
|
||||
`Incomplete path for ${topLevel}. Must specify a field (e.g., ${topLevel}.<name>.<field>). Valid fields: ${fields}`,
|
||||
|
||||
@@ -1,12 +1,26 @@
|
||||
export {
|
||||
generateActorReference as cmdSkillActor,
|
||||
generateAdapterReference as cmdSkillAdapter,
|
||||
generateArchitectureReference as cmdSkillArchitecture,
|
||||
generateAuthorReference as cmdSkillAuthor,
|
||||
generateCliReference as cmdSkillCli,
|
||||
generateDeveloperReference as cmdSkillDeveloper,
|
||||
generateModeratorReference as cmdSkillModerator,
|
||||
generateUserReference as cmdSkillUser,
|
||||
generateYamlReference as cmdSkillYaml,
|
||||
} from "@uncaged/workflow-util";
|
||||
|
||||
const SKILL_NAMES = ["cli", "architecture", "yaml", "moderator", "actor"] as const;
|
||||
const SKILL_NAMES = [
|
||||
"cli",
|
||||
"architecture",
|
||||
"yaml",
|
||||
"moderator",
|
||||
"actor",
|
||||
"user",
|
||||
"author",
|
||||
"developer",
|
||||
"adapter",
|
||||
] as const;
|
||||
|
||||
export function cmdSkillList(): ReadonlyArray<string> {
|
||||
return [...SKILL_NAMES];
|
||||
|
||||
@@ -12,6 +12,7 @@ import type {
|
||||
StepOutput,
|
||||
ThreadId,
|
||||
ThreadListItem,
|
||||
ThreadStatus,
|
||||
ThreadsIndex,
|
||||
WorkflowConfig,
|
||||
WorkflowPayload,
|
||||
@@ -266,7 +267,13 @@ export async function cmdThreadStart(
|
||||
workflowId: string,
|
||||
prompt: string,
|
||||
projectRoot: string,
|
||||
cwd: string = process.cwd(),
|
||||
): Promise<StartOutput> {
|
||||
// Validate cwd is an absolute path
|
||||
if (!isAbsolute(cwd)) {
|
||||
fail("cwd must be an absolute path");
|
||||
}
|
||||
|
||||
const uwf = await createUwfStore(storageRoot);
|
||||
const workflowHash = await resolveWorkflowCasRef(uwf, storageRoot, workflowId, projectRoot);
|
||||
|
||||
@@ -278,6 +285,7 @@ export async function cmdThreadStart(
|
||||
const startPayload: StartNodePayload = {
|
||||
workflow: workflowHash,
|
||||
prompt,
|
||||
cwd,
|
||||
};
|
||||
|
||||
const headHash = await uwf.store.put(uwf.schemas.startNode, startPayload);
|
||||
@@ -308,10 +316,16 @@ export async function cmdThreadShow(storageRoot: string, threadId: ThreadId): Pr
|
||||
if (workflow === null) {
|
||||
fail(`failed to resolve workflow from head: ${activeHead}`);
|
||||
}
|
||||
|
||||
// Check if thread is running
|
||||
const runningMarker = await isThreadRunning(storageRoot, threadId);
|
||||
const status: ThreadStatus = runningMarker !== null ? "running" : "idle";
|
||||
|
||||
return {
|
||||
workflow,
|
||||
thread: threadId,
|
||||
head: activeHead,
|
||||
status,
|
||||
done: false,
|
||||
background: null,
|
||||
};
|
||||
@@ -319,10 +333,13 @@ export async function cmdThreadShow(storageRoot: string, threadId: ThreadId): Pr
|
||||
|
||||
const hist = await findThreadInHistory(storageRoot, threadId);
|
||||
if (hist !== null) {
|
||||
const status: ThreadStatus = hist.reason === "cancelled" ? "cancelled" : "completed";
|
||||
|
||||
return {
|
||||
workflow: hist.workflow,
|
||||
thread: threadId,
|
||||
head: hist.head,
|
||||
status,
|
||||
done: true,
|
||||
background: null,
|
||||
};
|
||||
@@ -331,8 +348,6 @@ export async function cmdThreadShow(storageRoot: string, threadId: ThreadId): Pr
|
||||
fail(`thread not found: ${threadId}`);
|
||||
}
|
||||
|
||||
export type ThreadStatus = "idle" | "running" | "completed" | "cancelled";
|
||||
|
||||
export type ThreadListItemWithStatus = ThreadListItem & {
|
||||
status: ThreadStatus;
|
||||
};
|
||||
@@ -772,6 +787,7 @@ function spawnAgent(
|
||||
threadId: ThreadId,
|
||||
role: string,
|
||||
edgePrompt: string,
|
||||
cwd: string,
|
||||
): CasRef {
|
||||
const argv = [...agent.args, "--thread", threadId, "--role", role, "--prompt", edgePrompt];
|
||||
let stdout: string;
|
||||
@@ -780,6 +796,7 @@ function spawnAgent(
|
||||
encoding: "utf8",
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
maxBuffer: 50 * 1024 * 1024, // 50 MB — stream-json output can be large
|
||||
cwd,
|
||||
});
|
||||
} catch (e) {
|
||||
const err = e as NodeJS.ErrnoException & { stderr?: Buffer | string | null };
|
||||
@@ -938,6 +955,7 @@ async function cmdThreadStepBackground(
|
||||
workflow: workflowHash,
|
||||
thread: threadId,
|
||||
head: headHash,
|
||||
status: "running",
|
||||
done: false,
|
||||
background: true,
|
||||
},
|
||||
@@ -980,6 +998,7 @@ async function cmdThreadStepOnce(
|
||||
workflow: workflowHash,
|
||||
thread: threadId,
|
||||
head: headHash,
|
||||
status: "completed",
|
||||
done: true,
|
||||
background: null,
|
||||
};
|
||||
@@ -987,6 +1006,11 @@ async function cmdThreadStepOnce(
|
||||
|
||||
const role = nextResult.value.role;
|
||||
const edgePrompt = nextResult.value.prompt;
|
||||
|
||||
// Resolve cwd: use edge location if provided, otherwise inherit thread.cwd
|
||||
const threadCwd = chain.start.cwd;
|
||||
const effectiveCwd = nextResult.value.location !== null ? nextResult.value.location : threadCwd;
|
||||
|
||||
const config = await loadWorkflowConfig(storageRoot);
|
||||
const agent = resolveAgentConfig(config, workflow, role, agentOverride);
|
||||
|
||||
@@ -995,7 +1019,7 @@ async function cmdThreadStepOnce(
|
||||
});
|
||||
|
||||
loadDotenv({ path: getEnvPath(storageRoot) });
|
||||
const newHead = spawnAgent(plog, agent, threadId, role, edgePrompt);
|
||||
const newHead = spawnAgent(plog, agent, threadId, role, edgePrompt, effectiveCwd);
|
||||
|
||||
plog.log(PL_AGENT_DONE, `agent returned head=${newHead}`, null);
|
||||
|
||||
@@ -1027,10 +1051,14 @@ async function cmdThreadStepOnce(
|
||||
await archiveThread(storageRoot, threadId, workflowHash, newHead);
|
||||
}
|
||||
|
||||
// Determine status from the done flag: "completed" when the thread is done, otherwise "idle"
|
||||
const status: ThreadStatus = done ? "completed" : "idle";
|
||||
|
||||
return {
|
||||
workflow: workflowHash,
|
||||
thread: threadId,
|
||||
head: newHead,
|
||||
status,
|
||||
done,
|
||||
background: null,
|
||||
};
|
||||
|
||||
@@ -61,6 +61,7 @@ function normalizeGraph(
|
||||
normalized[status] = {
|
||||
role: target.role,
|
||||
prompt: target.prompt,
|
||||
location: target.location ?? null,
|
||||
};
|
||||
}
|
||||
result[node] = normalized;
|
||||
|
||||
@@ -0,0 +1,198 @@
|
||||
import { describe, expect, test } from "vitest";
|
||||
import { evaluate } from "../evaluate.js";
|
||||
|
||||
describe("Edge prompt template variable resolution", () => {
|
||||
test("returns error when rendered prompt is empty string", () => {
|
||||
const graph = {
|
||||
$START: {
|
||||
_: { role: "classifier", prompt: "{{{userPrompt}}}", location: null },
|
||||
},
|
||||
};
|
||||
|
||||
const result = evaluate(graph, "$START", {});
|
||||
|
||||
expect(result.ok).toBe(false);
|
||||
if (!result.ok) {
|
||||
expect(result.error.message).toContain("prompt");
|
||||
expect(result.error.message).toContain("empty");
|
||||
}
|
||||
});
|
||||
|
||||
test("returns error when rendered prompt is whitespace-only", () => {
|
||||
const graph = {
|
||||
$START: {
|
||||
_: { role: "classifier", prompt: " {{{userPrompt}}} ", location: null },
|
||||
},
|
||||
};
|
||||
|
||||
const result = evaluate(graph, "$START", {});
|
||||
|
||||
expect(result.ok).toBe(false);
|
||||
if (!result.ok) {
|
||||
expect(result.error.message).toContain("prompt");
|
||||
expect(result.error.message).toContain("empty");
|
||||
}
|
||||
});
|
||||
|
||||
test("succeeds when all template variables resolve to non-empty values", () => {
|
||||
const graph = {
|
||||
$START: {
|
||||
_: { role: "classifier", prompt: "{{{userPrompt}}}", location: null },
|
||||
},
|
||||
};
|
||||
|
||||
const result = evaluate(graph, "$START", { userPrompt: "Fix the bug" });
|
||||
|
||||
expect(result.ok).toBe(true);
|
||||
if (result.ok) {
|
||||
expect(result.value.prompt).toBe("Fix the bug");
|
||||
}
|
||||
});
|
||||
|
||||
test("succeeds with static (no-variable) prompt", () => {
|
||||
const graph = {
|
||||
$START: {
|
||||
_: { role: "classifier", prompt: "Classify this input", location: null },
|
||||
},
|
||||
};
|
||||
|
||||
const result = evaluate(graph, "$START", {});
|
||||
|
||||
expect(result.ok).toBe(true);
|
||||
if (result.ok) {
|
||||
expect(result.value.prompt).toBe("Classify this input");
|
||||
}
|
||||
});
|
||||
|
||||
test("succeeds when prompt has mix of static text and unresolved variables", () => {
|
||||
const graph = {
|
||||
$START: {
|
||||
_: { role: "classifier", prompt: "Please handle: {{{userPrompt}}}", location: null },
|
||||
},
|
||||
};
|
||||
|
||||
const result = evaluate(graph, "$START", {});
|
||||
|
||||
expect(result.ok).toBe(true);
|
||||
if (result.ok) {
|
||||
expect(result.value.prompt).toBe("Please handle: ");
|
||||
}
|
||||
});
|
||||
|
||||
test("returns error when ALL variables missing and no static text remains", () => {
|
||||
const graph = {
|
||||
$START: {
|
||||
_: { role: "classifier", prompt: "{{{a}}}{{{b}}}", location: null },
|
||||
},
|
||||
};
|
||||
|
||||
const result = evaluate(graph, "$START", {});
|
||||
|
||||
expect(result.ok).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("Moderator location resolution", () => {
|
||||
test("returns null location when edge has no location field", () => {
|
||||
const graph = {
|
||||
planner: {
|
||||
ready: {
|
||||
role: "coder",
|
||||
prompt: "Implement the code",
|
||||
location: null,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const result = evaluate(graph, "planner", { $status: "ready" });
|
||||
|
||||
expect(result.ok).toBe(true);
|
||||
if (result.ok) {
|
||||
expect(result.value.location).toBe(null);
|
||||
}
|
||||
});
|
||||
|
||||
test("resolves static location string", () => {
|
||||
const graph = {
|
||||
planner: {
|
||||
ready: {
|
||||
role: "coder",
|
||||
prompt: "Implement the code",
|
||||
location: "/static/path",
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const result = evaluate(graph, "planner", { $status: "ready" });
|
||||
|
||||
expect(result.ok).toBe(true);
|
||||
if (result.ok) {
|
||||
expect(result.value.location).toBe("/static/path");
|
||||
}
|
||||
});
|
||||
|
||||
test("resolves mustache template location", () => {
|
||||
const graph = {
|
||||
planner: {
|
||||
ready: {
|
||||
role: "coder",
|
||||
prompt: "Implement the code",
|
||||
location: "{{{repoPath}}}",
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const result = evaluate(graph, "planner", {
|
||||
$status: "ready",
|
||||
repoPath: "/home/user/repo",
|
||||
});
|
||||
|
||||
expect(result.ok).toBe(true);
|
||||
if (result.ok) {
|
||||
expect(result.value.location).toBe("/home/user/repo");
|
||||
}
|
||||
});
|
||||
|
||||
test("resolves mustache template with multiple variables", () => {
|
||||
const graph = {
|
||||
planner: {
|
||||
ready: {
|
||||
role: "coder",
|
||||
prompt: "Implement the code",
|
||||
location: "{{{basePath}}}/{{{projectName}}}",
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const result = evaluate(graph, "planner", {
|
||||
$status: "ready",
|
||||
basePath: "/home/user",
|
||||
projectName: "myproject",
|
||||
});
|
||||
|
||||
expect(result.ok).toBe(true);
|
||||
if (result.ok) {
|
||||
expect(result.value.location).toBe("/home/user/myproject");
|
||||
}
|
||||
});
|
||||
|
||||
test("handles missing template variable gracefully", () => {
|
||||
const graph = {
|
||||
planner: {
|
||||
ready: {
|
||||
role: "coder",
|
||||
prompt: "Implement the code",
|
||||
location: "{{{repoPath}}}",
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const result = evaluate(graph, "planner", { $status: "ready" });
|
||||
|
||||
expect(result.ok).toBe(true);
|
||||
if (result.ok) {
|
||||
// Mustache renders missing variables as empty string
|
||||
expect(result.value.location).toBe("");
|
||||
}
|
||||
});
|
||||
});
|
||||
@@ -43,7 +43,16 @@ export function evaluate(
|
||||
|
||||
try {
|
||||
const prompt = mustache.render(target.prompt, lastOutput);
|
||||
return { ok: true, value: { role: target.role, prompt } };
|
||||
if (prompt.trim() === "") {
|
||||
return {
|
||||
ok: false,
|
||||
error: new Error(
|
||||
`edge prompt resolved to empty string for role "${target.role}" (template: "${target.prompt}"). Check that upstream output includes required variables.`,
|
||||
),
|
||||
};
|
||||
}
|
||||
const location = target.location !== null ? mustache.render(target.location, lastOutput) : null;
|
||||
return { ok: true, value: { role: target.role, prompt, location } };
|
||||
} catch (error) {
|
||||
return {
|
||||
ok: false,
|
||||
|
||||
@@ -4,4 +4,6 @@ export type Result<T, E> = { ok: true; value: T } | { ok: false; error: E };
|
||||
export type EvaluateResult = {
|
||||
role: string;
|
||||
prompt: string;
|
||||
/** Resolved working directory from edge location field (null = inherit thread cwd). */
|
||||
location: string | null;
|
||||
};
|
||||
|
||||
@@ -36,8 +36,13 @@ function isTarget(value: unknown): boolean {
|
||||
if (!isRecord(value)) {
|
||||
return false;
|
||||
}
|
||||
const hasValidLocation =
|
||||
value.location === undefined || value.location === null || typeof value.location === "string";
|
||||
return (
|
||||
typeof value.role === "string" && typeof value.prompt === "string" && value.prompt.trim() !== ""
|
||||
typeof value.role === "string" &&
|
||||
typeof value.prompt === "string" &&
|
||||
value.prompt.trim() !== "" &&
|
||||
hasValidLocation
|
||||
);
|
||||
}
|
||||
|
||||
@@ -95,5 +100,22 @@ export function parseWorkflowPayload(raw: unknown): WorkflowPayload | null {
|
||||
if (!isStringRecord(raw.roles, isRoleDefinition) || !isGraph(raw.graph)) {
|
||||
return null;
|
||||
}
|
||||
return raw as WorkflowPayload;
|
||||
|
||||
// Normalize location field: undefined → null
|
||||
const normalized = { ...raw } as WorkflowPayload;
|
||||
for (const roleName of Object.keys(normalized.graph)) {
|
||||
const statusMap = normalized.graph[roleName];
|
||||
if (statusMap !== undefined) {
|
||||
for (const status of Object.keys(statusMap)) {
|
||||
const target = statusMap[status];
|
||||
if (target !== undefined) {
|
||||
if (target.location === undefined) {
|
||||
target.location = null;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return normalized;
|
||||
}
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
# @uncaged/workflow-agent-hermes
|
||||
|
||||
`uwf-hermes` agent — spawns Hermes chat via ACP and captures session detail.
|
||||
`uwf-hermes` — an **agent adapter** that bridges the `uwf` workflow engine and the Hermes CLI.
|
||||
|
||||
## Overview
|
||||
|
||||
Layer 3 agent implementation. Wraps the Hermes CLI using the Agent Client Protocol (ACP). On first visit to a role it sends a composed prompt (role definition, task, history, edge prompt); on continuation it resumes the cached session. Session transcripts and raw output are stored as CAS detail nodes.
|
||||
`uwf-hermes` is an adapter (not the Hermes CLI itself). The `uwf` engine speaks a generic agent protocol (stdin/stdout frontmatter contract); `uwf-hermes` translates that protocol into Hermes ACP (Agent Client Protocol) calls. Other adapters (e.g. `uwf-claude-code`, `uwf-cursor`) do the same for their respective CLIs.
|
||||
|
||||
On first visit to a role it sends a composed prompt (role definition, task, history, edge prompt); on continuation it resumes the cached session. Session transcripts and raw output are stored as CAS detail nodes.
|
||||
|
||||
**Dependencies:** `@uncaged/json-cas`, `@uncaged/workflow-util-agent`, `@uncaged/workflow-protocol`, `@uncaged/workflow-util`
|
||||
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import { readFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
|
||||
const PKG_ROOT = join(import.meta.dir, "..");
|
||||
|
||||
describe("Issue #551 — bin entry & engines", () => {
|
||||
test("package.json declares bun in engines", () => {
|
||||
const pkg = JSON.parse(readFileSync(join(PKG_ROOT, "package.json"), "utf-8"));
|
||||
expect(pkg.engines).toBeDefined();
|
||||
expect(pkg.engines.bun).toBeDefined();
|
||||
expect(pkg.engines.bun).toMatch(/^>=?\s*[\d.]+/);
|
||||
});
|
||||
|
||||
test("bin entry file has bun shebang", () => {
|
||||
const pkg = JSON.parse(readFileSync(join(PKG_ROOT, "package.json"), "utf-8"));
|
||||
const binPath = pkg.bin["uwf-hermes"];
|
||||
const content = readFileSync(join(PKG_ROOT, binPath), "utf-8");
|
||||
expect(content.startsWith("#!/usr/bin/env bun")).toBe(true);
|
||||
});
|
||||
|
||||
test("README.md explains uwf-hermes is an adapter", () => {
|
||||
const readme = readFileSync(join(PKG_ROOT, "README.md"), "utf-8");
|
||||
expect(readme.toLowerCase()).toContain("adapter");
|
||||
expect(readme).toMatch(/uwf-hermes/);
|
||||
expect(readme).toMatch(/hermes/);
|
||||
});
|
||||
});
|
||||
@@ -42,5 +42,8 @@
|
||||
"bugs": {
|
||||
"url": "https://github.com/shazhou-ww/uncaged-workflow/issues"
|
||||
},
|
||||
"engines": {
|
||||
"bun": ">= 1.0.0"
|
||||
},
|
||||
"license": "MIT"
|
||||
}
|
||||
|
||||
@@ -0,0 +1,68 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import type { StartNodePayload, StepRecord, Target } from "../types.js";
|
||||
|
||||
describe("Protocol types for thread/edge location", () => {
|
||||
describe("StartNodePayload", () => {
|
||||
test("has required cwd field", () => {
|
||||
const payload: StartNodePayload = {
|
||||
workflow: "0123456789ABC",
|
||||
prompt: "Test prompt",
|
||||
cwd: "/home/user/project",
|
||||
};
|
||||
|
||||
expect(payload.cwd).toBe("/home/user/project");
|
||||
expect(typeof payload.cwd).toBe("string");
|
||||
});
|
||||
});
|
||||
|
||||
describe("StepRecord", () => {
|
||||
test("has required cwd field", () => {
|
||||
const record: StepRecord = {
|
||||
role: "planner",
|
||||
output: "0123456789ABC",
|
||||
detail: "DEF0123456789",
|
||||
agent: "uwf-hermes",
|
||||
edgePrompt: "Plan the implementation",
|
||||
startedAtMs: Date.now(),
|
||||
completedAtMs: Date.now() + 1000,
|
||||
cwd: "/home/user/project",
|
||||
};
|
||||
|
||||
expect(record.cwd).toBe("/home/user/project");
|
||||
expect(typeof record.cwd).toBe("string");
|
||||
});
|
||||
});
|
||||
|
||||
describe("Target", () => {
|
||||
test("has location field that accepts string", () => {
|
||||
const target: Target = {
|
||||
role: "coder",
|
||||
prompt: "Implement the code",
|
||||
location: "/custom/path",
|
||||
};
|
||||
|
||||
expect(target.location).toBe("/custom/path");
|
||||
expect(typeof target.location).toBe("string");
|
||||
});
|
||||
|
||||
test("has location field that accepts null", () => {
|
||||
const target: Target = {
|
||||
role: "coder",
|
||||
prompt: "Implement the code",
|
||||
location: null,
|
||||
};
|
||||
|
||||
expect(target.location).toBe(null);
|
||||
});
|
||||
|
||||
test("location supports mustache template syntax", () => {
|
||||
const target: Target = {
|
||||
role: "coder",
|
||||
prompt: "Implement the code",
|
||||
location: "{{{repoPath}}}",
|
||||
};
|
||||
|
||||
expect(target.location).toBe("{{{repoPath}}}");
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -29,6 +29,7 @@ export type {
|
||||
ThreadForkOutput,
|
||||
ThreadId,
|
||||
ThreadListItem,
|
||||
ThreadStatus,
|
||||
ThreadStepsOutput,
|
||||
ThreadsIndex,
|
||||
WorkflowConfig,
|
||||
|
||||
@@ -20,6 +20,9 @@ const TARGET: JSONSchema = {
|
||||
properties: {
|
||||
role: { type: "string" },
|
||||
prompt: { type: "string" },
|
||||
location: {
|
||||
anyOf: [{ type: "string" }, { type: "null" }],
|
||||
},
|
||||
},
|
||||
additionalProperties: false,
|
||||
};
|
||||
@@ -49,10 +52,11 @@ export const WORKFLOW_SCHEMA: JSONSchema = {
|
||||
export const START_NODE_SCHEMA: JSONSchema = {
|
||||
title: "StartNode",
|
||||
type: "object",
|
||||
required: ["workflow", "prompt"],
|
||||
required: ["workflow", "prompt", "cwd"],
|
||||
properties: {
|
||||
workflow: { type: "string", format: "cas_ref" },
|
||||
prompt: { type: "string" },
|
||||
cwd: { type: "string" },
|
||||
},
|
||||
additionalProperties: false,
|
||||
};
|
||||
@@ -60,7 +64,17 @@ export const START_NODE_SCHEMA: JSONSchema = {
|
||||
export const STEP_NODE_SCHEMA: JSONSchema = {
|
||||
title: "StepNode",
|
||||
type: "object",
|
||||
required: ["start", "prev", "role", "output", "detail", "agent", "startedAtMs", "completedAtMs"],
|
||||
required: [
|
||||
"start",
|
||||
"prev",
|
||||
"role",
|
||||
"output",
|
||||
"detail",
|
||||
"agent",
|
||||
"startedAtMs",
|
||||
"completedAtMs",
|
||||
"cwd",
|
||||
],
|
||||
properties: {
|
||||
start: { type: "string", format: "cas_ref" },
|
||||
prev: {
|
||||
@@ -73,6 +87,7 @@ export const STEP_NODE_SCHEMA: JSONSchema = {
|
||||
edgePrompt: { type: "string" },
|
||||
startedAtMs: { type: "integer" },
|
||||
completedAtMs: { type: "integer" },
|
||||
cwd: { type: "string" },
|
||||
},
|
||||
additionalProperties: false,
|
||||
};
|
||||
|
||||
@@ -18,6 +18,8 @@ export type StepRecord = {
|
||||
startedAtMs: number;
|
||||
/** Date.now() after agent returns */
|
||||
completedAtMs: number;
|
||||
/** Working directory where the agent executed. Missing in legacy nodes → "". */
|
||||
cwd: string;
|
||||
};
|
||||
|
||||
// ── 4.2 Workflow 定义 ───────────────────────────────────────────────
|
||||
@@ -34,6 +36,8 @@ export type RoleDefinition = {
|
||||
export type Target = {
|
||||
role: string;
|
||||
prompt: string;
|
||||
/** Optional working-directory override, written as a mustache template; null means inherit the thread cwd. */
|
||||
location: string | null;
|
||||
};
|
||||
|
||||
export type WorkflowPayload = {
|
||||
@@ -48,6 +52,8 @@ export type WorkflowPayload = {
|
||||
export type StartNodePayload = {
|
||||
workflow: CasRef;
|
||||
prompt: string;
|
||||
/** Working directory where the thread was created. */
|
||||
cwd: string;
|
||||
};
|
||||
|
||||
export type StepNodePayload = StepRecord & {
|
||||
@@ -70,17 +76,27 @@ export type ModeratorContext = {
|
||||
|
||||
// ── 4.5 CLI 输出 ────────────────────────────────────────────────────
|
||||
|
||||
/** Thread status — unified status representation */
|
||||
export type ThreadStatus = "idle" | "running" | "completed" | "cancelled";
|
||||
|
||||
/** uwf thread start */
|
||||
export type StartOutput = {
|
||||
workflow: CasRef;
|
||||
thread: ThreadId;
|
||||
};
|
||||
|
||||
/** uwf thread step / uwf thread show */
|
||||
/**
|
||||
* Output from thread show and thread exec commands.
|
||||
*
|
||||
* @property status - Current thread status (idle/running/completed/cancelled)
|
||||
* @property done - @deprecated Use status field instead. True if thread is completed or cancelled.
|
||||
* @property background - @deprecated Use status field instead. `true` when the step was started in the background; otherwise null.
|
||||
*/
|
||||
export type StepOutput = {
|
||||
workflow: CasRef;
|
||||
thread: ThreadId;
|
||||
head: CasRef;
|
||||
status: ThreadStatus;
|
||||
done: boolean;
|
||||
background: boolean | null;
|
||||
};
|
||||
|
||||
@@ -5,17 +5,13 @@ import { tryFrontmatterFastPath } from "../src/frontmatter.js";
|
||||
|
||||
// ── Helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
/** JSON Schema that exactly matches the AgentFrontmatter fields. */
|
||||
const FRONTMATTER_SCHEMA = {
|
||||
/** JSON Schema that matches the new status-only AgentFrontmatter. */
|
||||
const STATUS_ONLY_SCHEMA = {
|
||||
type: "object",
|
||||
properties: {
|
||||
status: { anyOf: [{ type: "string" }, { type: "null" }] },
|
||||
next: { anyOf: [{ type: "string" }, { type: "null" }] },
|
||||
confidence: { anyOf: [{ type: "number" }, { type: "null" }] },
|
||||
artifacts: { type: "array", items: { type: "string" } },
|
||||
scope: { type: "string" },
|
||||
},
|
||||
required: ["status", "next", "confidence", "artifacts", "scope"],
|
||||
required: ["status"],
|
||||
additionalProperties: false,
|
||||
};
|
||||
|
||||
@@ -56,24 +52,41 @@ async function makeStoreWithSchema(schema: Record<string, unknown>) {
|
||||
return { store, schemaHash };
|
||||
}
|
||||
|
||||
// ── STANDARD_KEYS ────────────────────────────────────────────────────────────
|
||||
|
||||
describe("STANDARD_KEYS contains only status", () => {
|
||||
test("STANDARD_KEYS is ['status']", async () => {
|
||||
// We verify indirectly: with a schema that declares only `status`, the stored payload contains only { status }
|
||||
const { store, schemaHash } = await makeStoreWithSchema({
|
||||
type: "object",
|
||||
properties: {
|
||||
status: { anyOf: [{ type: "string" }, { type: "null" }] },
|
||||
},
|
||||
});
|
||||
|
||||
const raw = "---\nstatus: done\n---\n\nBody.";
|
||||
const result = await tryFrontmatterFastPath(raw, schemaHash, store);
|
||||
expect(result).not.toBeNull();
|
||||
|
||||
const node = store.get(result!.outputHash);
|
||||
expect(node).not.toBeNull();
|
||||
const payload = node!.payload as Record<string, unknown>;
|
||||
expect(payload.status).toBe("done");
|
||||
// Legacy fields must NOT be present
|
||||
expect(payload.next).toBeUndefined();
|
||||
expect(payload.confidence).toBeUndefined();
|
||||
expect(payload.artifacts).toBeUndefined();
|
||||
expect(payload.scope).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
// ── Happy path ─────────────────────────────────────────────────────────────────
|
||||
|
||||
describe("tryFrontmatterFastPath — happy path", () => {
|
||||
test("parses valid frontmatter and returns outputHash + stripped body", async () => {
|
||||
const { store, schemaHash } = await makeStoreWithSchema(FRONTMATTER_SCHEMA);
|
||||
const { store, schemaHash } = await makeStoreWithSchema(STATUS_ONLY_SCHEMA);
|
||||
|
||||
const raw = [
|
||||
"---",
|
||||
"status: done",
|
||||
"next: reviewer",
|
||||
"confidence: 0.9",
|
||||
"artifacts: [src/foo.ts]",
|
||||
"scope: role",
|
||||
"---",
|
||||
"",
|
||||
"## Summary",
|
||||
"Work is complete.",
|
||||
].join("\n");
|
||||
const raw = ["---", "status: done", "---", "", "## Summary", "Work is complete."].join("\n");
|
||||
|
||||
const result = await tryFrontmatterFastPath(raw, schemaHash, store);
|
||||
|
||||
@@ -85,11 +98,10 @@ describe("tryFrontmatterFastPath — happy path", () => {
|
||||
expect((result?.outputHash ?? "").length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
test("stored CAS node payload matches frontmatter fields", async () => {
|
||||
const { store, schemaHash } = await makeStoreWithSchema(FRONTMATTER_SCHEMA);
|
||||
test("stored CAS node payload has only status", async () => {
|
||||
const { store, schemaHash } = await makeStoreWithSchema(STATUS_ONLY_SCHEMA);
|
||||
|
||||
const raw =
|
||||
"---\nstatus: done\nnext: null\nconfidence: null\nartifacts: []\nscope: role\n---\n\nBody.";
|
||||
const raw = "---\nstatus: done\n---\n\nBody.";
|
||||
|
||||
const result = await tryFrontmatterFastPath(raw, schemaHash, store);
|
||||
expect(result).not.toBeNull();
|
||||
@@ -98,10 +110,29 @@ describe("tryFrontmatterFastPath — happy path", () => {
|
||||
expect(node).not.toBeNull();
|
||||
const payload = node!.payload as Record<string, unknown>;
|
||||
expect(payload.status).toBe("done");
|
||||
expect(payload.next).toBeNull();
|
||||
expect(payload.confidence).toBeNull();
|
||||
expect(payload.artifacts).toEqual([]);
|
||||
expect(payload.scope).toBe("role");
|
||||
expect(Object.keys(payload)).toEqual(["status"]);
|
||||
});
|
||||
});
|
||||
|
||||
// ── Legacy fields in input are ignored ──────────────────────────────────────
|
||||
|
||||
describe("tryFrontmatterFastPath — legacy fields ignored", () => {
|
||||
test("legacy fields in input do not appear in CAS output", async () => {
|
||||
const { store, schemaHash } = await makeStoreWithSchema(STATUS_ONLY_SCHEMA);
|
||||
|
||||
const raw =
|
||||
"---\nstatus: done\nnext: reviewer\nconfidence: 0.9\nartifacts: [a.ts]\nscope: thread\n---\n\nBody.";
|
||||
|
||||
const result = await tryFrontmatterFastPath(raw, schemaHash, store);
|
||||
expect(result).not.toBeNull();
|
||||
|
||||
const node = store.get(result!.outputHash);
|
||||
const payload = node!.payload as Record<string, unknown>;
|
||||
expect(payload.status).toBe("done");
|
||||
expect(payload.next).toBeUndefined();
|
||||
expect(payload.confidence).toBeUndefined();
|
||||
expect(payload.artifacts).toBeUndefined();
|
||||
expect(payload.scope).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -109,7 +140,7 @@ describe("tryFrontmatterFastPath — happy path", () => {
|
||||
|
||||
describe("tryFrontmatterFastPath — fallback: no frontmatter", () => {
|
||||
test("returns null for plain markdown without frontmatter block", async () => {
|
||||
const { store, schemaHash } = await makeStoreWithSchema(FRONTMATTER_SCHEMA);
|
||||
const { store, schemaHash } = await makeStoreWithSchema(STATUS_ONLY_SCHEMA);
|
||||
|
||||
const result = await tryFrontmatterFastPath(
|
||||
"This is plain markdown without any frontmatter.",
|
||||
@@ -121,35 +152,13 @@ describe("tryFrontmatterFastPath — fallback: no frontmatter", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// ── Fallback: invalid frontmatter ─────────────────────────────────────────────
|
||||
|
||||
describe("tryFrontmatterFastPath — fallback: invalid frontmatter", () => {
|
||||
test("returns null when confidence is out of range [0, 1]", async () => {
|
||||
const { store, schemaHash } = await makeStoreWithSchema(FRONTMATTER_SCHEMA);
|
||||
|
||||
const raw = "---\nstatus: done\nconfidence: 1.5\nscope: role\n---\n\nBody.";
|
||||
|
||||
const result = await tryFrontmatterFastPath(raw, schemaHash, store);
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
test("returns null when next contains whitespace", async () => {
|
||||
const { store, schemaHash } = await makeStoreWithSchema(FRONTMATTER_SCHEMA);
|
||||
|
||||
const raw = "---\nstatus: done\nnext: some role\nscope: role\n---\n\nBody.";
|
||||
|
||||
const result = await tryFrontmatterFastPath(raw, schemaHash, store);
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
// ── Fallback: schema mismatch ─────────────────────────────────────────────────
|
||||
|
||||
describe("tryFrontmatterFastPath — fallback: schema mismatch", () => {
|
||||
test("returns null when outputSchema requires fields not in frontmatter", async () => {
|
||||
const { store, schemaHash } = await makeStoreWithSchema(STRICT_SCHEMA);
|
||||
|
||||
const raw = "---\nstatus: done\nscope: role\n---\n\nBody.";
|
||||
const raw = "---\nstatus: done\n---\n\nBody.";
|
||||
|
||||
const result = await tryFrontmatterFastPath(raw, schemaHash, store);
|
||||
expect(result).toBeNull();
|
||||
@@ -194,7 +203,7 @@ describe("tryFrontmatterFastPath — role-specific fields", () => {
|
||||
test("returns null when required role-specific field is missing", async () => {
|
||||
const { store, schemaHash } = await makeStoreWithSchema(REVIEWER_SCHEMA);
|
||||
|
||||
const raw = "---\nstatus: done\nscope: role\n---\n\nBody.";
|
||||
const raw = "---\nstatus: done\n---\n\nBody.";
|
||||
|
||||
const result = await tryFrontmatterFastPath(raw, schemaHash, store);
|
||||
expect(result).toBeNull();
|
||||
|
||||
@@ -0,0 +1,45 @@
|
||||
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
||||
|
||||
describe("parseArgv empty prompt error message", () => {
|
||||
let stderrOutput: string;
|
||||
let _exitCode: number | null;
|
||||
const originalExit = process.exit;
|
||||
const originalStderrWrite = process.stderr.write;
|
||||
|
||||
beforeEach(() => {
|
||||
stderrOutput = "";
|
||||
_exitCode = null;
|
||||
process.exit = ((code?: number) => {
|
||||
_exitCode = code ?? 1;
|
||||
throw new Error("process.exit called");
|
||||
}) as any;
|
||||
process.stderr.write = ((chunk: string) => {
|
||||
stderrOutput += chunk;
|
||||
return true;
|
||||
}) as any;
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
process.exit = originalExit;
|
||||
process.stderr.write = originalStderrWrite;
|
||||
});
|
||||
|
||||
test("empty prompt produces error message mentioning template variables", async () => {
|
||||
const { parseArgv } = await import("../run.js");
|
||||
const argv = [
|
||||
"node",
|
||||
"uwf-hermes",
|
||||
"--thread",
|
||||
"01ABCDEFGHIJKLMNOPQRSTUVWX",
|
||||
"--role",
|
||||
"classifier",
|
||||
"--prompt",
|
||||
"",
|
||||
];
|
||||
|
||||
expect(() => parseArgv(argv)).toThrow("process.exit called");
|
||||
expect(stderrOutput).toContain("prompt");
|
||||
expect(stderrOutput).toContain("empty");
|
||||
expect(stderrOutput).toContain("template");
|
||||
});
|
||||
});
|
||||
@@ -130,6 +130,7 @@ async function buildHistory(
|
||||
edgePrompt: step.edgePrompt ?? "",
|
||||
startedAtMs: step.startedAtMs,
|
||||
completedAtMs: step.completedAtMs,
|
||||
cwd: step.cwd ?? "",
|
||||
content,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -13,7 +13,7 @@ import { extractSchemaFields } from "./build-output-format-instruction.js";
|
||||
|
||||
const log = createLogger({ sink: { kind: "stderr" } });
|
||||
|
||||
const STANDARD_KEYS = ["status", "next", "confidence", "artifacts", "scope"] as const;
|
||||
const STANDARD_KEYS = ["status"] as const;
|
||||
|
||||
type StandardKey = (typeof STANDARD_KEYS)[number];
|
||||
|
||||
@@ -62,10 +62,6 @@ function parseRawFrontmatterFields(raw: string): Record<string, unknown> {
|
||||
function defaultCandidate(frontmatter: AgentFrontmatter): Record<string, unknown> {
|
||||
return {
|
||||
status: frontmatter.status,
|
||||
next: frontmatter.next,
|
||||
confidence: frontmatter.confidence,
|
||||
artifacts: [...frontmatter.artifacts],
|
||||
scope: frontmatter.scope,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -73,14 +69,6 @@ function pickStandardField(frontmatter: AgentFrontmatter, key: StandardKey): unk
|
||||
switch (key) {
|
||||
case "status":
|
||||
return frontmatter.status;
|
||||
case "next":
|
||||
return frontmatter.next;
|
||||
case "confidence":
|
||||
return frontmatter.confidence;
|
||||
case "artifacts":
|
||||
return [...frontmatter.artifacts];
|
||||
case "scope":
|
||||
return frontmatter.scope;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -98,9 +86,6 @@ function pickFieldValue(
|
||||
}
|
||||
|
||||
const coerced = pickStandardField(frontmatter, field);
|
||||
if (field === "artifacts" || field === "scope") {
|
||||
return coerced;
|
||||
}
|
||||
if (coerced !== null) {
|
||||
return coerced;
|
||||
}
|
||||
@@ -110,8 +95,8 @@ function pickFieldValue(
|
||||
/**
|
||||
* Build a CAS candidate object from schema property keys and parsed frontmatter.
|
||||
*
|
||||
* When the schema has no inspectable properties, falls back to the five standard
|
||||
* agent frontmatter fields for backward compatibility.
|
||||
* When the schema has no inspectable properties, falls back to the standard
|
||||
* agent frontmatter field (status only).
|
||||
*/
|
||||
function buildCandidate(
|
||||
frontmatter: AgentFrontmatter,
|
||||
|
||||
@@ -11,7 +11,7 @@ export {
|
||||
} from "./extract.js";
|
||||
export type { FrontmatterFastPathResult } from "./frontmatter.js";
|
||||
export { tryFrontmatterFastPath } from "./frontmatter.js";
|
||||
export { createAgent } from "./run.js";
|
||||
export { createAgent, parseArgv } from "./run.js";
|
||||
export { getCachedSessionId, getCachePath, setCachedSessionId } from "./session-cache.js";
|
||||
export { getConfigPath, getEnvPath, loadWorkflowConfig, resolveStorageRoot } from "./storage.js";
|
||||
export type {
|
||||
|
||||
@@ -32,13 +32,16 @@ function getNamedArg(argv: string[], name: string): string {
|
||||
return argv[idx + 1];
|
||||
}
|
||||
|
||||
function parseArgv(argv: string[]): { threadId: ThreadId; role: string; prompt: string } {
|
||||
export function parseArgv(argv: string[]): { threadId: ThreadId; role: string; prompt: string } {
|
||||
const threadId = getNamedArg(argv, "--thread");
|
||||
const role = getNamedArg(argv, "--role");
|
||||
const prompt = getNamedArg(argv, "--prompt");
|
||||
if (threadId === "") fail(USAGE);
|
||||
if (role === "") fail(USAGE);
|
||||
if (prompt === "") fail(USAGE);
|
||||
if (prompt === "")
|
||||
fail(
|
||||
`--prompt is empty. If this agent was spawned by uwf, the edge prompt template may have unresolved variables. ${USAGE}`,
|
||||
);
|
||||
return { threadId: threadId as ThreadId, role, prompt };
|
||||
}
|
||||
|
||||
@@ -72,6 +75,7 @@ async function writeStepNode(options: {
|
||||
edgePrompt: options.edgePrompt,
|
||||
startedAtMs: options.startedAtMs,
|
||||
completedAtMs: options.completedAtMs,
|
||||
cwd: process.cwd(),
|
||||
};
|
||||
const hash = await options.store.put(options.schemas.stepNode, payload);
|
||||
const node = options.store.get(hash);
|
||||
|
||||
@@ -41,31 +41,13 @@ describe("parseFrontmatterMarkdown", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("full frontmatter document", () => {
|
||||
it("parses all fields from a well-formed document", () => {
|
||||
const raw = `---
|
||||
status: done
|
||||
next: reviewer
|
||||
confidence: 0.9
|
||||
artifacts:
|
||||
- src/foo.ts
|
||||
- src/bar.ts
|
||||
scope: thread
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
Everything looks good.`;
|
||||
|
||||
describe("status-only frontmatter", () => {
|
||||
it("parses status-only frontmatter", () => {
|
||||
const raw = "---\nstatus: done\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter).not.toBeNull();
|
||||
const fm = result.frontmatter!;
|
||||
expect(fm.status).toBe("done");
|
||||
expect(fm.next).toBe("reviewer");
|
||||
expect(fm.confidence).toBe(0.9);
|
||||
expect(fm.artifacts).toEqual(["src/foo.ts", "src/bar.ts"]);
|
||||
expect(fm.scope).toBe("thread");
|
||||
expect(result.body).toBe("## Summary\n\nEverything looks good.");
|
||||
expect(result.frontmatter).toEqual({ status: "done" });
|
||||
expect(result.body).toBe("body");
|
||||
});
|
||||
|
||||
it("strips leading newline from body", () => {
|
||||
@@ -87,6 +69,22 @@ Everything looks good.`;
|
||||
});
|
||||
});
|
||||
|
||||
describe("ignores legacy fields", () => {
|
||||
it("legacy fields next/confidence/artifacts/scope are NOT present on result", () => {
|
||||
const raw =
|
||||
"---\nstatus: done\nnext: reviewer\nconfidence: 0.9\nartifacts:\n - src/foo.ts\nscope: thread\n---\n\nBody.";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter).not.toBeNull();
|
||||
const fm = result.frontmatter!;
|
||||
expect(fm.status).toBe("done");
|
||||
// Legacy fields must not exist on the object at all
|
||||
expect("next" in fm).toBe(false);
|
||||
expect("confidence" in fm).toBe(false);
|
||||
expect("artifacts" in fm).toBe(false);
|
||||
expect("scope" in fm).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("status field", () => {
|
||||
it.each([
|
||||
"done",
|
||||
@@ -106,109 +104,18 @@ Everything looks good.`;
|
||||
});
|
||||
|
||||
it("returns null status when omitted", () => {
|
||||
const raw = "---\nconfidence: 0.5\n---\nbody";
|
||||
const raw = "---\nfoo: bar\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.status).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe("confidence field", () => {
|
||||
it("parses integer as number", () => {
|
||||
const raw = "---\nconfidence: 1\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.confidence).toBe(1);
|
||||
});
|
||||
|
||||
it("parses decimal", () => {
|
||||
const raw = "---\nconfidence: 0.75\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.confidence).toBe(0.75);
|
||||
});
|
||||
|
||||
it("returns null when omitted", () => {
|
||||
const raw = "---\nstatus: done\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.confidence).toBeNull();
|
||||
});
|
||||
|
||||
it("returns null for non-numeric value", () => {
|
||||
const raw = "---\nconfidence: high\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.confidence).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe("artifacts field", () => {
|
||||
it("parses block sequence", () => {
|
||||
const raw = "---\nartifacts:\n - a.ts\n - b.ts\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.artifacts).toEqual(["a.ts", "b.ts"]);
|
||||
});
|
||||
|
||||
it("parses inline sequence", () => {
|
||||
const raw = "---\nartifacts: [a.ts, b.ts]\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.artifacts).toEqual(["a.ts", "b.ts"]);
|
||||
});
|
||||
|
||||
it("returns empty array when omitted", () => {
|
||||
const raw = "---\nstatus: done\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.artifacts).toEqual([]);
|
||||
});
|
||||
|
||||
it("wraps single scalar in array", () => {
|
||||
const raw = "---\nartifacts: only-one.ts\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.artifacts).toEqual(["only-one.ts"]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("scope field", () => {
|
||||
it('parses scope "role"', () => {
|
||||
const raw = "---\nscope: role\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.scope).toBe("role");
|
||||
});
|
||||
|
||||
it('parses scope "thread"', () => {
|
||||
const raw = "---\nscope: thread\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.scope).toBe("thread");
|
||||
});
|
||||
|
||||
it('defaults to "role" when omitted', () => {
|
||||
const raw = "---\nstatus: done\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.scope).toBe("role");
|
||||
});
|
||||
|
||||
it('defaults to "role" for unknown scope value', () => {
|
||||
const raw = "---\nscope: global\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.scope).toBe("role");
|
||||
});
|
||||
});
|
||||
|
||||
describe("next field", () => {
|
||||
it("parses a role name", () => {
|
||||
const raw = "---\nnext: planner\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.next).toBe("planner");
|
||||
});
|
||||
|
||||
it("returns null when omitted", () => {
|
||||
const raw = "---\nstatus: done\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.next).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe("unknown fields", () => {
|
||||
it("ignores unknown keys silently", () => {
|
||||
const raw = "---\nunknown_field: some_value\nstatus: done\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter?.status).toBe("done");
|
||||
expect(Object.keys(result.frontmatter!)).toEqual(["status"]);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -221,123 +128,58 @@ Everything looks good.`;
|
||||
});
|
||||
|
||||
describe("empty frontmatter block", () => {
|
||||
it("parses empty frontmatter and uses all defaults", () => {
|
||||
it("parses empty frontmatter with status null", () => {
|
||||
const raw = "---\n---\nbody";
|
||||
const result = parseFrontmatterMarkdown(raw);
|
||||
expect(result.frontmatter).not.toBeNull();
|
||||
const fm = result.frontmatter!;
|
||||
expect(fm.status).toBeNull();
|
||||
expect(fm.next).toBeNull();
|
||||
expect(fm.confidence).toBeNull();
|
||||
expect(fm.artifacts).toEqual([]);
|
||||
expect(fm.scope).toBe("role");
|
||||
expect(Object.keys(fm)).toEqual(["status"]);
|
||||
expect(result.body).toBe("body");
|
||||
});
|
||||
});
|
||||
|
||||
describe("AgentFrontmatter has exactly one field", () => {
|
||||
it("has only status key", () => {
|
||||
const fm: AgentFrontmatter = { status: null };
|
||||
expect(Object.keys(fm)).toEqual(["status"]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("FrontmatterValidationError only has status variant", () => {
|
||||
it("status variant is valid", () => {
|
||||
const err: import("../src/index.js").FrontmatterValidationError = {
|
||||
field: "status",
|
||||
message: "test",
|
||||
};
|
||||
expect(err.field).toBe("status");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// ── validateFrontmatter ──────────────────────────────────────────────────────
|
||||
|
||||
function validFm(overrides: Partial<AgentFrontmatter> = {}): AgentFrontmatter {
|
||||
return {
|
||||
status: "done",
|
||||
next: null,
|
||||
confidence: null,
|
||||
artifacts: [],
|
||||
scope: "role",
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
describe("validateFrontmatter", () => {
|
||||
it("returns no errors for a fully valid frontmatter", () => {
|
||||
const errors = validateFrontmatter(validFm());
|
||||
it("returns no errors for a valid status", () => {
|
||||
const errors = validateFrontmatter({ status: "done" });
|
||||
expect(errors).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("returns no errors when all nullable fields are null", () => {
|
||||
const fm: AgentFrontmatter = {
|
||||
status: null,
|
||||
next: null,
|
||||
confidence: null,
|
||||
artifacts: [],
|
||||
scope: "role",
|
||||
};
|
||||
it("returns no errors when status is null", () => {
|
||||
const errors = validateFrontmatter({ status: null });
|
||||
expect(errors).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("returns error for invalid status", () => {
|
||||
const errors = validateFrontmatter({ status: "bogus" as never });
|
||||
expect(errors).toHaveLength(1);
|
||||
expect(errors[0]?.field).toBe("status");
|
||||
});
|
||||
|
||||
it("no validation for next/confidence/artifacts/scope — fields do not exist", () => {
|
||||
// AgentFrontmatter only has status — verify at runtime
|
||||
const fm: AgentFrontmatter = { status: "done" };
|
||||
expect(Object.keys(fm)).toEqual(["status"]);
|
||||
expect(validateFrontmatter(fm)).toHaveLength(0);
|
||||
});
|
||||
|
||||
describe("confidence validation", () => {
|
||||
it("accepts 0.0", () => {
|
||||
expect(validateFrontmatter(validFm({ confidence: 0 }))).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("accepts 1.0", () => {
|
||||
expect(validateFrontmatter(validFm({ confidence: 1 }))).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("rejects value below 0", () => {
|
||||
const errors = validateFrontmatter(validFm({ confidence: -0.1 }));
|
||||
expect(errors).toHaveLength(1);
|
||||
expect(errors[0]?.field).toBe("confidence");
|
||||
});
|
||||
|
||||
it("rejects value above 1", () => {
|
||||
const errors = validateFrontmatter(validFm({ confidence: 1.01 }));
|
||||
expect(errors).toHaveLength(1);
|
||||
expect(errors[0]?.field).toBe("confidence");
|
||||
});
|
||||
});
|
||||
|
||||
describe("next validation", () => {
|
||||
it("accepts a simple role name", () => {
|
||||
expect(validateFrontmatter(validFm({ next: "reviewer" }))).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("accepts kebab-case role name", () => {
|
||||
expect(validateFrontmatter(validFm({ next: "code-reviewer" }))).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("rejects role name with whitespace", () => {
|
||||
const errors = validateFrontmatter(validFm({ next: "role name" }));
|
||||
expect(errors).toHaveLength(1);
|
||||
expect(errors[0]?.field).toBe("next");
|
||||
});
|
||||
});
|
||||
|
||||
describe("artifacts validation", () => {
|
||||
it("accepts non-empty path strings", () => {
|
||||
expect(
|
||||
validateFrontmatter(validFm({ artifacts: ["src/foo.ts", "src/bar.ts"] })),
|
||||
).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("rejects empty string artifact entries", () => {
|
||||
const errors = validateFrontmatter(validFm({ artifacts: [""] }));
|
||||
expect(errors).toHaveLength(1);
|
||||
expect(errors[0]?.field).toBe("artifacts");
|
||||
});
|
||||
|
||||
it("rejects whitespace-only artifact entries", () => {
|
||||
const errors = validateFrontmatter(validFm({ artifacts: [" "] }));
|
||||
expect(errors).toHaveLength(1);
|
||||
expect(errors[0]?.field).toBe("artifacts");
|
||||
});
|
||||
});
|
||||
|
||||
describe("multiple errors", () => {
|
||||
it("reports multiple violations at once", () => {
|
||||
const fm: AgentFrontmatter = {
|
||||
status: "done",
|
||||
next: "bad role",
|
||||
confidence: 2,
|
||||
artifacts: [""],
|
||||
scope: "role",
|
||||
};
|
||||
const errors = validateFrontmatter(fm);
|
||||
const fields = errors.map((e) => e.field);
|
||||
expect(fields).toContain("next");
|
||||
expect(fields).toContain("confidence");
|
||||
expect(fields).toContain("artifacts");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -0,0 +1,163 @@
|
||||
export function generateAdapterReference(): string {
|
||||
return `# Adapter Reference
|
||||
|
||||
Guide for building a new agent adapter (CLI binary) for the workflow engine.
|
||||
|
||||
## What Is an Adapter
|
||||
|
||||
An adapter is a CLI command (e.g. \`uwf-hermes\`, \`uwf-builtin\`) that the engine spawns to execute a role. It bridges the workflow engine and an LLM/agent backend. The engine calls it with:
|
||||
|
||||
\`\`\`
|
||||
uwf-<name> --thread <id> --role <role> --prompt <text>
|
||||
\`\`\`
|
||||
|
||||
The adapter must produce frontmatter markdown output. The \`createAgent\` factory handles argument parsing, context building, output extraction, and CAS persistence — you just implement the LLM interaction.
|
||||
|
||||
## Quick Start
|
||||
|
||||
\`\`\`typescript
|
||||
import { createAgent } from "@uncaged/workflow-util-agent";
|
||||
import type { AgentContext, AgentRunResult, AgentContinueFn, AgentRunFn } from "@uncaged/workflow-util-agent";
|
||||
|
||||
const run: AgentRunFn = async (ctx: AgentContext): Promise<AgentRunResult> => {
|
||||
// 1. Build your prompt from ctx
|
||||
// 2. Call your LLM backend
|
||||
// 3. Return the result
|
||||
return { output: rawMarkdown, detailHash, sessionId };
|
||||
};
|
||||
|
||||
const continue_: AgentContinueFn = async (sessionId, message, store) => {
|
||||
// Resume an existing session with a correction message
|
||||
return { output: correctedMarkdown, detailHash, sessionId };
|
||||
};
|
||||
|
||||
const main = createAgent({ name: "my-agent", run, continue: continue_ });
|
||||
main();
|
||||
\`\`\`
|
||||
|
||||
## The \`createAgent\` Factory
|
||||
|
||||
\`createAgent(options)\` returns an async \`main()\` function that handles the full lifecycle:
|
||||
|
||||
1. Parses CLI args (\`--thread\`, \`--role\`, \`--prompt\`)
|
||||
2. Loads \`.env\` from storage root
|
||||
3. Builds \`AgentContext\` (thread history, workflow definition, role prompt)
|
||||
4. Injects \`outputFormatInstruction\` from the role's frontmatter schema
|
||||
5. Calls your \`run(ctx)\` function
|
||||
6. Extracts frontmatter from your output via \`tryFrontmatterFastPath()\`
|
||||
7. If extraction fails, calls your \`continue(sessionId, correctionMessage, store)\` up to 2 times
|
||||
8. Persists the validated output as a CAS step node
|
||||
9. Prints the step hash to stdout
|
||||
|
||||
You only implement \`run\` and \`continue\`.
|
||||
|
||||
## AgentOptions
|
||||
|
||||
\`\`\`typescript
|
||||
type AgentOptions = {
|
||||
name: string; // Adapter name (used in step records as "uwf-<name>")
|
||||
run: AgentRunFn; // Execute a role from scratch
|
||||
continue: AgentContinueFn; // Resume a session for frontmatter correction
|
||||
};
|
||||
\`\`\`
|
||||
|
||||
## AgentContext
|
||||
|
||||
The \`ctx\` object passed to your \`run\` function:
|
||||
|
||||
| Field | Type | Description |
|
||||
|-------|------|-------------|
|
||||
| \`threadId\` | \`string\` | Thread ULID |
|
||||
| \`role\` | \`string\` | Role name being executed |
|
||||
| \`edgePrompt\` | \`string\` | Moderator's task instruction for this step |
|
||||
| \`workflow\` | \`WorkflowPayload\` | Full workflow definition (roles, graph) |
|
||||
| \`start\` | \`StartNodePayload\` | Thread start data (workflow hash, user prompt) |
|
||||
| \`steps\` | \`StepContext[]\` | Previous steps with expanded outputs |
|
||||
| \`store\` | \`Store\` | CAS store for reading/writing data |
|
||||
| \`outputFormatInstruction\` | \`string\` | Frontmatter format instruction (inject into system prompt) |
|
||||
| \`isFirstVisit\` | \`boolean\` | True if this role hasn't run before in this thread |
|
||||
|
||||
## AgentRunResult
|
||||
|
||||
Your \`run\` and \`continue\` functions must return:
|
||||
|
||||
\`\`\`typescript
|
||||
type AgentRunResult = {
|
||||
output: string; // Raw markdown with frontmatter (must start with ---)
|
||||
detailHash: string; // CAS hash of session detail (turn history, metadata)
|
||||
sessionId: string; // Session ID for potential continue() calls
|
||||
};
|
||||
\`\`\`
|
||||
|
||||
## Building the Prompt
|
||||
|
||||
Use helpers from \`@uncaged/workflow-util-agent\`:
|
||||
|
||||
| Helper | Purpose |
|
||||
|--------|---------|
|
||||
| \`buildRolePrompt(roleDef)\` | Assemble Goal/Capabilities/Prepare/Procedure/Output sections |
|
||||
| \`buildContinuationPrompt(steps, role, edgePrompt)\` | For re-entry: steps since last visit + edge prompt |
|
||||
| \`ctx.outputFormatInstruction\` | Pre-built frontmatter format block (inject into system prompt) |
|
||||
|
||||
Typical system prompt structure:
|
||||
\`\`\`
|
||||
[outputFormatInstruction]
|
||||
[rolePrompt from buildRolePrompt()]
|
||||
[workflow metadata]
|
||||
\`\`\`
|
||||
|
||||
## Storing Session Detail
|
||||
|
||||
Store your turn history as a CAS merkle DAG for debugging and replay:
|
||||
|
||||
\`\`\`typescript
|
||||
// Store each turn as a CAS text node
|
||||
const turnHash = await store.put(textSchema, { content: turnData });
|
||||
|
||||
// Build a detail node referencing all turns
|
||||
const detailHash = await store.put(detailSchema, { turns: turnHashes });
|
||||
\`\`\`
|
||||
|
||||
The \`detailHash\` is preserved from the first \`run()\` call — retry \`continue()\` calls don't overwrite it.
|
||||
|
||||
## Registration
|
||||
|
||||
Register your adapter in \`~/.uncaged/workflow/config.yaml\`:
|
||||
|
||||
\`\`\`yaml
|
||||
agents:
|
||||
my-agent:
|
||||
command: uwf-my-agent
|
||||
args: []
|
||||
\`\`\`
|
||||
|
||||
Use it:
|
||||
\`\`\`bash
|
||||
uwf thread exec <thread-id> --agent my-agent
|
||||
\`\`\`
|
||||
|
||||
Or set as default:
|
||||
\`\`\`yaml
|
||||
defaultAgent: my-agent
|
||||
\`\`\`
|
||||
|
||||
## Existing Adapters
|
||||
|
||||
| Adapter | Package | Backend |
|
||||
|---------|---------|---------|
|
||||
| \`uwf-hermes\` | \`@uncaged/workflow-agent-hermes\` | Hermes ACP (chat sessions) |
|
||||
| \`uwf-builtin\` | \`@uncaged/workflow-agent-builtin\` | Direct OpenAI API (tools + loop) |
|
||||
| \`uwf-claude-code\` | \`@uncaged/workflow-agent-claude-code\` | Claude Code CLI |
|
||||
|
||||
Study these for patterns on prompt building, session management, and detail storage.
|
||||
|
||||
## Checklist
|
||||
|
||||
1. Implement \`run(ctx)\` — build prompt, call LLM, return output + detailHash + sessionId
|
||||
2. Implement \`continue(sessionId, message, store)\` — resume session for frontmatter correction
|
||||
3. Store session detail as CAS nodes (for debugging)
|
||||
4. Ensure output starts with \`---\` frontmatter block
|
||||
5. Add a \`bin\` entry in \`package.json\` for the CLI command
|
||||
6. Register in config.yaml and test with \`uwf thread exec --agent <name>\`
|
||||
`;
|
||||
}
|
||||
@@ -0,0 +1,183 @@
|
||||
export function generateAuthorReference(): string {
|
||||
return `# Author Reference
|
||||
|
||||
Guide for designing and writing workflow YAML definitions.
|
||||
|
||||
## Workflow Structure
|
||||
|
||||
\`\`\`yaml
|
||||
name: solve-issue # verb-first kebab-case
|
||||
description: "..." # human-readable summary
|
||||
|
||||
roles: # named actors
|
||||
planner:
|
||||
description: "..." # short purpose
|
||||
goal: "..." # system-level goal for the agent
|
||||
capabilities: [...] # skill keywords the agent should load
|
||||
procedure: | # step-by-step instructions
|
||||
1. Do this
|
||||
2. Do that
|
||||
output: "..." # what the agent should produce
|
||||
frontmatter: # JSON Schema for structured output
|
||||
oneOf:
|
||||
- properties:
|
||||
$status: { const: "ready" }
|
||||
plan: { type: string }
|
||||
required: [$status, plan]
|
||||
- properties:
|
||||
$status: { const: "failed" }
|
||||
error: { type: string }
|
||||
required: [$status, error]
|
||||
|
||||
graph: # status-based routing
|
||||
$START:
|
||||
_: { role: planner, prompt: "Analyze the issue." }
|
||||
planner:
|
||||
ready: { role: developer, prompt: "Implement {{{plan}}}." }
|
||||
failed: { role: $END, prompt: "Failed: {{{error}}}" }
|
||||
\`\`\`
|
||||
|
||||
## Role Definition
|
||||
|
||||
| Field | Purpose |
|
||||
|-------|---------|
|
||||
| \`description\` | Short description for humans and moderator context |
|
||||
| \`goal\` | Injected as the agent's system-level objective |
|
||||
| \`capabilities\` | Keyword tags — agent loads matching skills before starting |
|
||||
| \`procedure\` | Step-by-step instructions the agent follows |
|
||||
| \`output\` | Describes what to produce and which \`$status\` values to use |
|
||||
| \`frontmatter\` | JSON Schema defining the structured output fields |
|
||||
|
||||
### Role Design Principles
|
||||
|
||||
- **Single responsibility** — each role does one thing well
|
||||
- **Minimal context** — don't overload a role with too many steps; split if needed
|
||||
- **Clear status values** — each status should map to a distinct graph edge
|
||||
- **Explicit output** — tell the agent exactly what \`$status\` values are valid
|
||||
|
||||
## Frontmatter Schema
|
||||
|
||||
The \`frontmatter\` field is a standard JSON Schema. It defines the structured fields the agent must output in YAML frontmatter.
|
||||
|
||||
### \`$status\` Field
|
||||
|
||||
\`$status\` is the only standard field. Its value determines which graph edge the moderator follows. Use \`const\` to constrain each variant:
|
||||
|
||||
\`\`\`yaml
|
||||
frontmatter:
|
||||
oneOf:
|
||||
- properties:
|
||||
$status: { const: "done" }
|
||||
result: { type: string }
|
||||
required: [$status, result]
|
||||
- properties:
|
||||
$status: { const: "failed" }
|
||||
error: { type: string }
|
||||
required: [$status, error]
|
||||
\`\`\`
|
||||
|
||||
### Custom Fields
|
||||
|
||||
Add any fields you need for data passing between roles. These are available in edge prompts via Mustache templates.
|
||||
|
||||
### Flat Schema (Single Status)
|
||||
|
||||
When a role has only one outcome:
|
||||
|
||||
\`\`\`yaml
|
||||
frontmatter:
|
||||
properties:
|
||||
$status: { const: "done" }
|
||||
summary: { type: string }
|
||||
required: [$status, summary]
|
||||
\`\`\`
|
||||
|
||||
## Graph Routing
|
||||
|
||||
The graph maps each role's \`$status\` values to the next role:
|
||||
|
||||
\`\`\`
|
||||
graph[role][$status] → { role: nextRole, prompt: edgePrompt }
|
||||
\`\`\`
|
||||
|
||||
### Special Nodes
|
||||
|
||||
| Node | Purpose |
|
||||
|------|---------|
|
||||
| \`$START\` | Entry point — status key is always \`_\` (unconditional) |
|
||||
| \`$END\` | Terminal — thread completes and is archived |
|
||||
|
||||
### Edge Prompts
|
||||
|
||||
Use triple-brace Mustache (\`{{{field}}}\`) to pass data from the previous step's output:
|
||||
|
||||
\`\`\`yaml
|
||||
graph:
|
||||
planner:
|
||||
ready: { role: developer, prompt: "Implement plan {{{plan}}} in {{{repoPath}}}." }
|
||||
\`\`\`
|
||||
|
||||
The fields referenced must exist in the source role's frontmatter schema.
|
||||
|
||||
### Loops and Branching
|
||||
|
||||
Roles can route back to previous roles (loops) or to different roles based on status (branching):
|
||||
|
||||
\`\`\`yaml
|
||||
graph:
|
||||
reviewer:
|
||||
approved: { role: tester, prompt: "Run tests." }
|
||||
rejected: { role: developer, prompt: "Fix: {{{comments}}}" } # loop back
|
||||
\`\`\`
|
||||
|
||||
### Fail Routing
|
||||
|
||||
Route failures to a cleanup role or \`$END\`:
|
||||
|
||||
\`\`\`yaml
|
||||
graph:
|
||||
developer:
|
||||
done: { role: reviewer, prompt: "Review changes." }
|
||||
failed: { role: cleanup, prompt: "Clean up: {{{error}}}" }
|
||||
\`\`\`
|
||||
|
||||
## Self-Testing
|
||||
|
||||
### Step-by-Step Verification
|
||||
|
||||
\`\`\`bash
|
||||
# Start a thread directly from YAML file (no registration needed)
|
||||
uwf thread start my-workflow.yaml -p "Test prompt"
|
||||
|
||||
# Or register first, then start by name
|
||||
uwf workflow add my-workflow.yaml
|
||||
uwf thread start my-workflow -p "Test prompt"
|
||||
|
||||
# Execute one step at a time to verify routing
|
||||
uwf thread exec <thread-id>
|
||||
|
||||
# Inspect step output
|
||||
uwf step list <thread-id>
|
||||
uwf step show <step-hash>
|
||||
|
||||
# Check the CAS data
|
||||
uwf cas get <output-hash>
|
||||
\`\`\`
|
||||
|
||||
### Validation Checklist
|
||||
|
||||
1. Every \`$status\` value in a role's frontmatter has a matching edge in the graph
|
||||
2. Every field referenced in edge prompts (\`{{{field}}}\`) exists in the source role's schema
|
||||
3. Every role referenced in the graph exists in \`roles\`
|
||||
4. \`$START\` has exactly one edge with key \`_\`
|
||||
5. At least one path leads to \`$END\`
|
||||
6. No orphan roles (defined but never routed to)
|
||||
|
||||
## Common Pitfalls
|
||||
|
||||
- **Missing graph edge** — if a role can produce \`$status: failed\` but the graph has no \`failed\` edge, the moderator will error
|
||||
- **Mustache field mismatch** — referencing \`{{{branch}}}\` in an edge prompt but the source schema has \`branchName\` instead
|
||||
- **Overly complex roles** — a role with 20 steps should be split; each role should be completable in one agent turn
|
||||
- **No fail path** — always handle failure; route to cleanup or \`$END\`
|
||||
`;
|
||||
}
|
||||
@@ -0,0 +1,140 @@
|
||||
export function generateDeveloperReference(): string {
|
||||
return `# Developer Reference
|
||||
|
||||
Guide for contributing to the workflow engine codebase.
|
||||
|
||||
## Monorepo Structure
|
||||
|
||||
\`\`\`
|
||||
packages/
|
||||
workflow-protocol/ # Shared types (WorkflowPayload, StepNodePayload, etc.)
|
||||
workflow-util/ # Base32, ULID, logger, frontmatter parsing, skill references
|
||||
workflow-util-agent/ # createAgent factory, context builder, extract pipeline
|
||||
workflow-agent-hermes/ # uwf-hermes CLI (spawns Hermes chat sessions)
|
||||
workflow-agent-builtin/ # uwf-builtin CLI (direct LLM calls via OpenAI API)
|
||||
cli-workflow/ # uwf CLI (moderator, thread/step/cas/config commands)
|
||||
\`\`\`
|
||||
|
||||
Dependency layers (each package imports only from the packages to its left):
|
||||
\`\`\`
|
||||
protocol → util → util-agent → agent-hermes / agent-builtin / cli-workflow
|
||||
\`\`\`
|
||||
|
||||
External CAS: \`@uncaged/json-cas\` (store API, hashing, schema validation) + \`@uncaged/json-cas-fs\` (filesystem backend).
|
||||
|
||||
## Coding Conventions
|
||||
|
||||
### Functional-first
|
||||
|
||||
| Rule | Description |
|
||||
|------|-------------|
|
||||
| \`type\` over \`interface\` | All type definitions use \`type\` |
|
||||
| \`function\` over \`class\` | Pure functions + closures, no class |
|
||||
| No \`this\` | Functions must not depend on \`this\` context |
|
||||
| No inheritance | No \`extends\`, \`implements\`, \`abstract\` |
|
||||
| No optional properties | Use \`T \\| null\` instead of \`?:\` |
|
||||
| Immutability first | Use \`Readonly<T>\`, \`as const\`, avoid mutation |
|
||||
|
||||
Classes allowed only when required by third-party libraries or for Error subclasses.
|
||||
|
||||
### Error Handling
|
||||
|
||||
- \`Result<T, E>\` type for expected failures (\`ok\`/\`err\` constructors from \`@uncaged/workflow-util\`)
|
||||
- \`throw\` only for unrecoverable bugs
|
||||
- No try-catch for flow control
|
||||
|
||||
### Async
|
||||
|
||||
Always \`async/await\`, never \`.then()\` chains.
|
||||
|
||||
### Logging
|
||||
|
||||
\`console.*\` is banned (Biome \`noConsole\` rule). Use the structured logger:
|
||||
|
||||
\`\`\`typescript
|
||||
import { createLogger } from "@uncaged/workflow-util";
|
||||
const log = createLogger();
|
||||
log("4KNMR2PX", "Loading workflow..."); // 8-char Crockford Base32 tag
|
||||
\`\`\`
|
||||
|
||||
Each call site gets a unique hand-written tag. \`grep "4KNMR2PX"\` in logs → instant code location.
|
||||
|
||||
CLI package (\`@uncaged/cli-workflow\`) may use \`console.log\` for user-facing output with a biome-ignore comment.
|
||||
|
||||
### No Dynamic Import
|
||||
|
||||
No \`await import()\` in production code. Always static top-level \`import\`. Test files are exempt.
|
||||
|
||||
### Naming
|
||||
|
||||
- Workflow names: verb-first kebab-case (\`solve-issue\`, \`review-code\`)
|
||||
- IDs: Crockford Base32 — CAS hash (XXH64, 13-char), Thread ID (ULID, 26-char)
|
||||
|
||||
## Development Workflow
|
||||
|
||||
\`\`\`bash
|
||||
bun install # install all workspace deps
|
||||
bun run build # tsc --build (all packages)
|
||||
bun run check # tsc + biome check + lint-log-tags
|
||||
bun run format # biome format --write
|
||||
bun test # run all tests
|
||||
\`\`\`
|
||||
|
||||
Before committing: \`bun run check\` + \`bun test\` must both pass.
|
||||
|
||||
### Testing
|
||||
|
||||
- \`cli-workflow\`: vitest
|
||||
- Other packages: \`bun test\`
|
||||
- Test files live in \`__tests__/\` directories
|
||||
|
||||
### Publishing
|
||||
|
||||
Fixed-mode versioning — all \`@uncaged/*\` packages share the same version number.
|
||||
|
||||
\`\`\`bash
|
||||
bun changeset # describe the change
|
||||
bun version # bump versions + changelogs
|
||||
bun release # build + test + publish to npmjs
|
||||
\`\`\`
|
||||
|
||||
## Key Modules
|
||||
|
||||
### Moderator (\`cli-workflow/src/moderator/\`)
|
||||
|
||||
Status-based graph evaluator. Reads \`graph[lastRole][output.$status]\` to determine the next role. Zero LLM cost.
|
||||
|
||||
### Extract Pipeline (\`workflow-util-agent/src/\`)
|
||||
|
||||
1. Agent produces frontmatter markdown
|
||||
2. \`parseFrontmatterMarkdown()\` extracts YAML frontmatter
|
||||
3. \`tryFrontmatterFastPath()\` validates against role's output schema
|
||||
4. If fast path fails, retries up to 2 times via agent continue
|
||||
5. Validated output stored as CAS node
|
||||
|
||||
### createAgent Factory (\`workflow-util-agent/src/run.ts\`)
|
||||
|
||||
Shared entry point for all agent CLIs. Handles:
|
||||
- Argument parsing (\`--thread\`, \`--role\`, \`--prompt\`)
|
||||
- Context building (thread history, workflow definition)
|
||||
- Output extraction and CAS persistence
|
||||
- Frontmatter retry loop
|
||||
|
||||
### CAS Integration
|
||||
|
||||
All data is CAS-addressed via \`@uncaged/json-cas\`:
|
||||
- \`store.put(schemaHash, data)\` → content hash
|
||||
- \`store.get(hash)\` → node
|
||||
- \`validate(store, node)\` → schema check
|
||||
- Schemas registered at workflow add time
|
||||
|
||||
## Commit Convention
|
||||
|
||||
\`\`\`
|
||||
<type>(<scope>): <description>
|
||||
|
||||
type: feat | fix | refactor | docs | chore | test
|
||||
scope: workflow | cli | moderator | util-agent | hermes | util | protocol
|
||||
\`\`\`
|
||||
`;
|
||||
}
|
||||
@@ -1,6 +1,5 @@
|
||||
import type {
|
||||
AgentFrontmatter,
|
||||
FrontmatterScope,
|
||||
FrontmatterStatus,
|
||||
FrontmatterValidationError,
|
||||
ParsedFrontmatterMarkdown,
|
||||
@@ -159,40 +158,12 @@ function parseMinimalYaml(yaml: string): Record<string, YamlValue> {
|
||||
|
||||
const VALID_STATUS: readonly FrontmatterStatus[] = ["done", "needs_input", "in_progress", "failed"];
|
||||
|
||||
const VALID_SCOPE: readonly FrontmatterScope[] = ["role", "thread"];
|
||||
|
||||
function coerceStatus(raw: YamlValue): FrontmatterStatus | null {
|
||||
if (raw === null || raw === undefined) return null;
|
||||
const s = String(raw).trim().toLowerCase();
|
||||
return VALID_STATUS.includes(s as FrontmatterStatus) ? (s as FrontmatterStatus) : null;
|
||||
}
|
||||
|
||||
function coerceNext(raw: YamlValue): string | null {
|
||||
if (raw === null || raw === undefined) return null;
|
||||
const s = String(raw).trim();
|
||||
return s === "" ? null : s;
|
||||
}
|
||||
|
||||
/**
 * Normalizes a raw YAML value into a numeric confidence.
 *
 * No range check is applied here; any number that is not NaN is returned as-is.
 *
 * @param raw - Value read from the `confidence` frontmatter field.
 * @returns The numeric value, or null when the value is null/undefined or
 *          converts to NaN.
 */
function coerceConfidence(raw: YamlValue): number | null {
|
||||
if (raw === null || raw === undefined) return null;
|
||||
// Numbers pass through untouched; everything else is stringified, trimmed and converted with Number().
// NOTE(review): Number("") is 0, so a blank string yields 0 rather than null — confirm this is intended.
const n = typeof raw === "number" ? raw : Number(String(raw).trim());
|
||||
if (Number.isNaN(n)) return null;
|
||||
return n;
|
||||
}
|
||||
|
||||
/**
 * Normalizes a raw YAML value into a list of artifact strings.
 *
 * @param raw - Value read from the `artifacts` frontmatter field; may be a
 *              list or a single scalar.
 * @returns An empty array for null/undefined or a blank scalar, the
 *          stringified non-empty entries for an array, or a one-element
 *          array holding the trimmed scalar.
 */
function coerceArtifacts(raw: YamlValue): readonly string[] {
|
||||
if (raw === null || raw === undefined) return [];
|
||||
// Array entries are stringified and exact-empty strings are dropped; entries are not trimmed here.
if (Array.isArray(raw)) return raw.map(String).filter((s) => s !== "");
|
||||
// A scalar is treated as a single-entry list (after trimming).
const s = String(raw).trim();
|
||||
return s === "" ? [] : [s];
|
||||
}
|
||||
|
||||
/**
 * Normalizes a raw YAML value into a FrontmatterScope.
 *
 * Unlike the other coercers this never returns null: missing or
 * unrecognized values fall back to "role".
 *
 * @param raw - Value read from the `scope` frontmatter field.
 * @returns The matching scope literal, or "role" as the default.
 */
function coerceScope(raw: YamlValue): FrontmatterScope {
|
||||
if (raw === null || raw === undefined) return "role";
|
||||
// Matching is whitespace- and case-insensitive.
const s = String(raw).trim().toLowerCase();
|
||||
return VALID_SCOPE.includes(s as FrontmatterScope) ? (s as FrontmatterScope) : "role";
|
||||
}
|
||||
|
||||
// ── Public API ───────────────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
@@ -220,10 +191,6 @@ export function parseFrontmatterMarkdown(raw: string): ParsedFrontmatterMarkdown
|
||||
|
||||
const frontmatter: AgentFrontmatter = {
|
||||
status: coerceStatus(fields.status ?? null),
|
||||
next: coerceNext(fields.next ?? null),
|
||||
confidence: coerceConfidence(fields.confidence ?? null),
|
||||
artifacts: coerceArtifacts(fields.artifacts ?? null),
|
||||
scope: coerceScope(fields.scope ?? null),
|
||||
};
|
||||
|
||||
return { frontmatter, body };
|
||||
@@ -235,11 +202,7 @@ export function parseFrontmatterMarkdown(raw: string): ParsedFrontmatterMarkdown
|
||||
* An empty array means the frontmatter is valid.
|
||||
*
|
||||
* Validated constraints:
|
||||
* - `status` — must be one of the FrontmatterStatus literals (if non-null)
|
||||
* - `confidence` — must be in [0.0, 1.0] (if non-null)
|
||||
* - `next` — must be a non-empty string with no whitespace (if non-null)
|
||||
* - `artifacts` — each entry must be a non-empty string
|
||||
* - `scope` — must be one of the FrontmatterScope literals
|
||||
* - `status` — must be one of the FrontmatterStatus literals (if non-null)
|
||||
*/
|
||||
export function validateFrontmatter(
|
||||
frontmatter: AgentFrontmatter,
|
||||
@@ -253,39 +216,5 @@ export function validateFrontmatter(
|
||||
});
|
||||
}
|
||||
|
||||
if (frontmatter.confidence !== null) {
|
||||
if (frontmatter.confidence < 0 || frontmatter.confidence > 1) {
|
||||
errors.push({
|
||||
field: "confidence",
|
||||
message: `confidence ${frontmatter.confidence} is out of range; must be between 0.0 and 1.0 inclusive`,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (frontmatter.next !== null) {
|
||||
if (frontmatter.next.trim() === "") {
|
||||
errors.push({ field: "next", message: "next must be a non-empty string when present" });
|
||||
} else if (/\s/.test(frontmatter.next)) {
|
||||
errors.push({
|
||||
field: "next",
|
||||
message: `next "${frontmatter.next}" must not contain whitespace`,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
for (const artifact of frontmatter.artifacts) {
|
||||
if (artifact.trim() === "") {
|
||||
errors.push({ field: "artifacts", message: "artifact entries must be non-empty strings" });
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!VALID_SCOPE.includes(frontmatter.scope)) {
|
||||
errors.push({
|
||||
field: "scope",
|
||||
message: `invalid scope "${frontmatter.scope}"; must be one of: ${VALID_SCOPE.join(", ")}`,
|
||||
});
|
||||
}
|
||||
|
||||
return errors;
|
||||
}
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
export { parseFrontmatterMarkdown, validateFrontmatter } from "./frontmatter-markdown.js";
|
||||
export type {
|
||||
AgentFrontmatter,
|
||||
FrontmatterScope,
|
||||
FrontmatterStatus,
|
||||
FrontmatterValidationError,
|
||||
ParsedFrontmatterMarkdown,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
* Frontmatter Markdown — agent output format (RFC #351 Phase 1).
|
||||
* Frontmatter Markdown — agent output format.
|
||||
*
|
||||
* An agent response is a Markdown document with an optional YAML frontmatter
|
||||
* block at the top. The frontmatter carries structured signals that the
|
||||
@@ -9,17 +9,12 @@
|
||||
*
|
||||
* ---
|
||||
* status: done
|
||||
* next: reviewer
|
||||
* confidence: 0.9
|
||||
* artifacts:
|
||||
* - src/foo.ts
|
||||
* scope: role
|
||||
* ---
|
||||
*
|
||||
* ... free-form markdown body ...
|
||||
*
|
||||
* All frontmatter fields are optional at the parse level. `validateFrontmatter`
|
||||
* enforces the constraints documented on each field below.
|
||||
* Only `status` is a standard frontmatter field. All other fields are
|
||||
* role-specific and defined by the output schema.
|
||||
*/
|
||||
|
||||
// ── Vocabulary types ─────────────────────────────────────────────────────────
|
||||
@@ -34,20 +29,12 @@
|
||||
*/
|
||||
export type FrontmatterStatus = "done" | "needs_input" | "in_progress" | "failed";
|
||||
|
||||
/**
|
||||
* Scope that an agent's frontmatter signals apply to.
|
||||
*
|
||||
* - `role` — signals apply to the current role execution only (default)
|
||||
* - `thread` — signals are suggestions for the entire thread moderator
|
||||
*/
|
||||
export type FrontmatterScope = "role" | "thread";
|
||||
|
||||
// ── Core frontmatter schema ──────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* Parsed and validated frontmatter from an agent response.
|
||||
*
|
||||
* All fields use explicit `T | null` (no optional `?:` per convention).
|
||||
* Only `status` is a standard field. All other fields are role-specific.
|
||||
*/
|
||||
export type AgentFrontmatter = {
|
||||
/**
|
||||
@@ -55,32 +42,6 @@ export type AgentFrontmatter = {
|
||||
* Null when omitted — engine treats it as "done" for backward compatibility.
|
||||
*/
|
||||
status: FrontmatterStatus | null;
|
||||
|
||||
/**
|
||||
* Suggested next role name for the moderator.
|
||||
* The moderator is NOT obligated to follow this — it is advisory only.
|
||||
* Null when the agent has no preference.
|
||||
*/
|
||||
next: string | null;
|
||||
|
||||
/**
|
||||
* Agent's self-assessed confidence in its output (0.0 – 1.0 inclusive).
|
||||
* Null when omitted.
|
||||
*/
|
||||
confidence: number | null;
|
||||
|
||||
/**
|
||||
* Relative file paths or CAS hashes the agent considers its primary outputs.
|
||||
* Used for GC ref-tracing and human-readable summaries.
|
||||
* Empty array when omitted (never null — an absent list is an empty list).
|
||||
*/
|
||||
artifacts: readonly string[];
|
||||
|
||||
/**
|
||||
* Scope of the frontmatter signals.
|
||||
* Defaults to "role" when omitted.
|
||||
*/
|
||||
scope: FrontmatterScope;
|
||||
};
|
||||
|
||||
// ── Parse output ─────────────────────────────────────────────────────────────
|
||||
@@ -103,9 +64,4 @@ export type ParsedFrontmatterMarkdown = {
|
||||
|
||||
// ── Validation error ─────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * A single frontmatter validation failure: `field` names the offending
 * frontmatter field and `message` is a human-readable explanation.
 */
export type FrontmatterValidationError =
|
||||
| { field: "status"; message: string }
|
||||
| { field: "next"; message: string }
|
||||
| { field: "confidence"; message: string }
|
||||
| { field: "artifacts"; message: string }
|
||||
| { field: "scope"; message: string };
|
||||
// NOTE(review): FrontmatterValidationError is declared a second time below, restricted to the
// "status" field — presumably a later revision of the union above; confirm which one is current.
export type FrontmatterValidationError = { field: "status"; message: string };
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
export { generateActorReference } from "./actor-reference.js";
|
||||
export { generateAdapterReference } from "./adapter-reference.js";
|
||||
export { generateArchitectureReference } from "./architecture-reference.js";
|
||||
export { generateAuthorReference } from "./author-reference.js";
|
||||
export { encodeUint64AsCrockford } from "./base32.js";
|
||||
export { generateCliReference } from "./cli-reference.js";
|
||||
export { generateDeveloperReference } from "./developer-reference.js";
|
||||
export { env } from "./env.js";
|
||||
export type {
|
||||
AgentFrontmatter,
|
||||
FrontmatterScope,
|
||||
FrontmatterStatus,
|
||||
FrontmatterValidationError,
|
||||
ParsedFrontmatterMarkdown,
|
||||
@@ -28,4 +30,5 @@ export { err, ok } from "./result.js";
|
||||
export { getDefaultWorkflowStorageRoot, getGlobalCasDir } from "./storage-root.js";
|
||||
export type { LogFn, Result } from "./types.js";
|
||||
export { extractUlidTimestamp, generateUlid } from "./ulid.js";
|
||||
export { generateUserReference } from "./user-reference.js";
|
||||
export { generateYamlReference } from "./yaml-reference.js";
|
||||
|
||||
@@ -0,0 +1,125 @@
|
||||
/**
 * Builds the "User Reference" guide for the uwf CLI as a Markdown string.
 *
 * The text is a single static template literal (no interpolated values) with
 * sections for quick start, concepts, setup, workflow, thread, step, CAS and
 * log commands, and global options.
 *
 * @returns The complete Markdown document.
 */
export function generateUserReference(): string {
|
||||
return `# User Reference
|
||||
|
||||
Guide for using the uwf CLI to manage workflows and threads.
|
||||
|
||||
## Quick Start
|
||||
|
||||
\`\`\`bash
|
||||
# 1. Configure provider and model
|
||||
uwf setup
|
||||
|
||||
# 2. Register a workflow
|
||||
uwf workflow add my-workflow.yaml
|
||||
|
||||
# 3. Start a thread (creates but does not execute)
|
||||
uwf thread start my-workflow -p "Build a login page"
|
||||
|
||||
# 4. Execute the thread (runs moderator → agent → extract cycles)
|
||||
uwf thread exec <thread-id> # one step
|
||||
uwf thread exec <thread-id> -c 10 # up to 10 steps
|
||||
uwf thread exec <thread-id> -c 10 --background # run in background
|
||||
\`\`\`
|
||||
|
||||
## Concepts
|
||||
|
||||
- **Workflow** — YAML definition with roles and a routing graph; stored as a CAS node
|
||||
- **Thread** — A running instance of a workflow; a chain of step nodes in CAS
|
||||
- **Step** — One moderator → agent → extract cycle; contains the role's structured output
|
||||
- **CAS** — Content-addressable store; every artifact is hashed (XXH64, Crockford Base32)
|
||||
|
||||
## Setup
|
||||
|
||||
\`\`\`
|
||||
uwf setup # interactive wizard
|
||||
uwf setup --provider <name> --base-url <url> \\
|
||||
--api-key <key> --model <name> # non-interactive
|
||||
[--agent <name>] # optional default agent
|
||||
\`\`\`
|
||||
|
||||
Config is stored at \`~/.uncaged/workflow/config.yaml\`. Override storage root with \`UNCAGED_WORKFLOW_STORAGE_ROOT\`.
|
||||
|
||||
## Workflow Commands
|
||||
|
||||
\`\`\`
|
||||
uwf workflow add <file> # register from YAML file
|
||||
uwf workflow show <id> # show by name or CAS hash
|
||||
uwf workflow list # list all registered workflows
|
||||
\`\`\`
|
||||
|
||||
You can also pass a file path directly to \`uwf thread start\` without registering first.
|
||||
|
||||
## Thread Lifecycle
|
||||
|
||||
\`\`\`
|
||||
uwf thread start <workflow> -p <prompt> # create thread
|
||||
uwf thread exec <thread-id> # execute one step
|
||||
[--agent <cmd>] # override agent
|
||||
[-c, --count <n>] # run n steps
|
||||
[--background] # run in background
|
||||
uwf thread show <thread-id> # show head pointer
|
||||
uwf thread list # list all threads
|
||||
[--status <filter>] # idle, running, completed, cancelled, active (comma-separated)
|
||||
[--after <thread-id>] # pagination: after this thread
|
||||
[--before <thread-id>] # pagination: before this thread
|
||||
[--skip <n>] # skip first n results
|
||||
[--take <n>] # limit results
|
||||
uwf thread read <thread-id> # render context as markdown
|
||||
[--quota <chars>] # max output chars (default 4000)
|
||||
[--before <step-hash>] # pagination
|
||||
[--start] # include start step
|
||||
uwf thread stop <thread-id> # stop background execution
|
||||
uwf thread cancel <thread-id> # cancel and archive thread
|
||||
\`\`\`
|
||||
|
||||
### Typical Lifecycle
|
||||
|
||||
\`\`\`
|
||||
start → exec (repeat) → thread reaches $END → auto-completed
|
||||
→ or: cancel to abort
|
||||
\`\`\`
|
||||
|
||||
## Step Commands
|
||||
|
||||
\`\`\`
|
||||
uwf step list <thread-id> # list all steps
|
||||
uwf step show <step-hash> # show step details
|
||||
uwf step fork <step-hash> # fork thread from a step (branch)
|
||||
\`\`\`
|
||||
|
||||
Forking creates a new thread that shares history up to the fork point — useful for retrying from a known-good state.
|
||||
|
||||
## CAS Commands
|
||||
|
||||
\`\`\`
|
||||
uwf cas get <hash> # read a node (type + payload)
|
||||
[--timestamp] # include timestamp
|
||||
uwf cas put <type-hash> <data> # store typed JSON, print hash
|
||||
uwf cas put-text <text> # store plain text, print hash
|
||||
uwf cas has <hash> # check existence
|
||||
uwf cas refs <hash> # list direct references
|
||||
uwf cas walk <hash> # recursive traversal
|
||||
uwf cas reindex # rebuild type index
|
||||
uwf cas schema list # list schemas
|
||||
uwf cas schema get <hash> # show schema definition
|
||||
\`\`\`
|
||||
|
||||
## Log Commands
|
||||
|
||||
\`\`\`
|
||||
uwf log list # list log files
|
||||
uwf log show # show log entries
|
||||
[--thread <id>] # filter by thread
|
||||
[--process <pid>] # filter by process
|
||||
[--date <YYYY-MM-DD>] # filter by date
|
||||
uwf log clean --before <date> # delete old logs
|
||||
\`\`\`
|
||||
|
||||
## Global Options
|
||||
|
||||
\`\`\`
|
||||
uwf --format <json|yaml> # output format (default: json)
|
||||
uwf -V, --version # print version
|
||||
\`\`\`
|
||||
`;
|
||||
}
|
||||
Reference in New Issue
Block a user