Compare commits
22 Commits
eval-v0.1.1
...
cli@0.2.0
| Author | SHA1 | Date | |
|---|---|---|---|
| | 36a3ca6a08 | | |
| | eb0b7b514f | | |
| | a47871ec4e | | |
| | 5851e5d162 | | |
| | 61dfb40933 | | |
| | fbfd31a042 | | |
| | d99a376b60 | | |
| | a536efee00 | | |
| | 9260d81084 | | |
| | c8d884072a | | |
| | abeb465f46 | | |
| | 28427a973f | | |
| | 794f9db568 | | |
| | cd585a26f1 | | |
| | 1cf8f350d0 | | |
| | 427568a21d | | |
| | d3a2353acf | | |
| | 8085d1d6e0 | | |
| | 8764d7bda3 | | |
| | 850a3b2f25 | | |
| | 3d6a517e83 | | |
| | 825f0c641a | | |
@@ -0,0 +1,9 @@
|
||||
---
|
||||
"@united-workforce/cli": minor
|
||||
"@united-workforce/util": patch
|
||||
---
|
||||
|
||||
feat: replace $START `_` status with `new`/`resume` semantics
|
||||
|
||||
BREAKING: All workflow YAML files must update `$START._` to `$START.new` + `$START.resume`.
|
||||
The `resume` edge prompt replaces the previously hardcoded resume message in the CLI.
|
||||
@@ -264,7 +264,8 @@ roles:
|
||||
|
||||
graph:
|
||||
$START:
|
||||
_: { role: "bootstrap", prompt: "Set up the Docker container and verify uwf is runnable." }
|
||||
new: { role: "bootstrap", prompt: "Set up the Docker container and verify uwf is runnable." }
|
||||
resume: { role: "bootstrap", prompt: "Review the previous run output and continue the walkthrough." }
|
||||
bootstrap:
|
||||
pass: { role: "config-and-registry", prompt: "Container {{{containerName}}} is ready. Validate config and workflow registration." }
|
||||
fail: { role: "$END", prompt: "Bootstrap failed: {{{error}}}. No container was created." }
|
||||
|
||||
@@ -227,7 +227,8 @@ roles:
|
||||
required: [$status, error]
|
||||
graph:
|
||||
$START:
|
||||
_: { role: "planner", prompt: "Analyze the issue and produce an implementation plan." }
|
||||
new: { role: "planner", prompt: "Analyze the issue and produce an implementation plan." }
|
||||
resume: { role: "planner", prompt: "Review the previous run output and continue the work." }
|
||||
planner:
|
||||
insufficient_info: { role: "$SUSPEND", prompt: "信息不足,需要补充:{{{reason}}}" }
|
||||
ready: { role: "developer", prompt: "Implement the TDD test spec (CAS hash: {{{plan}}}) in repo {{{repoPath}}}. Repo remote: {{{repoRemote}}}." }
|
||||
|
||||
@@ -200,7 +200,7 @@ payload:
|
||||
|
||||
- `roles` — 内联定义,每个 role 的 `meta` 是独立的 ocas_ref(指向 ocas 内置 JSON Schema 节点)
|
||||
- `graph` — `Record<Role | "$START", Record<Status, Target>>`,每个 Target = `{ role, prompt }`
|
||||
- Status 来自上一个 role 输出的 `status` 字段,`$START` 用 `_` 作为初始 status
|
||||
- Status 来自上一个 role 输出的 `$status` 字段,`$START` 使用 `new`(首次启动)和 `resume`(恢复已完成的 thread)作为 status
|
||||
- Prompt 模板使用 Mustache 渲染,变量来自 lastOutput
|
||||
- 不含 agent binding — agent 配置在 `~/.uwf/config.yaml` 中管理
|
||||
|
||||
@@ -208,7 +208,7 @@ Moderator 的求值逻辑:
|
||||
|
||||
```typescript
|
||||
evaluate(graph, lastRole, lastOutput) → { role, prompt }
|
||||
// 1. status = lastRole === "$START" ? "_" : lastOutput.status
|
||||
// 1. status = lastOutput.$status (e.g. "new" for $START first run, "resume" for completed thread resume)
|
||||
// 2. target = graph[lastRole][status]
|
||||
// 3. prompt = mustache.render(target.prompt, lastOutput)
|
||||
```
|
||||
@@ -422,8 +422,8 @@ type StepNodePayload = StepRecord & {
|
||||
Moderator 使用 `evaluate(graph, lastRole, lastOutput)` 进行同步 status-based routing:
|
||||
|
||||
```typescript
|
||||
// graph[lastRole][lastOutput.status] → Target { role, prompt }
|
||||
// $START 角色使用 "_" 作为初始 status
|
||||
// graph[lastRole][lastOutput.$status] → Target { role, prompt }
|
||||
// $START 使用 "new"(首次启动)和 "resume"(恢复已完成 thread)作为 status
|
||||
// prompt 通过 Mustache 模板渲染,变量来自 lastOutput
|
||||
```
|
||||
|
||||
|
||||
@@ -35,6 +35,7 @@ roles:
|
||||
required: [$status, thesis, keyPoints]
|
||||
graph:
|
||||
$START:
|
||||
_: { role: "analyst", prompt: "Analyze the topic in the task and produce a structured summary with key points." }
|
||||
new: { role: "analyst", prompt: "Analyze the topic in the task and produce a structured summary with key points." }
|
||||
resume: { role: "analyst", prompt: "Review the previous analysis output and continue with additional context." }
|
||||
analyst:
|
||||
done: { role: "$END", prompt: "Analysis complete. Finish the workflow." }
|
||||
|
||||
@@ -53,7 +53,8 @@ roles:
|
||||
required: [$status, argument]
|
||||
graph:
|
||||
$START:
|
||||
_: { role: "against", prompt: "Present your opening argument against the proposition." }
|
||||
new: { role: "against", prompt: "Present your opening argument against the proposition." }
|
||||
resume: { role: "against", prompt: "Review the previous debate output and continue the argument against the proposition." }
|
||||
against:
|
||||
conceded: { role: "$END", prompt: "The against side conceded. Debate over." }
|
||||
continue: { role: "for", prompt: "Counter the opposing argument: {{{argument}}}" }
|
||||
|
||||
@@ -25,6 +25,7 @@ roles:
|
||||
required: [$status, summary]
|
||||
graph:
|
||||
$START:
|
||||
_: { role: "fixer", prompt: "Fix the code issue described in the task prompt." }
|
||||
new: { role: "fixer", prompt: "Fix the code issue described in the task prompt." }
|
||||
resume: { role: "fixer", prompt: "Review the previous run output and continue fixing the code issue." }
|
||||
fixer:
|
||||
done: { role: "$END", prompt: "Fix complete." }
|
||||
|
||||
@@ -215,7 +215,8 @@ roles:
|
||||
required: [$status, error]
|
||||
graph:
|
||||
$START:
|
||||
_: { role: "planner", prompt: "Analyze the issue and produce an implementation plan." }
|
||||
new: { role: "planner", prompt: "Analyze the issue and produce an implementation plan." }
|
||||
resume: { role: "planner", prompt: "Review the previous run output and continue the work." }
|
||||
planner:
|
||||
insufficient_info: { role: "$SUSPEND", prompt: "信息不足,需要补充:{{{reason}}}" }
|
||||
ready: { role: "developer", prompt: "Implement the TDD test spec (CAS hash: {{{plan}}}) in repo {{{repoPath}}}." }
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@united-workforce/agent-builtin",
|
||||
"version": "0.1.0",
|
||||
"version": "0.1.1",
|
||||
"files": [
|
||||
"src",
|
||||
"dist",
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
// eslint-disable-next-line -- dynamic import for version
|
||||
const pkg = await import("../package.json", { with: { type: "json" } });
|
||||
if (process.argv.includes("--version") || process.argv.includes("-V")) {
|
||||
process.stdout.write(`${pkg.default.version}\n`);
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
import { createBuiltinAgent } from "./agent.js";
|
||||
|
||||
const main = createBuiltinAgent();
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@united-workforce/agent-claude-code",
|
||||
"version": "0.1.0",
|
||||
"version": "0.1.1",
|
||||
"files": [
|
||||
"src",
|
||||
"dist",
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
// eslint-disable-next-line -- dynamic import for version
|
||||
const pkg = await import("../package.json", { with: { type: "json" } });
|
||||
if (process.argv.includes("--version") || process.argv.includes("-V")) {
|
||||
process.stdout.write(`${pkg.default.version}\n`);
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
import { createClaudeCodeAgent } from "./claude-code.js";
|
||||
|
||||
const model = process.env.CLAUDE_MODEL ?? null;
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
# @united-workforce/agent-hermes
|
||||
|
||||
## 0.1.1
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 8085d1d: fix: read token usage from ACP PromptResponse instead of DB
|
||||
|
||||
Token counts (inputTokens, outputTokens) now come from the ACP
|
||||
`PromptResponse.usage` field, which is populated synchronously from
|
||||
`run_conversation()` return data — no WAL race condition.
|
||||
|
||||
Turns (assistant message count) still come from the DB via
|
||||
`snapshotTurns()` before/after delta.
|
||||
|
||||
Previously both tokens and turns were read from the Hermes state DB
|
||||
after the ACP prompt returned, but due to WAL write lag the DB often
|
||||
had incomplete token data at read time (e.g. 235 vs actual 26,080).
|
||||
@@ -1,5 +1,6 @@
|
||||
import { describe, expect, test } from "vitest";
|
||||
import { computeUsageDelta, snapshotUsage } from "../src/hermes.js";
|
||||
import type { AcpUsage } from "../src/acp-client.js";
|
||||
import { buildUsage, snapshotTurns } from "../src/hermes.js";
|
||||
import type { HermesSessionJson } from "../src/types.js";
|
||||
|
||||
function makeSession(overrides: Partial<HermesSessionJson> = {}): HermesSessionJson {
|
||||
@@ -14,19 +15,19 @@ function makeSession(overrides: Partial<HermesSessionJson> = {}): HermesSessionJ
|
||||
};
|
||||
}
|
||||
|
||||
describe("snapshotUsage", () => {
|
||||
test("returns zero snapshot for null session", () => {
|
||||
const result = snapshotUsage(null);
|
||||
expect(result).toEqual({ turns: 0, inputTokens: 0, outputTokens: 0 });
|
||||
describe("snapshotTurns", () => {
|
||||
test("returns zero for null session", () => {
|
||||
const result = snapshotTurns(null);
|
||||
expect(result).toEqual({ turns: 0 });
|
||||
});
|
||||
|
||||
test("returns zero snapshot for empty session", () => {
|
||||
const result = snapshotUsage(makeSession());
|
||||
expect(result).toEqual({ turns: 0, inputTokens: 0, outputTokens: 0 });
|
||||
test("returns zero for empty session", () => {
|
||||
const result = snapshotTurns(makeSession());
|
||||
expect(result).toEqual({ turns: 0 });
|
||||
});
|
||||
|
||||
test("counts assistant messages as turns", () => {
|
||||
const result = snapshotUsage(
|
||||
const result = snapshotTurns(
|
||||
makeSession({
|
||||
messages: [
|
||||
{ role: "user", content: "hello", reasoning: null, tool_calls: null },
|
||||
@@ -39,11 +40,11 @@ describe("snapshotUsage", () => {
|
||||
outputTokens: 500,
|
||||
}),
|
||||
);
|
||||
expect(result).toEqual({ turns: 2, inputTokens: 1000, outputTokens: 500 });
|
||||
expect(result).toEqual({ turns: 2 });
|
||||
});
|
||||
|
||||
test("ignores non-assistant messages for turn count", () => {
|
||||
const result = snapshotUsage(
|
||||
const result = snapshotTurns(
|
||||
makeSession({
|
||||
messages: [
|
||||
{ role: "user", content: "hello", reasoning: null, tool_calls: null },
|
||||
@@ -55,11 +56,13 @@ describe("snapshotUsage", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("computeUsageDelta", () => {
|
||||
test("first visit: before is zero, after has all values", () => {
|
||||
const before = { turns: 0, inputTokens: 0, outputTokens: 0 };
|
||||
const after = { turns: 3, inputTokens: 5000, outputTokens: 2000 };
|
||||
const result = computeUsageDelta(before, after, 12.5);
|
||||
describe("buildUsage", () => {
|
||||
const acpUsage: AcpUsage = { inputTokens: 5000, outputTokens: 2000, totalTokens: 7000 };
|
||||
|
||||
test("first visit: tokens from ACP, turns from DB delta", () => {
|
||||
const beforeTurns = { turns: 0 };
|
||||
const afterTurns = { turns: 3 };
|
||||
const result = buildUsage(acpUsage, beforeTurns, afterTurns, 12.5);
|
||||
expect(result).toEqual({
|
||||
turns: 3,
|
||||
inputTokens: 5000,
|
||||
@@ -68,43 +71,52 @@ describe("computeUsageDelta", () => {
|
||||
});
|
||||
});
|
||||
|
||||
test("re-entry: computes delta correctly", () => {
|
||||
const before = { turns: 2, inputTokens: 3000, outputTokens: 1000 };
|
||||
const after = { turns: 4, inputTokens: 8000, outputTokens: 3500 };
|
||||
const result = computeUsageDelta(before, after, 7.3);
|
||||
test("re-entry: turn delta computed correctly, tokens from ACP", () => {
|
||||
const beforeTurns = { turns: 2 };
|
||||
const afterTurns = { turns: 4 };
|
||||
const acpDelta: AcpUsage = { inputTokens: 8000, outputTokens: 3500, totalTokens: 11500 };
|
||||
const result = buildUsage(acpDelta, beforeTurns, afterTurns, 7.3);
|
||||
expect(result).toEqual({
|
||||
turns: 2,
|
||||
inputTokens: 5000,
|
||||
outputTokens: 2500,
|
||||
inputTokens: 8000,
|
||||
outputTokens: 3500,
|
||||
duration: 7,
|
||||
});
|
||||
});
|
||||
|
||||
test("floors negative deltas at 0 (defensive)", () => {
|
||||
const before = { turns: 5, inputTokens: 10000, outputTokens: 5000 };
|
||||
const after = { turns: 3, inputTokens: 8000, outputTokens: 4000 };
|
||||
const result = computeUsageDelta(before, after, 1.0);
|
||||
test("floors negative turn deltas at 0, then defaults to 1", () => {
|
||||
const beforeTurns = { turns: 5 };
|
||||
const afterTurns = { turns: 3 };
|
||||
const result = buildUsage(acpUsage, beforeTurns, afterTurns, 1.0);
|
||||
// turns would be negative (-2), floored to 0, then || 1 gives 1
|
||||
expect(result.turns).toBe(1);
|
||||
expect(result.inputTokens).toBe(0);
|
||||
expect(result.outputTokens).toBe(0);
|
||||
});
|
||||
|
||||
test("zero turns delta defaults to 1 (at least one turn happened)", () => {
|
||||
const before = { turns: 3, inputTokens: 1000, outputTokens: 500 };
|
||||
const after = { turns: 3, inputTokens: 2000, outputTokens: 1000 };
|
||||
const result = computeUsageDelta(before, after, 5.0);
|
||||
const beforeTurns = { turns: 3 };
|
||||
const afterTurns = { turns: 3 };
|
||||
const result = buildUsage(acpUsage, beforeTurns, afterTurns, 5.0);
|
||||
// turns delta is 0, || 1 gives 1
|
||||
expect(result.turns).toBe(1);
|
||||
expect(result.inputTokens).toBe(1000);
|
||||
expect(result.outputTokens).toBe(500);
|
||||
});
|
||||
|
||||
test("null ACP usage yields zero tokens", () => {
|
||||
const beforeTurns = { turns: 0 };
|
||||
const afterTurns = { turns: 2 };
|
||||
const result = buildUsage(null, beforeTurns, afterTurns, 10.0);
|
||||
expect(result).toEqual({
|
||||
turns: 2,
|
||||
inputTokens: 0,
|
||||
outputTokens: 0,
|
||||
duration: 10,
|
||||
});
|
||||
});
|
||||
|
||||
test("duration is rounded", () => {
|
||||
const before = { turns: 0, inputTokens: 0, outputTokens: 0 };
|
||||
const after = { turns: 1, inputTokens: 100, outputTokens: 50 };
|
||||
expect(computeUsageDelta(before, after, 3.7).duration).toBe(4);
|
||||
expect(computeUsageDelta(before, after, 3.2).duration).toBe(3);
|
||||
expect(computeUsageDelta(before, after, 0.0).duration).toBe(0);
|
||||
const beforeTurns = { turns: 0 };
|
||||
const afterTurns = { turns: 1 };
|
||||
expect(buildUsage(acpUsage, beforeTurns, afterTurns, 3.7).duration).toBe(4);
|
||||
expect(buildUsage(acpUsage, beforeTurns, afterTurns, 3.2).duration).toBe(3);
|
||||
expect(buildUsage(acpUsage, beforeTurns, afterTurns, 0.0).duration).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@united-workforce/agent-hermes",
|
||||
"version": "0.1.0",
|
||||
"version": "0.1.2",
|
||||
"files": [
|
||||
"src",
|
||||
"dist",
|
||||
|
||||
@@ -1,6 +1,16 @@
|
||||
import type { ChildProcess } from "node:child_process";
|
||||
import { spawn } from "node:child_process";
|
||||
import { readFileSync } from "node:fs";
|
||||
import { dirname, join } from "node:path";
|
||||
import { createInterface } from "node:readline";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const OWN_VERSION = (
|
||||
JSON.parse(readFileSync(join(__dirname, "..", "package.json"), "utf-8")) as {
|
||||
version: string;
|
||||
}
|
||||
).version;
|
||||
|
||||
const HERMES_COMMAND = "hermes";
|
||||
const PROTOCOL_VERSION = 1;
|
||||
@@ -17,9 +27,17 @@ type PendingRequest = {
|
||||
reject: (reason: Error) => void;
|
||||
};
|
||||
|
||||
/** Token usage returned by ACP PromptResponse. */
|
||||
export type AcpUsage = {
|
||||
inputTokens: number;
|
||||
outputTokens: number;
|
||||
totalTokens: number;
|
||||
};
|
||||
|
||||
export type AcpPromptResult = {
|
||||
text: string;
|
||||
sessionId: string;
|
||||
usage: AcpUsage | null;
|
||||
};
|
||||
|
||||
export class HermesAcpClient {
|
||||
@@ -96,9 +114,25 @@ export class HermesAcpClient {
|
||||
);
|
||||
}
|
||||
|
||||
// Extract token usage from ACP PromptResponse.result.usage (camelCase wire format)
|
||||
const result = (response as { result?: Record<string, unknown> }).result;
|
||||
const rawUsage = result?.usage as Record<string, unknown> | undefined;
|
||||
const usage: AcpUsage | null =
|
||||
rawUsage !== undefined &&
|
||||
typeof rawUsage.inputTokens === "number" &&
|
||||
typeof rawUsage.outputTokens === "number" &&
|
||||
typeof rawUsage.totalTokens === "number"
|
||||
? {
|
||||
inputTokens: rawUsage.inputTokens,
|
||||
outputTokens: rawUsage.outputTokens,
|
||||
totalTokens: rawUsage.totalTokens,
|
||||
}
|
||||
: null;
|
||||
|
||||
return {
|
||||
text: this.messageChunks.join(""),
|
||||
sessionId: this.sessionId,
|
||||
usage,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -275,7 +309,7 @@ export class HermesAcpClient {
|
||||
private async initialize(): Promise<void> {
|
||||
const initResponse = await this.sendRequest("initialize", {
|
||||
protocolVersion: PROTOCOL_VERSION,
|
||||
clientInfo: { name: "uwf", version: "0.1.0" },
|
||||
clientInfo: { name: "uwf-hermes", version: OWN_VERSION },
|
||||
capabilities: {},
|
||||
});
|
||||
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
// eslint-disable-next-line -- dynamic import for version
|
||||
const pkg = await import("../package.json", { with: { type: "json" } });
|
||||
if (process.argv.includes("--version") || process.argv.includes("-V")) {
|
||||
process.stdout.write(`${pkg.default.version}\n`);
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
import { createHermesAgent } from "./hermes.js";
|
||||
import { isResumeDisabled } from "./session-cache.js";
|
||||
|
||||
|
||||
@@ -8,7 +8,7 @@ import {
|
||||
buildRolePrompt,
|
||||
createAgent,
|
||||
} from "@united-workforce/util-agent";
|
||||
|
||||
import type { AcpUsage } from "./acp-client.js";
|
||||
import { HermesAcpClient } from "./acp-client.js";
|
||||
import { getCachedSessionId, setCachedSessionId } from "./session-cache.js";
|
||||
import { loadHermesSession, storeHermesSessionDetail } from "./session-detail.js";
|
||||
@@ -17,36 +17,37 @@ import type { HermesSessionJson } from "./types.js";
|
||||
const log = createLogger({ sink: { kind: "stderr" } });
|
||||
|
||||
/** Snapshot of session metrics taken before and after a prompt call. */
|
||||
type UsageSnapshot = {
|
||||
type TurnsSnapshot = {
|
||||
turns: number;
|
||||
inputTokens: number;
|
||||
outputTokens: number;
|
||||
};
|
||||
|
||||
const ZERO_SNAPSHOT: UsageSnapshot = { turns: 0, inputTokens: 0, outputTokens: 0 };
|
||||
const ZERO_TURNS: TurnsSnapshot = { turns: 0 };
|
||||
|
||||
/** Extract usage metrics from a session. Returns zeros for null sessions. */
|
||||
export function snapshotUsage(session: HermesSessionJson | null): UsageSnapshot {
|
||||
/** Extract assistant turn count from a session. Returns zero for null sessions. */
|
||||
export function snapshotTurns(session: HermesSessionJson | null): TurnsSnapshot {
|
||||
if (session === null) {
|
||||
return ZERO_SNAPSHOT;
|
||||
return ZERO_TURNS;
|
||||
}
|
||||
return {
|
||||
turns: session.messages.filter((m) => m.role === "assistant").length,
|
||||
inputTokens: session.inputTokens,
|
||||
outputTokens: session.outputTokens,
|
||||
};
|
||||
}
|
||||
|
||||
/** Compute the delta between two snapshots (after minus before). Floors at 0. */
|
||||
export function computeUsageDelta(
|
||||
before: UsageSnapshot,
|
||||
after: UsageSnapshot,
|
||||
/**
|
||||
* Build Usage from ACP token data + DB turn delta.
|
||||
* Tokens come from ACP PromptResponse (synchronous, accurate).
|
||||
* Turns come from DB before/after snapshots (may have WAL lag, but acceptable).
|
||||
*/
|
||||
export function buildUsage(
|
||||
acpUsage: AcpUsage | null,
|
||||
beforeTurns: TurnsSnapshot,
|
||||
afterTurns: TurnsSnapshot,
|
||||
durationSec: number,
|
||||
): Usage {
|
||||
return {
|
||||
turns: Math.max(0, after.turns - before.turns) || 1,
|
||||
inputTokens: Math.max(0, after.inputTokens - before.inputTokens),
|
||||
outputTokens: Math.max(0, after.outputTokens - before.outputTokens),
|
||||
turns: Math.max(0, afterTurns.turns - beforeTurns.turns) || 1,
|
||||
inputTokens: acpUsage?.inputTokens ?? 0,
|
||||
outputTokens: acpUsage?.outputTokens ?? 0,
|
||||
duration: Math.round(durationSec),
|
||||
};
|
||||
}
|
||||
@@ -148,12 +149,12 @@ export function createHermesAgent(resumeDisabled: boolean): () => Promise<void>
|
||||
async function runPrompt(
|
||||
ctx: AgentContext,
|
||||
useContinuation: boolean,
|
||||
beforeSnapshot: UsageSnapshot,
|
||||
beforeTurns: TurnsSnapshot,
|
||||
): Promise<AgentRunResult> {
|
||||
const effectiveCtx = useContinuation ? ctx : { ...ctx, isFirstVisit: true };
|
||||
const fullPrompt = buildHermesPrompt(effectiveCtx);
|
||||
const startMs = Date.now();
|
||||
const { text, sessionId } = await client.prompt(fullPrompt);
|
||||
const { text, sessionId, usage: acpUsage } = await client.prompt(fullPrompt);
|
||||
const durationSec = (Date.now() - startMs) / 1000;
|
||||
const { detailHash } = await storePromptResult(ctx.store, sessionId);
|
||||
|
||||
@@ -161,9 +162,10 @@ export function createHermesAgent(resumeDisabled: boolean): () => Promise<void>
|
||||
await setCachedSessionId(ctx.threadId, ctx.role, sessionId, ctx.storageRoot);
|
||||
}
|
||||
|
||||
// Turns from DB (may lag slightly due to WAL, but acceptable)
|
||||
const afterSession = await loadHermesSession(sessionId);
|
||||
const afterSnapshot = snapshotUsage(afterSession);
|
||||
const usage = computeUsageDelta(beforeSnapshot, afterSnapshot, durationSec);
|
||||
const afterTurns = snapshotTurns(afterSession);
|
||||
const usage = buildUsage(acpUsage, beforeTurns, afterTurns, durationSec);
|
||||
|
||||
return { output: text, detailHash, sessionId, assembledPrompt: fullPrompt, usage };
|
||||
}
|
||||
@@ -173,16 +175,16 @@ export function createHermesAgent(resumeDisabled: boolean): () => Promise<void>
|
||||
const attempt = await prepareSession(client, ctx, cwd, resumeDisabled);
|
||||
|
||||
// Snapshot before prompt: for resumed sessions, captures cumulative state
|
||||
// so we can compute the delta. For new sessions, this is ZERO_SNAPSHOT.
|
||||
// so we can compute the turn delta. For new sessions, this is ZERO_TURNS.
|
||||
const currentSessionId = client.getSessionId();
|
||||
const beforeSession =
|
||||
attempt.resumed && currentSessionId !== null
|
||||
? await loadHermesSession(currentSessionId)
|
||||
: null;
|
||||
const beforeSnapshot = snapshotUsage(beforeSession);
|
||||
const beforeTurns = snapshotTurns(beforeSession);
|
||||
|
||||
try {
|
||||
return await runPrompt(ctx, attempt.useContinuation, beforeSnapshot);
|
||||
return await runPrompt(ctx, attempt.useContinuation, beforeTurns);
|
||||
} catch (error) {
|
||||
if (!attempt.resumed) {
|
||||
throw error;
|
||||
@@ -193,7 +195,7 @@ export function createHermesAgent(resumeDisabled: boolean): () => Promise<void>
|
||||
await client.close();
|
||||
await client.connect(cwd);
|
||||
// Fresh session after retry — reset snapshot to zero
|
||||
return runPrompt(ctx, false, ZERO_SNAPSHOT);
|
||||
return runPrompt(ctx, false, ZERO_TURNS);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -204,20 +206,20 @@ export function createHermesAgent(resumeDisabled: boolean): () => Promise<void>
|
||||
): Promise<AgentRunResult> {
|
||||
// Client is already connected from runHermes — same ACP session,
|
||||
// so the agent sees the full conversation history (crucial for retries).
|
||||
// Snapshot before the continuation prompt for delta computation.
|
||||
// Snapshot turns before the continuation prompt for delta computation.
|
||||
const currentSessionId = client.getSessionId();
|
||||
const beforeSession =
|
||||
currentSessionId !== null ? await loadHermesSession(currentSessionId) : null;
|
||||
const beforeSnapshot = snapshotUsage(beforeSession);
|
||||
const beforeTurns = snapshotTurns(beforeSession);
|
||||
|
||||
const startMs = Date.now();
|
||||
const { text, sessionId } = await client.prompt(message);
|
||||
const { text, sessionId, usage: acpUsage } = await client.prompt(message);
|
||||
const durationSec = (Date.now() - startMs) / 1000;
|
||||
const { detailHash } = await storePromptResult(store, sessionId);
|
||||
|
||||
const afterSession = await loadHermesSession(sessionId);
|
||||
const afterSnapshot = snapshotUsage(afterSession);
|
||||
const usage = computeUsageDelta(beforeSnapshot, afterSnapshot, durationSec);
|
||||
const afterTurns = snapshotTurns(afterSession);
|
||||
const usage = buildUsage(acpUsage, beforeTurns, afterTurns, durationSec);
|
||||
|
||||
return { output: text, detailHash, sessionId, assembledPrompt: "", usage };
|
||||
}
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
export type { AcpUsage } from "./acp-client.js";
|
||||
export { HermesAcpClient } from "./acp-client.js";
|
||||
export {
|
||||
buildHermesPrompt,
|
||||
computeUsageDelta,
|
||||
buildUsage,
|
||||
createHermesAgent,
|
||||
snapshotUsage,
|
||||
snapshotTurns,
|
||||
} from "./hermes.js";
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@united-workforce/agent-mock",
|
||||
"version": "0.1.0",
|
||||
"version": "0.1.1",
|
||||
"files": [
|
||||
"src",
|
||||
"dist",
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
// eslint-disable-next-line -- dynamic import for version
|
||||
const pkg = await import("../package.json", { with: { type: "json" } });
|
||||
if (process.argv.includes("--version") || process.argv.includes("-V")) {
|
||||
process.stdout.write(`${pkg.default.version}\n`);
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
import { createMockAgent } from "./mock-agent.js";
|
||||
|
||||
const USAGE = "usage: uwf-mock --mock-data <path> --thread <id> --role <role> --prompt <text>";
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
# @united-workforce/cli
|
||||
|
||||
## 0.1.1
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 850a3b2: fix: resolve --agent override via config alias before raw command
|
||||
|
||||
`resolveAgentConfig()` now checks `config.agents[alias]` first before falling back to `parseAgentOverride()`. Eval CLI default `--agent` changed from `"hermes"` to `"uwf-hermes"`.
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@united-workforce/cli",
|
||||
"version": "0.1.0",
|
||||
"version": "0.2.0",
|
||||
"files": [
|
||||
"src",
|
||||
"dist",
|
||||
|
||||
@@ -58,7 +58,10 @@ describe("C1: adapter JSON round-trip integration", () => {
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: { _: { role: "worker", prompt: "Do the work", location: null } },
|
||||
$START: {
|
||||
new: { role: "worker", prompt: "Do the work", location: null },
|
||||
resume: { role: "worker", prompt: "Resume the work", location: null },
|
||||
},
|
||||
worker: { done: { role: "$END", prompt: "completed", location: null } },
|
||||
},
|
||||
});
|
||||
|
||||
@@ -45,10 +45,14 @@ roles:
|
||||
$status: { type: string, enum: ["done"] }
|
||||
graph:
|
||||
$START:
|
||||
_:
|
||||
new:
|
||||
role: roleA
|
||||
prompt: "Do A"
|
||||
location: null
|
||||
resume:
|
||||
role: roleA
|
||||
prompt: "Resume A"
|
||||
location: null
|
||||
roleA:
|
||||
ready:
|
||||
role: roleB
|
||||
@@ -107,10 +111,14 @@ roles:
|
||||
$status: { type: string, enum: ["done"] }
|
||||
graph:
|
||||
$START:
|
||||
_:
|
||||
new:
|
||||
role: roleA
|
||||
prompt: "Do A"
|
||||
location: null
|
||||
resume:
|
||||
role: roleA
|
||||
prompt: "Resume A"
|
||||
location: null
|
||||
roleA:
|
||||
pass:
|
||||
role: roleB
|
||||
@@ -150,10 +158,14 @@ roles:
|
||||
$status: { type: string, enum: ["done"] }
|
||||
graph:
|
||||
$START:
|
||||
_:
|
||||
new:
|
||||
role: worker
|
||||
prompt: "Work"
|
||||
location: null
|
||||
resume:
|
||||
role: worker
|
||||
prompt: "Resume work"
|
||||
location: null
|
||||
worker:
|
||||
done:
|
||||
role: $END
|
||||
|
||||
@@ -36,7 +36,8 @@ roles:
|
||||
required: [$status]
|
||||
graph:
|
||||
$START:
|
||||
_: { role: analyst, prompt: 'Analyze the task' }
|
||||
new: { role: analyst, prompt: 'Analyze the task' }
|
||||
resume: { role: analyst, prompt: 'Review the previous run output and continue the work.' }
|
||||
analyst:
|
||||
analyzed: { role: developer, prompt: 'Implement the change' }
|
||||
developer:
|
||||
|
||||
@@ -25,7 +25,8 @@ roles:
|
||||
required: [$status]
|
||||
graph:
|
||||
$START:
|
||||
_: { role: planner, prompt: 'Plan the task' }
|
||||
new: { role: planner, prompt: 'Plan the task' }
|
||||
resume: { role: planner, prompt: 'Review the previous run output and continue the work.' }
|
||||
planner:
|
||||
ready: { role: worker, prompt: 'Do the work' }
|
||||
worker:
|
||||
|
||||
@@ -28,7 +28,8 @@ roles:
|
||||
required: [$status]
|
||||
graph:
|
||||
$START:
|
||||
_: { role: developer, prompt: 'Implement the change' }
|
||||
new: { role: developer, prompt: 'Implement the change' }
|
||||
resume: { role: developer, prompt: 'Review the previous run output and continue the work.' }
|
||||
developer:
|
||||
review_needed: { role: reviewer, prompt: 'Review the change' }
|
||||
reviewer:
|
||||
|
||||
@@ -27,7 +27,8 @@ roles:
|
||||
required: [$status]
|
||||
graph:
|
||||
$START:
|
||||
_: { role: planner, prompt: 'Plan the task' }
|
||||
new: { role: planner, prompt: 'Plan the task' }
|
||||
resume: { role: planner, prompt: 'Review the previous run output and continue the work.' }
|
||||
planner:
|
||||
ready: { role: worker, prompt: 'Work on branch {{{branch}}} in {{{repoPath}}}' }
|
||||
worker:
|
||||
|
||||
@@ -18,7 +18,8 @@ roles:
|
||||
required: [$status]
|
||||
graph:
|
||||
$START:
|
||||
_: { role: planner, prompt: 'Analyze the task' }
|
||||
new: { role: planner, prompt: 'Analyze the task' }
|
||||
resume: { role: planner, prompt: 'Review the previous run output and continue the work.' }
|
||||
planner:
|
||||
insufficient_info: { role: '$SUSPEND', prompt: 'Need more info: {{{reason}}}' }
|
||||
ready: { role: '$END', prompt: 'Done' }
|
||||
|
||||
@@ -5,7 +5,12 @@ import { evaluate } from "../moderator/evaluate.js";
|
||||
|
||||
const solveIssueGraph: WorkflowPayload["graph"] = {
|
||||
$START: {
|
||||
_: { role: "planner", prompt: "Start planning from the issue in the task.", location: null },
|
||||
new: { role: "planner", prompt: "Start planning from the issue in the task.", location: null },
|
||||
resume: {
|
||||
role: "planner",
|
||||
prompt: "Review the previous run output and continue the work.",
|
||||
location: null,
|
||||
},
|
||||
},
|
||||
planner: {
|
||||
planned: { role: "developer", prompt: "Implement the plan: {{plan}}", location: null },
|
||||
@@ -20,8 +25,8 @@ const solveIssueGraph: WorkflowPayload["graph"] = {
|
||||
};
|
||||
|
||||
describe("evaluate", () => {
|
||||
test("$START → first role (unit status _)", () => {
|
||||
const result = evaluate(solveIssueGraph, "$START", { $status: "_" });
|
||||
test("$START → first role (status new)", () => {
|
||||
const result = evaluate(solveIssueGraph, "$START", { $status: "new" });
|
||||
expect(result).toEqual({
|
||||
ok: true,
|
||||
value: {
|
||||
@@ -32,6 +37,18 @@ describe("evaluate", () => {
|
||||
});
|
||||
});
|
||||
|
||||
test("$START → first role (status resume)", () => {
|
||||
const result = evaluate(solveIssueGraph, "$START", { $status: "resume" });
|
||||
expect(result).toEqual({
|
||||
ok: true,
|
||||
value: {
|
||||
role: "planner",
|
||||
prompt: "Review the previous run output and continue the work.",
|
||||
location: null,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
test("status-based routing (reviewer rejected → developer)", () => {
|
||||
const result = evaluate(solveIssueGraph, "reviewer", {
|
||||
$status: "rejected",
|
||||
@@ -95,7 +112,7 @@ describe("evaluate", () => {
|
||||
});
|
||||
|
||||
test("missing role in graph → error", () => {
|
||||
const result = evaluate(solveIssueGraph, "unknown-role", { $status: "_" });
|
||||
const result = evaluate(solveIssueGraph, "unknown-role", { $status: "new" });
|
||||
expect(result.ok).toBe(false);
|
||||
if (!result.ok) {
|
||||
expect(result.error.message).toBe('no transitions defined for role "unknown-role"');
|
||||
|
||||
@@ -9,31 +9,25 @@ import {
|
||||
cmdPromptAdapterDeveloping,
|
||||
cmdPromptBootstrap,
|
||||
cmdPromptList,
|
||||
cmdPromptSetup,
|
||||
cmdPromptUsage,
|
||||
cmdPromptUsageReference,
|
||||
cmdPromptWorkflowAuthoring,
|
||||
} from "../commands/prompt.js";
|
||||
|
||||
describe("prompt commands", () => {
|
||||
test("prompt list returns new prompt names", () => {
|
||||
test("prompt list returns prompt names (no bootstrap)", () => {
|
||||
const result = cmdPromptList();
|
||||
expect(result).toBeInstanceOf(Array);
|
||||
expect(result).toContain("usage");
|
||||
expect(result).toContain("workflow-authoring");
|
||||
expect(result).toContain("adapter-developing");
|
||||
expect(result).toContain("bootstrap");
|
||||
expect(result).not.toContain("user");
|
||||
expect(result).not.toContain("author");
|
||||
expect(result).not.toContain("developer");
|
||||
expect(result).not.toContain("adapter");
|
||||
expect(result).not.toContain("bootstrap");
|
||||
for (const name of result) {
|
||||
expect(name).toMatch(/^\S+$/);
|
||||
}
|
||||
});
|
||||
|
||||
test("prompt usage-reference returns non-empty markdown string with frontmatter", () => {
|
||||
const result = cmdPromptUsageReference();
|
||||
test("prompt usage returns only the usage reference with frontmatter", () => {
|
||||
const result = cmdPromptUsage();
|
||||
expect(typeof result).toBe("string");
|
||||
expect(result).toContain("uwf");
|
||||
expect(result).toContain("thread");
|
||||
@@ -42,6 +36,9 @@ describe("prompt commands", () => {
|
||||
expect(result).toContain("---");
|
||||
expect(result).toContain("name:");
|
||||
expect(result).toContain("version:");
|
||||
// Should NOT contain other references
|
||||
expect(result).not.toContain("Workflow Authoring Reference");
|
||||
expect(result).not.toContain("Adapter Developing Reference");
|
||||
expect(result.length).toBeGreaterThan(500);
|
||||
});
|
||||
|
||||
@@ -71,44 +68,19 @@ describe("prompt commands", () => {
|
||||
expect(result.length).toBeGreaterThan(500);
|
||||
});
|
||||
|
||||
test("prompt bootstrap returns non-empty skill with frontmatter", () => {
|
||||
test("prompt bootstrap returns framework-agnostic setup instructions", () => {
|
||||
const result = cmdPromptBootstrap();
|
||||
expect(typeof result).toBe("string");
|
||||
expect(result).toContain("uwf");
|
||||
expect(result).toContain("---");
|
||||
expect(result.length).toBeGreaterThan(100);
|
||||
});
|
||||
|
||||
test("prompt usage combines remaining references (no developer)", () => {
|
||||
const result = cmdPromptUsage();
|
||||
expect(typeof result).toBe("string");
|
||||
expect(result).toContain("Usage Reference");
|
||||
expect(result).toContain("Workflow Authoring Reference");
|
||||
expect(result).toContain("Adapter Developing Reference");
|
||||
expect(result).not.toContain("Developer Reference");
|
||||
expect(result).toContain("---");
|
||||
expect(result.length).toBeGreaterThan(2000);
|
||||
});
|
||||
|
||||
test("prompt setup returns simplified setup instructions", () => {
|
||||
const result = cmdPromptSetup();
|
||||
expect(typeof result).toBe("string");
|
||||
expect(result).toContain("uwf Skill Setup");
|
||||
expect(result).toContain("uwf prompt bootstrap");
|
||||
expect(result).toContain("SKILL.md");
|
||||
expect(result).toContain("version");
|
||||
expect(result).not.toMatch(/\bbun (install|run|test|changeset|version|release)\b/);
|
||||
});
|
||||
|
||||
test("prompt setup references new subcommand names", () => {
|
||||
const result = cmdPromptSetup();
|
||||
expect(result).toContain("uwf prompt usage");
|
||||
expect(result).toContain("uwf prompt workflow-authoring");
|
||||
expect(result).toContain("uwf prompt adapter-developing");
|
||||
expect(result).not.toContain("uwf prompt user");
|
||||
expect(result).not.toContain("uwf prompt author");
|
||||
expect(result).not.toContain("uwf prompt developer");
|
||||
expect(result).not.toMatch(/uwf prompt adapter\b(?!-developing)/);
|
||||
expect(result).toContain("uwf-usage");
|
||||
expect(result).toContain("uwf-workflow-authoring");
|
||||
expect(result).toContain("uwf-adapter-developing");
|
||||
// Should NOT contain Hermes-specific paths
|
||||
expect(result).not.toContain("~/.hermes/skills/");
|
||||
expect(result).not.toContain("> ~/.hermes/");
|
||||
expect(result.length).toBeGreaterThan(100);
|
||||
});
|
||||
|
||||
test("prompt help subcommand is suppressed", { timeout: 30_000 }, () => {
|
||||
@@ -119,11 +91,12 @@ describe("prompt commands", () => {
|
||||
});
|
||||
expect(output).not.toMatch(/help\s+\[command\]/i);
|
||||
expect(output).toContain("usage");
|
||||
expect(output).toContain("setup");
|
||||
expect(output).toContain("bootstrap");
|
||||
expect(output).toContain("workflow-authoring");
|
||||
expect(output).toContain("adapter-developing");
|
||||
expect(output).toContain("bootstrap");
|
||||
expect(output).toContain("list");
|
||||
expect(output).not.toContain("developer");
|
||||
// Removed subcommands should not appear as command names
|
||||
expect(output).not.toMatch(/^\s+setup\s/m);
|
||||
expect(output).not.toContain("usage-reference");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -253,7 +253,10 @@ describe("thread read timing", () => {
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: { _: { role: "worker", prompt: "go", location: null } },
|
||||
$START: {
|
||||
new: { role: "worker", prompt: "go", location: null },
|
||||
resume: { role: "worker", prompt: "resume", location: null },
|
||||
},
|
||||
worker: { done: { role: "$END", prompt: "", location: null } },
|
||||
},
|
||||
});
|
||||
@@ -319,7 +322,10 @@ describe("thread read timing", () => {
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: { _: { role: "worker", prompt: "go", location: null } },
|
||||
$START: {
|
||||
new: { role: "worker", prompt: "go", location: null },
|
||||
resume: { role: "worker", prompt: "resume", location: null },
|
||||
},
|
||||
worker: { done: { role: "$END", prompt: "", location: null } },
|
||||
},
|
||||
});
|
||||
|
||||
@@ -57,10 +57,14 @@ roles:
|
||||
$status: { type: string, enum: ["ready"] }
|
||||
graph:
|
||||
$START:
|
||||
_:
|
||||
new:
|
||||
role: planner
|
||||
prompt: "Plan the work"
|
||||
location: null
|
||||
resume:
|
||||
role: planner
|
||||
prompt: "Resume the work"
|
||||
location: null
|
||||
planner:
|
||||
ready:
|
||||
role: $END
|
||||
@@ -113,10 +117,14 @@ roles:
|
||||
$status: { type: string, enum: ["ready"] }
|
||||
graph:
|
||||
$START:
|
||||
_:
|
||||
new:
|
||||
role: planner
|
||||
prompt: "Plan"
|
||||
location: null
|
||||
resume:
|
||||
role: planner
|
||||
prompt: "Resume"
|
||||
location: null
|
||||
planner:
|
||||
ready:
|
||||
role: $END
|
||||
@@ -156,10 +164,14 @@ roles:
|
||||
$status: { type: string, enum: ["ready"] }
|
||||
graph:
|
||||
$START:
|
||||
_:
|
||||
new:
|
||||
role: planner
|
||||
prompt: "Plan"
|
||||
location: null
|
||||
resume:
|
||||
role: planner
|
||||
prompt: "Resume"
|
||||
location: null
|
||||
planner:
|
||||
ready:
|
||||
role: $END
|
||||
|
||||
@@ -70,7 +70,10 @@ async function setupSuspendedThread(mode: MockAgentMode): Promise<{
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: { _: { role: "worker", prompt: "Start work", location: null } },
|
||||
$START: {
|
||||
new: { role: "worker", prompt: "Start work", location: null },
|
||||
resume: { role: "worker", prompt: "Resume the work", location: null },
|
||||
},
|
||||
worker: {
|
||||
needs_input: {
|
||||
role: "$SUSPEND",
|
||||
@@ -233,7 +236,10 @@ describe("uwf thread resume", () => {
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: { _: { role: "worker", prompt: "Start", location: null } },
|
||||
$START: {
|
||||
new: { role: "worker", prompt: "Start", location: null },
|
||||
resume: { role: "worker", prompt: "Resume", location: null },
|
||||
},
|
||||
worker: { done: { role: "$END", prompt: "Done", location: null } },
|
||||
},
|
||||
});
|
||||
@@ -479,7 +485,10 @@ describe("uwf thread resume - completed threads", () => {
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: { _: { role: "worker", prompt: "Start work", location: null } },
|
||||
$START: {
|
||||
new: { role: "worker", prompt: "Start work", location: null },
|
||||
resume: { role: "worker", prompt: "Resume the work", location: null },
|
||||
},
|
||||
worker: { done: { role: "reviewer", prompt: "Review the work", location: null } },
|
||||
reviewer: { done: { role: "$END", prompt: "Done", location: null } },
|
||||
},
|
||||
@@ -610,7 +619,7 @@ echo '${adapterJson}'
|
||||
expect(cliOutput.done).toBe(false);
|
||||
|
||||
const capturedPrompt = await readFile(promptCapturePath, "utf8");
|
||||
expect(capturedPrompt).toContain("Previous run completed");
|
||||
expect(capturedPrompt).toContain("Resume the work");
|
||||
expect(capturedPrompt).toContain("Additional context");
|
||||
|
||||
const storeModule = await import("../store.js");
|
||||
@@ -640,7 +649,10 @@ echo '${adapterJson}'
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: { _: { role: "worker", prompt: "Start", location: null } },
|
||||
$START: {
|
||||
new: { role: "worker", prompt: "Start", location: null },
|
||||
resume: { role: "worker", prompt: "Resume", location: null },
|
||||
},
|
||||
worker: { done: { role: "$END", prompt: "Done", location: null } },
|
||||
},
|
||||
});
|
||||
@@ -688,7 +700,10 @@ echo '${adapterJson}'
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: { _: { role: "worker", prompt: "Start", location: null } },
|
||||
$START: {
|
||||
new: { role: "worker", prompt: "Start", location: null },
|
||||
resume: { role: "worker", prompt: "Resume", location: null },
|
||||
},
|
||||
worker: { done: { role: "$END", prompt: "Done", location: null } },
|
||||
},
|
||||
});
|
||||
|
||||
@@ -34,10 +34,14 @@ roles:
|
||||
$status: { type: string, enum: ["ready"] }
|
||||
graph:
|
||||
$START:
|
||||
_:
|
||||
new:
|
||||
role: planner
|
||||
prompt: "Plan the work"
|
||||
location: null
|
||||
resume:
|
||||
role: planner
|
||||
prompt: "Resume the work"
|
||||
location: null
|
||||
planner:
|
||||
ready:
|
||||
role: $END
|
||||
@@ -66,10 +70,14 @@ roles:
|
||||
question: { type: string }
|
||||
graph:
|
||||
$START:
|
||||
_:
|
||||
new:
|
||||
role: worker
|
||||
prompt: "Start work"
|
||||
location: null
|
||||
resume:
|
||||
role: worker
|
||||
prompt: "Resume work"
|
||||
location: null
|
||||
worker:
|
||||
needs_input:
|
||||
role: $SUSPEND
|
||||
|
||||
@@ -57,10 +57,14 @@ roles:
|
||||
$status: { type: string, enum: ["ready"] }
|
||||
graph:
|
||||
$START:
|
||||
_:
|
||||
new:
|
||||
role: planner
|
||||
prompt: "Plan the work"
|
||||
location: null
|
||||
resume:
|
||||
role: planner
|
||||
prompt: "Resume the work"
|
||||
location: null
|
||||
planner:
|
||||
ready:
|
||||
role: $END
|
||||
|
||||
@@ -58,7 +58,10 @@ describe("suspend step CAS chain and threads.yaml metadata", () => {
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: { _: { role: "worker", prompt: "Start work", location: null } },
|
||||
$START: {
|
||||
new: { role: "worker", prompt: "Start work", location: null },
|
||||
resume: { role: "worker", prompt: "Resume work", location: null },
|
||||
},
|
||||
worker: {
|
||||
needs_input: {
|
||||
role: "$SUSPEND",
|
||||
|
||||
@@ -55,7 +55,10 @@ describe("suspended thread display", () => {
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: { _: { role: "worker", prompt: "Start work", location: null } },
|
||||
$START: {
|
||||
new: { role: "worker", prompt: "Start work", location: null },
|
||||
resume: { role: "worker", prompt: "Resume work", location: null },
|
||||
},
|
||||
worker: {
|
||||
needs_input: {
|
||||
role: "$SUSPEND",
|
||||
@@ -162,7 +165,10 @@ describe("suspended thread display", () => {
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: { _: { role: "worker", prompt: "Start work", location: null } },
|
||||
$START: {
|
||||
new: { role: "worker", prompt: "Start work", location: null },
|
||||
resume: { role: "worker", prompt: "Resume work", location: null },
|
||||
},
|
||||
worker: {
|
||||
needs_input: {
|
||||
role: "$SUSPEND",
|
||||
@@ -248,7 +254,10 @@ describe("suspended thread display", () => {
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: { _: { role: "worker", prompt: "Start work", location: null } },
|
||||
$START: {
|
||||
new: { role: "worker", prompt: "Start work", location: null },
|
||||
resume: { role: "worker", prompt: "Resume work", location: null },
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
|
||||
@@ -51,7 +51,10 @@ function makeWorkflow(overrides?: Partial<WorkflowPayload>): WorkflowPayload {
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: { _: { role: "writer", prompt: "Begin writing", location: null } },
|
||||
$START: {
|
||||
new: { role: "writer", prompt: "Begin writing", location: null },
|
||||
resume: { role: "writer", prompt: "Review previous output and continue", location: null },
|
||||
},
|
||||
writer: { done: { role: "reviewer", prompt: "Review this: {{{plan}}}", location: null } },
|
||||
reviewer: {
|
||||
approved: { role: "$END", prompt: "Done: {{{summary}}}", location: null },
|
||||
@@ -135,27 +138,38 @@ describe("Suite 2: Graph Structure", () => {
|
||||
expect(errors.some((e) => e.includes("$START must be defined in graph"))).toBe(true);
|
||||
});
|
||||
|
||||
test("2.2 $START has multiple status keys", () => {
|
||||
test("2.2 $START missing resume edge", () => {
|
||||
const wf = makeWorkflow();
|
||||
wf.graph.$START = {
|
||||
_: { role: "writer", prompt: "Begin", location: null },
|
||||
other: { role: "reviewer", prompt: "Also", location: null },
|
||||
new: { role: "writer", prompt: "Begin", location: null },
|
||||
};
|
||||
const errors = validateWorkflow(wf);
|
||||
expect(
|
||||
errors.some((e) => e.includes('$START must have exactly one edge with status "_"')),
|
||||
errors.some((e) => e.includes('$START must have edges with statuses "new" and "resume"')),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
test("2.3 $START edge uses non-_ status", () => {
|
||||
test("2.3 $START missing new edge", () => {
|
||||
const wf = makeWorkflow();
|
||||
wf.graph.$START = { ready: { role: "writer", prompt: "Begin", location: null } };
|
||||
wf.graph.$START = {
|
||||
resume: { role: "writer", prompt: "Resume", location: null },
|
||||
};
|
||||
const errors = validateWorkflow(wf);
|
||||
expect(
|
||||
errors.some((e) => e.includes('$START must have exactly one edge with status "_"')),
|
||||
errors.some((e) => e.includes('$START must have edges with statuses "new" and "resume"')),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
test("2.3b $START with new and resume passes", () => {
|
||||
const wf = makeWorkflow();
|
||||
wf.graph.$START = {
|
||||
new: { role: "writer", prompt: "Begin", location: null },
|
||||
resume: { role: "writer", prompt: "Resume", location: null },
|
||||
};
|
||||
const errors = validateWorkflow(wf);
|
||||
expect(errors.some((e) => e.includes("$START must have edges"))).toBe(false);
|
||||
});
|
||||
|
||||
test("2.4 $END has outgoing edges", () => {
|
||||
const wf = makeWorkflow();
|
||||
wf.graph.$END = { _: { role: "writer", prompt: "Loop", location: null } };
|
||||
@@ -193,15 +207,18 @@ describe("Suite 2: Graph Structure", () => {
|
||||
});
|
||||
|
||||
describe("Suite 3: Status-Edge Consistency", () => {
|
||||
test("3.1 user role using _ graph key is rejected", () => {
|
||||
test("3.1 user role using _ graph key is treated as an unknown status", () => {
|
||||
// "_" is no longer special-cased — it's just a status key that does not
|
||||
// match the role's $status enum, so it surfaces as extra/missing keys.
|
||||
const wf = makeWorkflow();
|
||||
wf.graph.writer = { _: { role: "reviewer", prompt: "Review", location: null } };
|
||||
const errors = validateWorkflow(wf);
|
||||
expect(
|
||||
errors.some((e) =>
|
||||
e.includes('role "writer" must use explicit $status keys in graph, not "_"'),
|
||||
),
|
||||
).toBe(true);
|
||||
expect(errors.some((e) => e.includes('role "writer" graph has extra status keys: _'))).toBe(
|
||||
true,
|
||||
);
|
||||
expect(errors.some((e) => e.includes('role "writer" graph is missing status keys: done'))).toBe(
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
test("3.2 user role graph key not matching $status enum", () => {
|
||||
@@ -240,13 +257,16 @@ describe("Suite 3: Status-Edge Consistency", () => {
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
test("3.5 multi-exit role with _ key", () => {
|
||||
test("3.5 multi-exit role with _ key is treated as an unknown status", () => {
|
||||
const wf = makeWorkflow();
|
||||
wf.graph.reviewer = { _: { role: "$END", prompt: "Done", location: null } };
|
||||
const errors = validateWorkflow(wf);
|
||||
expect(errors.some((e) => e.includes('role "reviewer" graph has extra status keys: _'))).toBe(
|
||||
true,
|
||||
);
|
||||
expect(
|
||||
errors.some((e) =>
|
||||
e.includes('role "reviewer" must use explicit $status keys in graph, not "_"'),
|
||||
e.includes('role "reviewer" graph is missing status keys: approved, rejected'),
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
@@ -38,7 +38,10 @@ function makeMinimalPayload(name: string, description: string): WorkflowPayload
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: { _: { role: "worker", prompt: "start working", location: null } },
|
||||
$START: {
|
||||
new: { role: "worker", prompt: "start working", location: null },
|
||||
resume: { role: "worker", prompt: "resume working", location: null },
|
||||
},
|
||||
worker: { done: { role: "$END", prompt: "done", location: null } },
|
||||
},
|
||||
};
|
||||
|
||||
+4
-20
@@ -8,9 +8,7 @@ import {
|
||||
cmdPromptAdapterDeveloping,
|
||||
cmdPromptBootstrap,
|
||||
cmdPromptList,
|
||||
cmdPromptSetup,
|
||||
cmdPromptUsage,
|
||||
cmdPromptUsageReference,
|
||||
cmdPromptWorkflowAuthoring,
|
||||
} from "./commands/prompt.js";
|
||||
import { cmdSetup, cmdSetupInteractive } from "./commands/setup.js";
|
||||
@@ -509,23 +507,16 @@ prompt.addHelpCommand(false);
|
||||
|
||||
prompt
|
||||
.command("usage")
|
||||
.description("Print the complete skill content (all references combined)")
|
||||
.description("Print the usage reference (CLI guide + typical workflows)")
|
||||
.action(() => {
|
||||
console.log(cmdPromptUsage());
|
||||
});
|
||||
|
||||
prompt
|
||||
.command("setup")
|
||||
.description("Print setup instructions for installing the uwf skill")
|
||||
.command("bootstrap")
|
||||
.description("Print setup instructions for installing uwf skills")
|
||||
.action(() => {
|
||||
console.log(cmdPromptSetup());
|
||||
});
|
||||
|
||||
prompt
|
||||
.command("usage-reference")
|
||||
.description("Print the usage reference (CLI guide + typical workflows)")
|
||||
.action(() => {
|
||||
console.log(cmdPromptUsageReference());
|
||||
console.log(cmdPromptBootstrap());
|
||||
});
|
||||
|
||||
prompt
|
||||
@@ -542,13 +533,6 @@ prompt
|
||||
console.log(cmdPromptAdapterDeveloping());
|
||||
});
|
||||
|
||||
prompt
|
||||
.command("bootstrap")
|
||||
.description("Print the bootstrap skill YAML for Hermes agents")
|
||||
.action(() => {
|
||||
console.log(cmdPromptBootstrap());
|
||||
});
|
||||
|
||||
prompt
|
||||
.command("list")
|
||||
.description("List all available prompt names")
|
||||
|
||||
@@ -1,14 +1,13 @@
|
||||
import {
|
||||
generateAdapterDevelopingReference,
|
||||
generateBootstrapReference,
|
||||
generateUsageReference,
|
||||
generateWorkflowAuthoringReference,
|
||||
VERSION,
|
||||
} from "@united-workforce/util";
|
||||
|
||||
export {
|
||||
generateAdapterDevelopingReference as cmdPromptAdapterDeveloping,
|
||||
generateBootstrapReference as cmdPromptBootstrap,
|
||||
generateUsageReference as cmdPromptUsageReference,
|
||||
generateUsageReference as cmdPromptUsage,
|
||||
generateWorkflowAuthoringReference as cmdPromptWorkflowAuthoring,
|
||||
};
|
||||
|
||||
@@ -16,24 +15,16 @@ const PROMPT_ENTRIES: ReadonlyArray<{ name: string; generate: () => string }> =
|
||||
{ name: "usage", generate: generateUsageReference },
|
||||
{ name: "workflow-authoring", generate: generateWorkflowAuthoringReference },
|
||||
{ name: "adapter-developing", generate: generateAdapterDevelopingReference },
|
||||
{ name: "bootstrap", generate: generateBootstrapReference },
|
||||
];
|
||||
|
||||
export function cmdPromptList(): ReadonlyArray<string> {
|
||||
return PROMPT_ENTRIES.map((e) => e.name);
|
||||
}
|
||||
|
||||
export function cmdPromptUsage(): string {
|
||||
return PROMPT_ENTRIES.filter((e) => e.name !== "bootstrap")
|
||||
.map((e) => e.generate())
|
||||
.join("\n\n---\n\n");
|
||||
}
|
||||
export function cmdPromptBootstrap(): string {
|
||||
return `# uwf Skill Bootstrap
|
||||
|
||||
export function cmdPromptSetup(): string {
|
||||
return `# uwf Skill Setup
|
||||
|
||||
You are being asked to install or update the uwf (United Workforce) skill
|
||||
so that you know how to use the \`uwf\` CLI for workflow orchestration.
|
||||
Install or update the uwf (United Workforce) skills for your agent.
|
||||
|
||||
## Steps
|
||||
|
||||
@@ -42,40 +33,30 @@ so that you know how to use the \`uwf\` CLI for workflow orchestration.
|
||||
uwf --help
|
||||
\`\`\`
|
||||
If not installed: \`npm install -g @united-workforce/cli\`
|
||||
Current version: ${VERSION}
|
||||
|
||||
2. **Clean up old versions of the skill** (if any exist):
|
||||
- Look for any existing uwf/workflow skill files in your skill directories
|
||||
(e.g. \`~/.hermes/skills/uwf/SKILL.md\` or similar paths).
|
||||
- If a skill exists, read its \`version\` from the YAML frontmatter.
|
||||
- If the version matches the current CLI version (\`uwf --version\`), you're done — no update needed.
|
||||
- If the version is older, **delete the old skill file** before proceeding.
|
||||
2. **Install the skills** by running each command below and creating a skill from its output:
|
||||
- Run \`uwf prompt usage\` → create a skill named \`uwf-usage\`
|
||||
- Run \`uwf prompt workflow-authoring\` → create a skill named \`uwf-workflow-authoring\`
|
||||
- Run \`uwf prompt adapter-developing\` → create a skill named \`uwf-adapter-developing\`
|
||||
|
||||
3. **Install the bootstrap skill:**
|
||||
\`\`\`bash
|
||||
uwf prompt bootstrap > ~/.hermes/skills/uwf/SKILL.md
|
||||
\`\`\`
|
||||
The bootstrap prompt already includes complete YAML frontmatter — no editing needed.
|
||||
Each command outputs a complete SKILL.md with YAML frontmatter — use your agent framework's skill creation API to save them.
|
||||
|
||||
4. **Verify** the skill is loadable by your agent framework.
|
||||
3. **Verify** the skills are loadable by your agent framework.
|
||||
|
||||
## Individual prompts
|
||||
## Updating
|
||||
|
||||
Each prompt outputs a complete SKILL.md with frontmatter — pipe directly to a file:
|
||||
When \`uwf\` is upgraded, re-run \`uwf prompt bootstrap\` and follow the steps again.
|
||||
The skill content is bundled with the CLI — always use \`uwf prompt <name>\` to get
|
||||
content matching your installed version.
|
||||
|
||||
## Available prompts
|
||||
|
||||
\`\`\`bash
|
||||
uwf prompt list # list available prompt names
|
||||
uwf prompt usage > ~/.hermes/skills/uwf-usage/SKILL.md # CLI usage guide
|
||||
uwf prompt workflow-authoring > ~/.hermes/skills/uwf-workflow-authoring/SKILL.md
|
||||
uwf prompt adapter-developing > ~/.hermes/skills/uwf-adapter-developing/SKILL.md
|
||||
uwf prompt bootstrap > ~/.hermes/skills/uwf/SKILL.md # bootstrap skill
|
||||
uwf prompt usage # CLI usage guide
|
||||
uwf prompt workflow-authoring # workflow YAML design guide
|
||||
uwf prompt adapter-developing # building agent adapters
|
||||
\`\`\`
|
||||
|
||||
## Notes
|
||||
|
||||
- The skill content is bundled with the CLI and versioned with it — always use
|
||||
\`uwf prompt usage\` to get the content matching your installed version.
|
||||
- Do NOT hand-edit the skill body. If the CLI is updated, re-run \`uwf prompt setup\`
|
||||
and follow the steps again.
|
||||
- When upgrading, always delete the old skill first to avoid stale instructions.
|
||||
`;
|
||||
}
|
||||
|
||||
@@ -911,7 +911,7 @@ function resolveEvaluateArgs(
|
||||
chain: ChainState,
|
||||
): { lastRole: string; lastOutput: EvaluateLastOutput } {
|
||||
if (chain.headIsStart) {
|
||||
return { lastRole: START_ROLE, lastOutput: { [STATUS_KEY]: "_" } };
|
||||
return { lastRole: START_ROLE, lastOutput: { [STATUS_KEY]: "new" } };
|
||||
}
|
||||
|
||||
const lastStep = chain.stepsNewestFirst[0];
|
||||
@@ -961,6 +961,12 @@ function resolveAgentConfig(
|
||||
agentOverride: string | null,
|
||||
): AgentConfig {
|
||||
if (agentOverride !== null) {
|
||||
// Try config alias first (e.g. "hermes" → config.agents.hermes),
|
||||
// then fall back to raw command name (e.g. "uwf-hermes" or "/usr/bin/agent").
|
||||
const fromAlias = config.agents[agentOverride as AgentAlias];
|
||||
if (fromAlias !== undefined) {
|
||||
return fromAlias;
|
||||
}
|
||||
return parseAgentOverride(agentOverride);
|
||||
}
|
||||
|
||||
@@ -1031,7 +1037,6 @@ function archiveThread(uwf: UwfStore, threadId: ThreadId, _workflow: CasRef, _he
|
||||
completeThread(uwf.varStore, threadId, "completed");
|
||||
}
|
||||
|
||||
// biome-ignore lint/complexity/noExcessiveCognitiveComplexity: orchestration function with inherent branching
|
||||
export async function cmdThreadResume(
|
||||
storageRoot: string,
|
||||
threadId: ThreadId,
|
||||
@@ -1095,7 +1100,7 @@ export async function cmdThreadResume(
|
||||
|
||||
// status === "completed"
|
||||
const workflow = loadWorkflowPayload(uwf, workflowHash);
|
||||
const startResult = evaluate(workflow.graph, START_ROLE, {});
|
||||
const startResult = evaluate(workflow.graph, START_ROLE, { [STATUS_KEY]: "resume" });
|
||||
if (!startResult.ok) {
|
||||
fail(`failed to evaluate $START: ${startResult.error.message}`);
|
||||
}
|
||||
@@ -1107,11 +1112,7 @@ export async function cmdThreadResume(
|
||||
}
|
||||
|
||||
const startRole = startResult.value.role;
|
||||
const completedPromptPrefix = "Previous run completed. Resuming with additional context.";
|
||||
const completedResumePrompt =
|
||||
supplement !== null && supplement !== ""
|
||||
? `${completedPromptPrefix}\n\n${supplement}`
|
||||
: completedPromptPrefix;
|
||||
const completedResumePrompt = buildResumePrompt(startResult.value.prompt, supplement);
|
||||
|
||||
const updatedEntry = { ...entry, status: "idle" as const, completedAt: null };
|
||||
setThread(uwf.varStore, threadId, updatedEntry);
|
||||
|
||||
@@ -6,11 +6,11 @@ describe("Edge prompt template variable resolution", () => {
|
||||
test("returns error when rendered prompt is empty string", () => {
|
||||
const graph = {
|
||||
$START: {
|
||||
_: { role: "classifier", prompt: "{{{userPrompt}}}", location: null },
|
||||
new: { role: "classifier", prompt: "{{{userPrompt}}}", location: null },
|
||||
},
|
||||
};
|
||||
|
||||
const result = evaluate(graph, "$START", {});
|
||||
const result = evaluate(graph, "$START", { $status: "new" });
|
||||
|
||||
expect(result.ok).toBe(false);
|
||||
if (!result.ok) {
|
||||
@@ -22,11 +22,11 @@ describe("Edge prompt template variable resolution", () => {
|
||||
test("returns error when rendered prompt is whitespace-only", () => {
|
||||
const graph = {
|
||||
$START: {
|
||||
_: { role: "classifier", prompt: " {{{userPrompt}}} ", location: null },
|
||||
new: { role: "classifier", prompt: " {{{userPrompt}}} ", location: null },
|
||||
},
|
||||
};
|
||||
|
||||
const result = evaluate(graph, "$START", {});
|
||||
const result = evaluate(graph, "$START", { $status: "new" });
|
||||
|
||||
expect(result.ok).toBe(false);
|
||||
if (!result.ok) {
|
||||
@@ -38,11 +38,11 @@ describe("Edge prompt template variable resolution", () => {
|
||||
test("succeeds when all template variables resolve to non-empty values", () => {
|
||||
const graph = {
|
||||
$START: {
|
||||
_: { role: "classifier", prompt: "{{{userPrompt}}}", location: null },
|
||||
new: { role: "classifier", prompt: "{{{userPrompt}}}", location: null },
|
||||
},
|
||||
};
|
||||
|
||||
const result = evaluate(graph, "$START", { userPrompt: "Fix the bug" });
|
||||
const result = evaluate(graph, "$START", { $status: "new", userPrompt: "Fix the bug" });
|
||||
|
||||
expect(result.ok).toBe(true);
|
||||
if (result.ok) {
|
||||
@@ -53,11 +53,11 @@ describe("Edge prompt template variable resolution", () => {
|
||||
test("succeeds with static (no-variable) prompt", () => {
|
||||
const graph = {
|
||||
$START: {
|
||||
_: { role: "classifier", prompt: "Classify this input", location: null },
|
||||
new: { role: "classifier", prompt: "Classify this input", location: null },
|
||||
},
|
||||
};
|
||||
|
||||
const result = evaluate(graph, "$START", {});
|
||||
const result = evaluate(graph, "$START", { $status: "new" });
|
||||
|
||||
expect(result.ok).toBe(true);
|
||||
if (result.ok) {
|
||||
@@ -68,11 +68,11 @@ describe("Edge prompt template variable resolution", () => {
|
||||
test("succeeds when prompt has mix of static text and unresolved variables", () => {
|
||||
const graph = {
|
||||
$START: {
|
||||
_: { role: "classifier", prompt: "Please handle: {{{userPrompt}}}", location: null },
|
||||
new: { role: "classifier", prompt: "Please handle: {{{userPrompt}}}", location: null },
|
||||
},
|
||||
};
|
||||
|
||||
const result = evaluate(graph, "$START", {});
|
||||
const result = evaluate(graph, "$START", { $status: "new" });
|
||||
|
||||
expect(result.ok).toBe(true);
|
||||
if (result.ok) {
|
||||
@@ -83,11 +83,11 @@ describe("Edge prompt template variable resolution", () => {
|
||||
test("returns error when ALL variables missing and no static text remains", () => {
|
||||
const graph = {
|
||||
$START: {
|
||||
_: { role: "classifier", prompt: "{{{a}}}{{{b}}}", location: null },
|
||||
new: { role: "classifier", prompt: "{{{a}}}{{{b}}}", location: null },
|
||||
},
|
||||
};
|
||||
|
||||
const result = evaluate(graph, "$START", {});
|
||||
const result = evaluate(graph, "$START", { $status: "new" });
|
||||
|
||||
expect(result.ok).toBe(false);
|
||||
});
|
||||
|
||||
@@ -6,10 +6,7 @@ import type { EvaluateResult, Result } from "./types.js";
|
||||
// Disable HTML escaping — prompts are plain text, not HTML.
|
||||
mustache.escape = (text: string) => text;
|
||||
|
||||
const START_ROLE = "$START";
|
||||
const SUSPEND_ROLE = "$SUSPEND";
|
||||
// $START is a special entry node with no agent output — it always uses this key.
|
||||
const START_STATUS = "_";
|
||||
|
||||
type LastOutput = Record<string, unknown>;
|
||||
|
||||
@@ -21,9 +18,7 @@ export function evaluate(
|
||||
lastOutput: LastOutput,
|
||||
): Result<EvaluateResult, Error> {
|
||||
let status: string;
|
||||
if (lastRole === START_ROLE) {
|
||||
status = START_STATUS;
|
||||
} else if (typeof lastOutput[STATUS_KEY] === "string") {
|
||||
if (typeof lastOutput[STATUS_KEY] === "string") {
|
||||
status = lastOutput[STATUS_KEY] as string;
|
||||
} else {
|
||||
return {
|
||||
|
||||
@@ -97,9 +97,9 @@ function checkGraphStructure(payload: WorkflowPayload, errors: string[]): void {
|
||||
if (!graphNodes.has("$START")) {
|
||||
errors.push("$START must be defined in graph");
|
||||
} else {
|
||||
const startKeys = Object.keys(payload.graph.$START);
|
||||
if (startKeys.length !== 1 || startKeys[0] !== "_") {
|
||||
errors.push('$START must have exactly one edge with status "_"');
|
||||
const startKeys = new Set(Object.keys(payload.graph.$START));
|
||||
if (!startKeys.has("new") || !startKeys.has("resume")) {
|
||||
errors.push('$START must have edges with statuses "new" and "resume"');
|
||||
}
|
||||
}
|
||||
|
||||
@@ -190,22 +190,13 @@ function checkOneOfDiscriminant(
|
||||
}
|
||||
}
|
||||
|
||||
/** Check status-edge consistency for a user role. "_" is reserved for $START and rejected here. */
|
||||
/** Check status-edge consistency for a user role. */
|
||||
function checkStatusEdges(
|
||||
roleName: string,
|
||||
graphKeys: Set<string>,
|
||||
statusSet: Set<string>,
|
||||
errors: string[],
|
||||
): void {
|
||||
if (graphKeys.has("_")) {
|
||||
errors.push(`role "${roleName}" must use explicit $status keys in graph, not "_"`);
|
||||
return;
|
||||
}
|
||||
if (statusSet.has("_")) {
|
||||
errors.push(`role "${roleName}" $status enum must use explicit values, not "_"`);
|
||||
return;
|
||||
}
|
||||
|
||||
const extraKeys = [...graphKeys].filter((k) => !statusSet.has(k));
|
||||
const missingKeys = [...statusSet].filter((k) => !graphKeys.has(k));
|
||||
if (extraKeys.length > 0) {
|
||||
|
||||
@@ -57,13 +57,13 @@ function isGraph(value: unknown): boolean {
|
||||
if (!isRecord(value)) {
|
||||
return false;
|
||||
}
|
||||
return Object.entries(value).every(([node, statusMap]) => {
|
||||
return Object.values(value).every((statusMap) => {
|
||||
if (!isRecord(statusMap)) {
|
||||
return false;
|
||||
}
|
||||
return Object.entries(statusMap).every(([status, target]) => {
|
||||
// "_" is only valid as a status key for the $START entry node.
|
||||
if (status === "_" && node !== "$START") {
|
||||
// "_" is no longer a valid status key anywhere — $START uses "new"/"resume".
|
||||
if (status === "_") {
|
||||
return false;
|
||||
}
|
||||
return isTarget(target);
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
# @united-workforce/eval
|
||||
|
||||
## 0.1.2
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 850a3b2: fix: resolve --agent override via config alias before raw command
|
||||
|
||||
`resolveAgentConfig()` now checks `config.agents[alias]` first before falling back to `parseAgentOverride()`. Eval CLI default `--agent` changed from `"hermes"` to `"uwf-hermes"`.
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@united-workforce/eval",
|
||||
"version": "0.1.1",
|
||||
"version": "0.1.3",
|
||||
"private": false,
|
||||
"files": [
|
||||
"src",
|
||||
|
||||
@@ -7,12 +7,15 @@ import {
|
||||
registerRunCommand,
|
||||
} from "./commands/index.js";
|
||||
|
||||
// eslint-disable-next-line -- dynamic import for version
|
||||
const pkg = await import("../package.json", { with: { type: "json" } });
|
||||
|
||||
const program = new Command();
|
||||
|
||||
program
|
||||
.name("uwf-eval")
|
||||
.description("Evaluate uwf workflow quality with real agents")
|
||||
.version("0.1.0");
|
||||
.version(pkg.default.version, "-V, --version");
|
||||
|
||||
registerRunCommand(program);
|
||||
registerReportCommand(program);
|
||||
|
||||
@@ -52,7 +52,7 @@ export function registerRunCommand(program: Command): void {
|
||||
program
|
||||
.command("run <task>")
|
||||
.description("Run eval on a task directory or tarball")
|
||||
.option("--agent <name>", "agent adapter to use", "hermes")
|
||||
.option("--agent <name>", "agent adapter to use", "uwf-hermes")
|
||||
.option("--model <model>", "model override")
|
||||
.option("--count <n>", "number of eval runs", "1")
|
||||
.action(async (task: string, opts: RunCliOptions) => {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@united-workforce/util",
|
||||
"version": "0.1.0",
|
||||
"version": "0.1.2",
|
||||
"files": [
|
||||
"src",
|
||||
"dist",
|
||||
|
||||
@@ -1,40 +0,0 @@
|
||||
export function generateBootstrapReference(): string {
|
||||
return `---
|
||||
name: uwf
|
||||
description: "United Workforce (uwf) — YAML 状态机工作流引擎。任务涉及 workflow 时加载此 skill。"
|
||||
tags: [workflow, uwf]
|
||||
triggers:
|
||||
- uwf
|
||||
- workflow
|
||||
- 工作流
|
||||
---
|
||||
|
||||
# uwf (United Workforce)
|
||||
|
||||
YAML 状态机工作流引擎。当用户提到「workflow」「工作流」时,指的是 **uwf workflow**(YAML 定义的状态机),不是 Hermes skill。用 \`uwf\` CLI 操作,不要混淆。
|
||||
|
||||
## 首次使用
|
||||
|
||||
运行以下命令获取完整用法:
|
||||
|
||||
\`\`\`bash
|
||||
uwf prompt usage # 完整用法文档(所有引用合并)
|
||||
uwf prompt workflow-authoring # workflow 编写指南(role 定义、graph 路由、schema)
|
||||
uwf prompt adapter-developing # adapter 开发指南(构建新的 agent adapter)
|
||||
\`\`\`
|
||||
|
||||
## 快速参考
|
||||
|
||||
\`\`\`bash
|
||||
uwf workflow list # 查看已注册 workflow
|
||||
uwf workflow add <file.yaml> # 注册 workflow
|
||||
uwf thread start <workflow> -p "prompt" # 创建 thread
|
||||
uwf thread exec <thread-id> -c 10 # 执行最多 10 步
|
||||
uwf thread list # 查看所有 thread
|
||||
\`\`\`
|
||||
|
||||
## 示例 workflow
|
||||
|
||||
参考项目 \`examples/\` 目录下的 YAML 文件(analyze-topic、debate、solve-issue)。
|
||||
`;
|
||||
}
|
||||
@@ -2,7 +2,6 @@ export { generateActorReference } from "./actor-reference.js";
|
||||
export { generateAdapterDevelopingReference } from "./adapter-developing-reference.js";
|
||||
export { generateArchitectureReference } from "./architecture-reference.js";
|
||||
export { encodeUint64AsCrockford } from "./base32.js";
|
||||
export { generateBootstrapReference } from "./bootstrap-reference.js";
|
||||
export { generateCliReference } from "./cli-reference.js";
|
||||
export { env } from "./env.js";
|
||||
export type {
|
||||
@@ -16,7 +15,7 @@ export {
|
||||
validateFrontmatter,
|
||||
} from "./frontmatter-markdown/index.js";
|
||||
export { createLogger } from "./logger.js";
|
||||
export { generateModeratorReference } from "./moderator-reference.js";
|
||||
|
||||
export type {
|
||||
CreateProcessLoggerOptions,
|
||||
ProcessLogFn,
|
||||
@@ -36,4 +35,3 @@ export { extractUlidTimestamp, generateUlid } from "./ulid.js";
|
||||
export { generateUsageReference } from "./usage-reference.js";
|
||||
export { VERSION } from "./version.js";
|
||||
export { generateWorkflowAuthoringReference } from "./workflow-authoring-reference.js";
|
||||
export { generateYamlReference } from "./yaml-reference.js";
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
export function generateModeratorReference(): string {
|
||||
return `# Moderator Reference
|
||||
|
||||
## Overview
|
||||
|
||||
The moderator is the workflow engine's routing component. It evaluates the directed graph defined in the workflow YAML to determine the next role (or \`$END\`) after each step — with zero LLM cost.
|
||||
|
||||
## Status-Based Routing
|
||||
|
||||
The moderator uses **status-based routing**: it inspects the previous step's extracted output (specifically the \`$status\` field) and looks up the corresponding edge in the graph.
|
||||
|
||||
### Graph Structure
|
||||
|
||||
The graph is a nested map: \`Record<Role | "$START", Record<Status, Target>>\`. Each role maps its possible \`$status\` values to a target with a \`role\` and \`prompt\`:
|
||||
|
||||
\`\`\`yaml
|
||||
graph:
|
||||
$START:
|
||||
_: { role: planner, prompt: "Analyze the issue." }
|
||||
planner:
|
||||
ready: { role: developer, prompt: "Implement the plan (CAS hash: {{{plan}}})." }
|
||||
insufficient_info: { role: $END, prompt: "Not enough info." }
|
||||
developer:
|
||||
done: { role: reviewer, prompt: "Review branch {{{branch}}} at {{{worktree}}}." }
|
||||
failed: { role: $END, prompt: "Developer failed: {{{reason}}}." }
|
||||
reviewer:
|
||||
approved: { role: tester, prompt: "Run tests on {{{branch}}} at {{{worktree}}}." }
|
||||
rejected: { role: developer, prompt: "Fix issues: {{{comments}}}." }
|
||||
\`\`\`
|
||||
|
||||
### Routing Algorithm
|
||||
|
||||
1. Look up \`graph[lastRole]\` to get the status map for the current role
|
||||
2. Look up \`statusMap[lastOutput.$status]\` to get the target
|
||||
3. If target role is \`$END\`, mark thread as completed
|
||||
4. Otherwise, render the edge prompt (Mustache templates with \`{{{field}}}\` from output) and spawn the next agent
|
||||
|
||||
### Edge Prompts and Mustache Templates
|
||||
|
||||
Edge prompts use triple-brace Mustache syntax (\`{{{field}}}\`) to interpolate values from the previous step's output into the next agent's task prompt. This passes structured data (branch names, file paths, CAS hashes) between roles without manual wiring.
|
||||
|
||||
## Special Nodes
|
||||
|
||||
- \`$START\` — entry point; uses status key \`_\` (unconditional) since there is no previous output
|
||||
- \`$END\` — terminal node; thread completes when reached and is moved to history
|
||||
|
||||
## Integration with Steps
|
||||
|
||||
Each \`uwf thread exec\` cycle:
|
||||
1. Moderator reads the thread's head step output
|
||||
2. Looks up \`graph[lastRole][output.$status]\` to pick the next role
|
||||
3. If next is \`$END\`, marks thread as completed
|
||||
4. Otherwise, renders the edge prompt and spawns the agent for the selected role
|
||||
5. Extract pipeline parses agent output → new step node → append to CAS chain
|
||||
`;
|
||||
}
|
||||
@@ -1,2 +1,9 @@
|
||||
// This version is kept in sync with package.json during releases.
|
||||
export const VERSION = "0.1.0";
|
||||
import { readFileSync } from "node:fs";
|
||||
import { dirname, join } from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const pkg = JSON.parse(readFileSync(join(__dirname, "..", "package.json"), "utf-8")) as {
|
||||
version: string;
|
||||
};
|
||||
export const VERSION = pkg.version;
|
||||
|
||||
@@ -40,7 +40,8 @@ roles: # named actors
|
||||
|
||||
graph: # status-based routing
|
||||
$START:
|
||||
_: { role: planner, prompt: "Analyze the issue." }
|
||||
new: { role: planner, prompt: "Analyze the issue." }
|
||||
resume: { role: planner, prompt: "Review the previous run output and continue." }
|
||||
planner:
|
||||
ready: { role: developer, prompt: "Implement {{{plan}}}." }
|
||||
failed: { role: $END, prompt: "Failed: {{{error}}}" }
|
||||
@@ -113,7 +114,7 @@ graph[role][$status] → { role: nextRole, prompt: edgePrompt }
|
||||
|
||||
| Node | Purpose |
|
||||
|------|---------|
|
||||
| \`$START\` | Entry point — status key is always \`_\` (unconditional) |
|
||||
| \`$START\` | Entry point — status keys \`new\` (first start) and \`resume\` (resuming a completed thread) |
|
||||
| \`$END\` | Terminal — thread completes and is archived |
|
||||
|
||||
### Edge Prompts
|
||||
@@ -178,7 +179,7 @@ ocas get <output-hash>
|
||||
1. Every \`$status\` value in a role's frontmatter has a matching edge in the graph
|
||||
2. Every field referenced in edge prompts (\`{{{field}}}\`) exists in the source role's schema
|
||||
3. Every role referenced in the graph exists in \`roles\`
|
||||
4. \`$START\` has exactly one edge with key \`_\`
|
||||
4. \`$START\` has edges with keys \`new\` and \`resume\`
|
||||
5. At least one path leads to \`$END\`
|
||||
6. No orphan roles (defined but never routed to)
|
||||
|
||||
|
||||
@@ -1,82 +0,0 @@
|
||||
export function generateYamlReference(): string {
|
||||
return `# Workflow YAML Schema Reference
|
||||
|
||||
## Top-Level Structure
|
||||
|
||||
A workflow YAML file defines the complete workflow specification:
|
||||
|
||||
\`\`\`yaml
|
||||
name: solve-issue # verb-first kebab-case identifier
|
||||
description: "..." # human-readable description
|
||||
|
||||
roles: # named actors in the workflow
|
||||
planner:
|
||||
description: "Analyzes issue and outputs a plan"
|
||||
goal: "You are a planning agent."
|
||||
capabilities:
|
||||
- issue-analysis
|
||||
- planning
|
||||
procedure: |
|
||||
1. Read the issue
|
||||
2. Produce a test spec
|
||||
output: "Output the plan summary. Set $status to ready or insufficient_info."
|
||||
frontmatter: # JSON Schema for structured output (drives routing)
|
||||
oneOf:
|
||||
- properties:
|
||||
$status: { const: ready }
|
||||
plan: { type: string }
|
||||
required: [$status, plan]
|
||||
- properties:
|
||||
$status: { const: insufficient_info }
|
||||
required: [$status]
|
||||
|
||||
graph: # status-based routing (nested map)
|
||||
$START:
|
||||
_: { role: planner, prompt: "Analyze the issue." }
|
||||
planner:
|
||||
ready: { role: developer, prompt: "Implement plan {{{plan}}}." }
|
||||
insufficient_info: { role: $END, prompt: "Not enough info." }
|
||||
\`\`\`
|
||||
|
||||
## roles
|
||||
|
||||
Each role defines an actor in the workflow:
|
||||
|
||||
| Field | Type | Description |
|
||||
|-------|------|-------------|
|
||||
| \`description\` | string | Short description of the role's purpose |
|
||||
| \`goal\` | string | System-level goal statement for the agent |
|
||||
| \`capabilities\` | string[] | Tags describing what the role can do |
|
||||
| \`procedure\` | string | Step-by-step instructions for the agent |
|
||||
| \`output\` | string | Description of expected output format |
|
||||
| \`frontmatter\` | JSON Schema | Defines the structured output the agent must produce |
|
||||
|
||||
### frontmatter
|
||||
|
||||
The \`frontmatter\` field is a standard JSON Schema object. The extract pipeline validates agent output against it. Key conventions:
|
||||
- \`$status\` field drives routing decisions in the graph
|
||||
- Use \`const\` or \`enum\` to constrain status values
|
||||
- Use \`oneOf\` to define multiple valid output shapes (one per status)
|
||||
- All \`required\` fields must appear in the agent's frontmatter output
|
||||
|
||||
## graph
|
||||
|
||||
The graph is a nested map defining status-based routing:
|
||||
|
||||
\`\`\`
|
||||
Record<Role | "$START", Record<Status, { role: string, prompt: string }>>
|
||||
\`\`\`
|
||||
|
||||
| Level | Key | Value |
|
||||
|-------|-----|-------|
|
||||
| Outer | Role name or \`$START\` | Status map for that role |
|
||||
| Inner | \`$status\` value (or \`_\` for unconditional) | Target: \`{ role, prompt }\` |
|
||||
|
||||
### Special Nodes
|
||||
- \`$START\` — entry point; uses status key \`_\` (unconditional, no previous output)
|
||||
- \`$END\` — terminal node; thread completes when reached
|
||||
|
||||
### Edge Prompts
|
||||
Prompts use triple-brace Mustache templates (\`{{{field}}}\`) to interpolate values from the previous step's output. Example: \`"Implement plan {{{plan}}} in repo {{{repoPath}}}."\`
|
||||
`;
|
||||
}
|
||||
@@ -21,9 +21,12 @@ graph:
|
||||
role: package-metadata
|
||||
prompt: Biome setup failed ({{{reason}}}), but continue. Standardize package metadata for repo at {{{repoPath}}}.
|
||||
$START:
|
||||
_:
|
||||
new:
|
||||
role: workspace
|
||||
prompt: Set up bun workspace structure for repo at {{{repoPath}}}.
|
||||
resume:
|
||||
role: workspace
|
||||
prompt: Review the previous run output and continue setting up the bun workspace structure for repo at {{{repoPath}}}.
|
||||
release:
|
||||
done:
|
||||
role: testing
|
||||
|
||||
@@ -283,9 +283,12 @@ roles:
|
||||
- error
|
||||
graph:
|
||||
$START:
|
||||
_:
|
||||
new:
|
||||
role: planner
|
||||
prompt: Analyze the issue and produce an implementation plan.
|
||||
resume:
|
||||
role: planner
|
||||
prompt: Review the previous run output and continue the work.
|
||||
planner:
|
||||
insufficient_info:
|
||||
role: $SUSPEND
|
||||
|
||||
Reference in New Issue
Block a user