Compare commits
33 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 83c1eedc75 | |||
| cef4db9a87 | |||
| deac2336b6 | |||
| 10642fdc45 | |||
| 92020d2d78 | |||
| cd0a79d72b | |||
| 3b6aa6525f | |||
| 54631c43c7 | |||
| 655b57c4b5 | |||
| 7faa8184ae | |||
| 816137315e | |||
| 9a111d16c7 | |||
| ea6ceafe51 | |||
| d0dc7b5a19 | |||
| 3b81521e9d | |||
| aa0a23293f | |||
| 187dd036e5 | |||
| 4b45f4e6d1 | |||
| 2a6bce4918 | |||
| 3d6399c0e3 | |||
| b9258f84a5 | |||
| 638329a562 | |||
| 1a06e014f5 | |||
| d5d05334f5 | |||
| 844f5438fe | |||
| e329d74ec0 | |||
| f90614a622 | |||
| 68af555313 | |||
| 025695dbe9 | |||
| 96584e481f | |||
| 766ec7ddc2 | |||
| aeb7180e9d | |||
| 9b56f7b75e |
@@ -44,7 +44,7 @@ roles:
|
||||
- If bounced back from reviewer or tester, reuse the existing branch instead
|
||||
|
||||
Then implement TDD:
|
||||
3. Read the test spec from CAS: `uwf cas get <plan hash>` (find the hash from the latest planner step's meta.plan)
|
||||
3. Read the test spec from CAS: `uwf cas get <plan hash>` (find the hash from the latest planner step's frontmatter.plan)
|
||||
4. If bounced back from reviewer or tester: read the previous role's output to understand what needs fixing
|
||||
5. Write tests first based on the spec
|
||||
6. Implement the code to make tests pass
|
||||
@@ -99,7 +99,7 @@ roles:
|
||||
- testing
|
||||
procedure: |
|
||||
1. Run `bun test` for automated test verification
|
||||
2. Read the test spec from CAS: `uwf cas get <plan hash>` (find the hash from the latest planner step's meta.plan)
|
||||
2. Read the test spec from CAS: `uwf cas get <plan hash>` (find the hash from the latest planner step's frontmatter.plan)
|
||||
3. Verify each scenario in the spec is covered and passing
|
||||
4. Determine outcome:
|
||||
- passed: all scenarios verified, tests pass
|
||||
@@ -154,25 +154,43 @@ conditions:
|
||||
graph:
|
||||
$START:
|
||||
- role: "planner"
|
||||
condition: null
|
||||
prompt: "Analyze the issue and produce an implementation plan."
|
||||
planner:
|
||||
- role: "$END"
|
||||
condition: "insufficientInfo"
|
||||
prompt: "Insufficient information to proceed; end the workflow."
|
||||
- role: "developer"
|
||||
condition: null
|
||||
prompt: "Implement the plan from the planner."
|
||||
developer:
|
||||
- role: "$END"
|
||||
condition: "devFailed"
|
||||
prompt: "Development failed; end the workflow."
|
||||
- role: "reviewer"
|
||||
condition: null
|
||||
prompt: "Send the implementation to the reviewer."
|
||||
reviewer:
|
||||
- role: "developer"
|
||||
condition: "rejected"
|
||||
prompt: "Reviewer rejected the implementation; fix the issues."
|
||||
- role: "tester"
|
||||
condition: null
|
||||
prompt: "Review passed; run tests on the implementation."
|
||||
tester:
|
||||
- role: "developer"
|
||||
condition: "fixCode"
|
||||
prompt: "Tests found code issues; return to developer."
|
||||
- role: "planner"
|
||||
condition: "fixSpec"
|
||||
prompt: "Tests found spec issues; return to planner."
|
||||
- role: "committer"
|
||||
condition: null
|
||||
prompt: "Tests passed; commit and push the changes."
|
||||
committer:
|
||||
- role: "developer"
|
||||
condition: "hookFailed"
|
||||
prompt: "Push hook failed; return to developer to fix."
|
||||
- role: "$END"
|
||||
condition: null
|
||||
prompt: "Commit succeeded; complete the workflow."
|
||||
|
||||
@@ -0,0 +1,779 @@
|
||||
# Built-in Role Agent 调研
|
||||
|
||||
## 目标
|
||||
|
||||
实现一个内置的 role agent(暂称 `uwf-builtin`),不依赖 hermes/openclaw 等外部 agent 进程。
|
||||
直接使用 workflow config 中配置的 model,自己实现 agent run loop 和关键 toolkit。
|
||||
|
||||
---
|
||||
|
||||
## 关键问题
|
||||
|
||||
### Q1: Agent 接口协议
|
||||
|
||||
现有 agent 是怎么被 CLI 调用的?输入(argv、环境变量)和输出(stdout、CAS)格式是什么?
|
||||
|
||||
**调研要点:**
|
||||
- `cli-workflow` 里 `spawnAgent` 的完整实现
|
||||
- AgentConfig 类型定义
|
||||
- agent 进程的 exit code 约定
|
||||
- 环境变量传递(UWF_STORAGE_ROOT 等)
|
||||
|
||||
**答案:**
|
||||
|
||||
#### 调用链
|
||||
|
||||
`uwf thread step` → `cmdThreadStepOnce` → moderator 求值下一 role → `resolveAgentConfig` → `spawnAgent`。
|
||||
|
||||
#### AgentConfig 类型
|
||||
|
||||
```146:149:packages/workflow-protocol/src/types.ts
|
||||
export type AgentConfig = {
|
||||
command: string;
|
||||
args: string[];
|
||||
};
|
||||
```
|
||||
|
||||
在 `config.yaml` 的 `agents` 段注册,例如 `hermes: { command: "uwf-hermes", args: [] }`。
|
||||
|
||||
#### spawnAgent 行为
|
||||
|
||||
```627:653:packages/cli-workflow/src/commands/thread.ts
|
||||
function spawnAgent(agent: AgentConfig, threadId: ThreadId, role: string): CasRef {
|
||||
const argv = [...agent.args, threadId, role];
|
||||
let stdout: string;
|
||||
try {
|
||||
stdout = execFileSync(agent.command, argv, {
|
||||
encoding: "utf8",
|
||||
env: process.env,
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
});
|
||||
} catch (e) {
|
||||
// ... stderr 拼进 fail 消息
|
||||
}
|
||||
|
||||
const line = stdout.trim().split("\n").pop()?.trim() ?? "";
|
||||
if (!isCasRef(line)) {
|
||||
fail(`agent stdout is not a valid CAS hash: ${line || "(empty)"}`);
|
||||
}
|
||||
return line;
|
||||
}
|
||||
```
|
||||
|
||||
| 项目 | 约定 |
|
||||
|------|------|
|
||||
| **argv** | `[...agent.args, <thread-id>, <role>]`,即 `process.argv[2]`=threadId,`process.argv[3]`=role(与 `createAgent` 的 `parseArgv` 一致) |
|
||||
| **stdin** | 忽略 |
|
||||
| **stdout** | 纯文本,**最后一行**必须是新 `StepNode` 的 CAS hash(13 字符 Crockford Base32) |
|
||||
| **stderr** | 失败时 CLI 会附带 stderr;成功时无约定 |
|
||||
| **exit code** | `0` = 成功;非 0 时 `execFileSync` 抛错,step 失败 |
|
||||
| **环境变量** | 继承父进程 `process.env`(含 storage root、API key 等) |
|
||||
| **链头更新** | **不由 agent 负责**;agent 只写 CAS StepNode,CLI 在拿到 stdout hash 后更新 `threads.yaml` |
|
||||
|
||||
Agent 解析优先级(`resolveAgentConfig`):
|
||||
|
||||
1. CLI `--agent` override(整段 command + args 字符串)
|
||||
2. `config.agentOverrides[workflow.name][role]`
|
||||
3. `config.defaultAgent`
|
||||
|
||||
#### 环境变量:Storage Root
|
||||
|
||||
文档中写的 `UWF_STORAGE_ROOT` **在当前代码中不存在**。实际优先级(`workflow-agent-kit` / `cli-workflow` 一致):
|
||||
|
||||
```33:43:packages/workflow-agent-kit/src/storage.ts
|
||||
export function resolveStorageRoot(): string {
|
||||
const internal = process.env.UNCAGED_WORKFLOW_STORAGE_ROOT;
|
||||
if (internal !== undefined && internal !== "") {
|
||||
return internal;
|
||||
}
|
||||
const userOverride = process.env.WORKFLOW_STORAGE_ROOT;
|
||||
if (userOverride !== undefined && userOverride !== "") {
|
||||
return userOverride;
|
||||
}
|
||||
return getDefaultStorageRoot();
|
||||
}
|
||||
```
|
||||
|
||||
Agent 子进程通过继承的 `process.env` 与父 CLI 共享同一 storage root;`createAgent` 内还会 `loadDotenv({ path: getEnvPath(storageRoot) })` 加载 `~/.uncaged/workflow/.env`。
|
||||
|
||||
#### Agent 侧职责(设计文档 + 实现)
|
||||
|
||||
- 读 `threads.yaml` 链头,构建 context,执行 role
|
||||
- 将 `StepNode` 写入 CAS(`output` / `detail` / `agent` / `prev` / `start`)
|
||||
- stdout 打印 step hash
|
||||
- **不**更新 `threads.yaml`
|
||||
|
||||
---
|
||||
|
||||
### Q2: createAgent 工厂
|
||||
|
||||
workflow-agent-kit 的 `createAgent` 做了什么?它的完整生命周期是什么?
|
||||
|
||||
**调研要点:**
|
||||
- `AgentOptions` 类型的 `run` 和 `continue` 回调签名
|
||||
- `AgentRunResult` 的完整定义
|
||||
- retry 逻辑(frontmatter 校验失败后的重试机制)
|
||||
- `persistStep` 写入 CAS 的 StepNode 结构
|
||||
|
||||
**答案:**
|
||||
|
||||
#### 类型定义
|
||||
|
||||
```4:35:packages/workflow-agent-kit/src/types.ts
|
||||
export type AgentContext = ModeratorContext & {
|
||||
threadId: ThreadId;
|
||||
role: string;
|
||||
store: Store;
|
||||
workflow: WorkflowPayload;
|
||||
outputFormatInstruction: string;
|
||||
};
|
||||
|
||||
export type AgentRunResult = {
|
||||
output: string;
|
||||
detailHash: CasRef;
|
||||
sessionId: string;
|
||||
};
|
||||
|
||||
export type AgentContinueFn = (
|
||||
sessionId: string,
|
||||
message: string,
|
||||
store: AgentContext["store"],
|
||||
) => Promise<AgentRunResult>;
|
||||
|
||||
export type AgentRunFn = (ctx: AgentContext) => Promise<AgentRunResult>;
|
||||
|
||||
export type AgentOptions = {
|
||||
name: string;
|
||||
run: AgentRunFn;
|
||||
continue: AgentContinueFn;
|
||||
};
|
||||
```
|
||||
|
||||
- **`run(ctx)`**:首次执行,返回原始 agent 文本 `output`、审计用 `detailHash`、用于续聊的 `sessionId`。
|
||||
- **`continue(sessionId, message, store)`**:在同一 session 上追加用户消息(用于 frontmatter 纠错),再次返回 `AgentRunResult`。
|
||||
|
||||
`createAgent(options)` 返回 `() => Promise<void>`,作为 agent CLI 的 `main`(见 `uwf-hermes` 的 `cli.ts`)。
|
||||
|
||||
#### 生命周期(按执行顺序)
|
||||
|
||||
```101:152:packages/workflow-agent-kit/src/run.ts
|
||||
export function createAgent(options: AgentOptions): () => Promise<void> {
|
||||
return async function main(): Promise<void> {
|
||||
const { threadId, role } = parseArgv(process.argv);
|
||||
const storageRoot = resolveStorageRoot();
|
||||
loadDotenv({ path: getEnvPath(storageRoot) });
|
||||
|
||||
const ctx = await buildContextWithMeta(threadId, role);
|
||||
// 1. 校验 role 存在
|
||||
// 2. 从 CAS 取 frontmatter JSON Schema → buildOutputFormatInstruction → ctx.outputFormatInstruction
|
||||
|
||||
let agentResult = await options.run(ctx);
|
||||
|
||||
let outputHash = await tryExtractOutput(agentResult.output, roleDef.frontmatter, ctx);
|
||||
|
||||
for (let retry = 0; retry < MAX_FRONTMATTER_RETRIES && outputHash === null; retry++) {
|
||||
const correctionMessage = "Your previous response did not contain valid YAML frontmatter...";
|
||||
agentResult = await options.continue(agentResult.sessionId, correctionMessage, ctx.meta.store);
|
||||
outputHash = await tryExtractOutput(agentResult.output, roleDef.frontmatter, ctx);
|
||||
}
|
||||
|
||||
if (outputHash === null) { fail(...); }
|
||||
|
||||
const stepHash = await persistStep({ ctx, outputHash, detailHash: agentResult.detailHash, agentName });
|
||||
process.stdout.write(`${stepHash}\n`);
|
||||
};
|
||||
}
|
||||
```
|
||||
|
||||
| 阶段 | 行为 |
|
||||
|------|------|
|
||||
| 解析 argv | `argv[2]=threadId`, `argv[3]=role`,缺失则 `stderr` + `exit(1)` |
|
||||
| Context | `buildContextWithMeta` + 可选 `outputFormatInstruction` |
|
||||
| Run | `options.run(ctx)` |
|
||||
| Extract | **仅** `tryFrontmatterFastPath`(见 Q4);**不**调用 `extract()` LLM fallback |
|
||||
| Retry | 最多 `MAX_FRONTMATTER_RETRIES = 2` 次 `continue` + 再试 fast-path |
|
||||
| Persist | `persistStep` → `writeStepNode` |
|
||||
| 输出 | stdout 一行 step CAS hash |
|
||||
|
||||
#### StepNode 写入结构
|
||||
|
||||
```44:68:packages/workflow-agent-kit/src/run.ts
|
||||
async function writeStepNode(options: {
|
||||
store: AgentStore["store"];
|
||||
schemas: AgentStore["schemas"];
|
||||
startHash: CasRef;
|
||||
prevHash: CasRef | null;
|
||||
role: string;
|
||||
outputHash: CasRef;
|
||||
detailHash: CasRef;
|
||||
agentName: string;
|
||||
}): Promise<CasRef> {
|
||||
const payload: StepNodePayload = {
|
||||
start: options.startHash,
|
||||
prev: options.prevHash,
|
||||
role: options.role,
|
||||
output: options.outputHash,
|
||||
detail: options.detailHash,
|
||||
agent: options.agentName,
|
||||
};
|
||||
// store.put(stepNode schema) + validate
|
||||
}
|
||||
```
|
||||
|
||||
`agentName` 经 `agentLabel(name)` 规范化:已有 `uwf-` 前缀则原样,否则加 `uwf-`(如 `hermes` → `uwf-hermes`)。
|
||||
|
||||
`prevHash`:若链头仍是 `StartNode` 则为 `null`,否则为当前 head step hash。
|
||||
|
||||
---
|
||||
|
||||
### Q3: Context Builder
|
||||
|
||||
`buildContextWithMeta` 构建了什么上下文给 agent?
|
||||
|
||||
**调研要点:**
|
||||
- `AgentContext` 完整类型定义(所有字段)
|
||||
- context 构建过程(CAS chain walk)
|
||||
- `outputFormatInstruction` 怎么生成的
|
||||
- role definition 怎么获取(从 workflow YAML)
|
||||
|
||||
**答案:**
|
||||
|
||||
#### AgentContext 字段
|
||||
|
||||
继承 `ModeratorContext`:
|
||||
|
||||
```60:68:packages/workflow-protocol/src/types.ts
|
||||
export type ModeratorContext = {
|
||||
start: StartNodePayload;
|
||||
steps: StepContext[];
|
||||
};
|
||||
```
|
||||
|
||||
```48:51:packages/workflow-protocol/src/types.ts
|
||||
export type StartNodePayload = {
|
||||
workflow: CasRef;
|
||||
prompt: string;
|
||||
};
|
||||
```
|
||||
|
||||
```61:63:packages/workflow-protocol/src/types.ts
|
||||
export type StepContext = Omit<StepRecord, "output"> & {
|
||||
output: unknown;
|
||||
};
|
||||
```
|
||||
|
||||
`AgentContext` 额外字段:
|
||||
|
||||
| 字段 | 类型 | 含义 |
|
||||
|------|------|------|
|
||||
| `threadId` | `ThreadId` | 当前线程 |
|
||||
| `role` | `string` | 本步要执行的角色名 |
|
||||
| `store` | `Store` | CAS store(读写节点) |
|
||||
| `workflow` | `WorkflowPayload` | 已从 CAS 加载的 workflow 定义 |
|
||||
| `outputFormatInstruction` | `string` | 由 `createAgent` 根据 role 的 frontmatter schema 生成;`buildContext*` 初始为 `""` |
|
||||
|
||||
`buildContextWithMeta` 还返回 `meta`:
|
||||
|
||||
```148:154:packages/workflow-agent-kit/src/context.ts
|
||||
export type BuildContextMeta = {
|
||||
storageRoot: string;
|
||||
store: Store;
|
||||
schemas: AgentStore["schemas"];
|
||||
headHash: CasRef;
|
||||
chain: ChainState;
|
||||
};
|
||||
```
|
||||
|
||||
#### CAS chain walk
|
||||
|
||||
1. 从 `threads.yaml[threadId]` 取 `headHash`
|
||||
2. `walkChain`:若 head 是 `StartNode`,`stepsNewestFirst=[]`;否则沿 `prev` 收集所有 `StepNode`, newest-first
|
||||
3. `buildHistory`:反转为时间序,`expandOutput` 把每步 `output` CasRef 展开为 JSON payload(供 prompt / JSONata 使用)
|
||||
4. `loadWorkflow`:从 `start.workflow` CasRef 加载 `WorkflowPayload`
|
||||
|
||||
#### Role definition 来源
|
||||
|
||||
- 作者写在 workflow YAML 的 `roles.<name>`(`goal`, `capabilities`, `procedure`, `output`, `frontmatter` 等)
|
||||
- `uwf workflow put` 时 `frontmatter` 内联 JSON Schema 经 `putSchema` 存入 CAS,workflow 里存的是 **CasRef**
|
||||
- Agent 运行时:`ctx.workflow.roles[ctx.role]` → `RoleDefinition`
|
||||
|
||||
#### outputFormatInstruction
|
||||
|
||||
在 `createAgent` 中,若 `getSchema(store, roleDef.frontmatter)` 非空,则:
|
||||
|
||||
```typescript
|
||||
ctx.outputFormatInstruction = buildOutputFormatInstruction(frontmatterSchema);
|
||||
```
|
||||
|
||||
`buildOutputFormatInstruction` 根据 JSON Schema 的 `properties` 生成「必须以 `---` YAML frontmatter 开头」的说明和示例字段列表(见 `build-output-format-instruction.ts`)。
|
||||
|
||||
各 agent 实现(Hermes / Claude Code)在组装 prompt 时把该块放在最前,再接 `buildRolePrompt(roleDef)`。
|
||||
|
||||
---
|
||||
|
||||
### Q4: Extract Pipeline
|
||||
|
||||
agent 输出怎么被处理成结构化数据?
|
||||
|
||||
**调研要点:**
|
||||
- frontmatter fast-path 的完整逻辑
|
||||
- LLM extract fallback 的实现(`extract.ts`)
|
||||
- frontmatter schema 从哪里来(role 定义里的 `frontmatter` 字段)
|
||||
- 校验失败时的 correction prompt 是什么
|
||||
|
||||
**答案:**
|
||||
|
||||
#### Schema 来源
|
||||
|
||||
Workflow YAML 中每个 role 的 `frontmatter:` 段是 JSON Schema 对象;注册时:
|
||||
|
||||
```66:76:packages/cli-workflow/src/commands/workflow.ts
|
||||
async function resolveFrontmatterRef(..., frontmatter: unknown): Promise<CasRef> {
|
||||
// 校验为 JSON Schema → putSchema → 返回 CasRef
|
||||
}
|
||||
```
|
||||
|
||||
运行时 `roleDef.frontmatter` 即该 schema 的 CAS hash;structured `output` 节点用**同一 schema** 写入 CAS。
|
||||
|
||||
#### Frontmatter fast-path(createAgent 实际使用的路径)
|
||||
|
||||
```148:195:packages/workflow-agent-kit/src/frontmatter.ts
|
||||
export async function tryFrontmatterFastPath(
|
||||
raw: string,
|
||||
outputSchema: CasRef,
|
||||
store: Store,
|
||||
): Promise<FrontmatterFastPathResult | null>
|
||||
```
|
||||
|
||||
流程:
|
||||
|
||||
1. `parseFrontmatterMarkdown(raw)` → 标准 agent 字段(`status`, `next`, `confidence`, `artifacts`, `scope`)+ body
|
||||
2. `validateFrontmatter` 失败 → `null`
|
||||
3. `getSchema(store, outputSchema)` + `extractSchemaFields` 得到 role 需要的属性名
|
||||
4. `buildCandidate`:从标准 frontmatter + YAML 原始字段拼出符合 schema 的对象
|
||||
5. `store.put(outputSchema, candidate)` + `validate` → 成功则 `{ body, outputHash }`
|
||||
|
||||
**永不抛错**,失败返回 `null`。
|
||||
|
||||
#### LLM extract fallback(已实现但未接入 createAgent)
|
||||
|
||||
```135:181:packages/workflow-agent-kit/src/extract.ts
|
||||
export async function extract(
|
||||
rawOutput: string,
|
||||
outputSchema: CasRef,
|
||||
config: WorkflowConfig,
|
||||
): Promise<ExtractResult>
|
||||
```
|
||||
|
||||
- 模型:`resolveExtractModelAlias(config)` → `modelOverrides.extract` → `models.extract` → `models.default` → `defaultModel`
|
||||
- HTTP:`POST {baseUrl}/chat/completions`,`response_format: { type: "json_object" }`
|
||||
- System:要求按 JSON Schema 从 agent 输出提取单个 JSON 对象
|
||||
- 校验通过后 `store.put(outputSchema, structured)`
|
||||
|
||||
**重要:`createAgent` 当前未调用 `extract()`**。fast-path 失败且 2 次 `continue` 仍失败则直接 `fail()`。builtin agent 若希望无 frontmatter 也能跑,需在 kit 或 builtin 层显式接入 `extract()`。
|
||||
|
||||
#### Correction prompt(retry)
|
||||
|
||||
```125:128:packages/workflow-agent-kit/src/run.ts
|
||||
const correctionMessage =
|
||||
"Your previous response did not contain valid YAML frontmatter matching the role schema.\n" +
|
||||
"You MUST begin your response with a YAML frontmatter block (--- delimited).\n" +
|
||||
"Please output ONLY the corrected frontmatter block followed by your work.";
|
||||
```
|
||||
|
||||
通过 `options.continue(sessionId, correctionMessage, store)` 发给外部 agent;builtin 需在自有 message 历史里 append 同等语义的 user 消息。
|
||||
|
||||
---
|
||||
|
||||
### Q5: Model 配置与 LLM 调用
|
||||
|
||||
workflow 怎么配置和使用 model?
|
||||
|
||||
**调研要点:**
|
||||
- `WorkflowConfig` 中 providers/models/defaultModel/modelOverrides 的完整定义
|
||||
- `resolveModel` 函数的实现
|
||||
- `chatCompletionText` 的实现(OpenAI 兼容 HTTP 客户端)
|
||||
- 有没有 streaming 支持?tool calling 支持?
|
||||
|
||||
**答案:**
|
||||
|
||||
#### WorkflowConfig
|
||||
|
||||
```136:160:packages/workflow-protocol/src/types.ts
|
||||
export type ProviderConfig = {
|
||||
baseUrl: string;
|
||||
apiKeyEnv: string;
|
||||
};
|
||||
|
||||
export type ModelConfig = {
|
||||
provider: ProviderAlias;
|
||||
name: string;
|
||||
};
|
||||
|
||||
export type WorkflowConfig = {
|
||||
providers: Record<ProviderAlias, ProviderConfig>;
|
||||
models: Record<ModelAlias, ModelConfig>;
|
||||
agents: Record<AgentAlias, AgentConfig>;
|
||||
defaultAgent: AgentAlias;
|
||||
agentOverrides: Record<WorkflowName, Record<RoleName, AgentAlias>> | null;
|
||||
defaultModel: ModelAlias;
|
||||
modelOverrides: Record<Scenario, ModelAlias> | null;
|
||||
};
|
||||
```
|
||||
|
||||
示例见 `docs/architecture.md`(`providers` / `models` / `defaultModel` / `modelOverrides.extract`)。
|
||||
|
||||
#### resolveModel
|
||||
|
||||
```32:50:packages/workflow-agent-kit/src/extract.ts
|
||||
export function resolveModel(config: WorkflowConfig, alias: ModelAlias): ResolvedLlmProvider {
|
||||
const modelEntry = config.models[alias];
|
||||
const providerEntry = config.providers[modelEntry.provider];
|
||||
const apiKey = process.env[providerEntry.apiKeyEnv];
|
||||
return { baseUrl: providerEntry.baseUrl, apiKey, model: modelEntry.name };
|
||||
}
|
||||
```
|
||||
|
||||
`ResolvedLlmProvider = { baseUrl, apiKey, model }`。
|
||||
|
||||
Extract 专用别名解析:
|
||||
|
||||
```18:30:packages/workflow-agent-kit/src/extract.ts
|
||||
export function resolveExtractModelAlias(config: WorkflowConfig): ModelAlias {
|
||||
return config.modelOverrides?.extract ?? (config.models.extract ? "extract" : config.models.default ? "default" : config.defaultModel);
|
||||
}
|
||||
```
|
||||
|
||||
**尚无** `modelOverrides` 按 role/workflow 解析 agent 主模型的函数;builtin 首版可用 `config.defaultModel`,扩展时可加 `modelOverrides.agent` 或与 `agentOverrides` 对称的表。
|
||||
|
||||
#### chatCompletionText
|
||||
|
||||
```87:124:packages/workflow-agent-kit/src/extract.ts
|
||||
async function chatCompletionText(
|
||||
provider: ResolvedLlmProvider,
|
||||
messages: Array<{ role: "system" | "user"; content: string }>,
|
||||
): Promise<string>
|
||||
```
|
||||
|
||||
| 能力 | 现状 |
|
||||
|------|------|
|
||||
| 协议 | OpenAI 兼容 `POST /chat/completions` |
|
||||
| Streaming | **无**(一次性 `response.text()`) |
|
||||
| Tool calling | **无**(无 `tools` / `tool_calls` 字段) |
|
||||
| 多模态 | **无**(仅 text `content`) |
|
||||
| Extract 专用 | `response_format: { type: "json_object" }` |
|
||||
|
||||
builtin agent 的 run loop 需要**新写**带 `tools` 的 completion 客户端(可放在 `workflow-agent-builtin` 或扩展 `workflow-agent-kit` 的 `llm/` 模块),不能复用当前 `chatCompletionText` 而不改。
|
||||
|
||||
---
|
||||
|
||||
### Q6: Hermes Agent 参考实现
|
||||
|
||||
`uwf-hermes` 是怎么实现 `run` 和 `continue` 的?
|
||||
|
||||
**调研要点:**
|
||||
- prompt 怎么组装的(outputFormatInstruction + rolePrompt + task + history)
|
||||
- hermes CLI 的调用参数
|
||||
- session management(resume)
|
||||
- 输出怎么捕获
|
||||
|
||||
**答案:**
|
||||
|
||||
#### Prompt 组装
|
||||
|
||||
```40:53:packages/workflow-agent-hermes/src/hermes.ts
|
||||
export function buildHermesPrompt(ctx: AgentContext): string {
|
||||
const roleDef = ctx.workflow.roles[ctx.role];
|
||||
const rolePrompt = roleDef !== undefined ? buildRolePrompt(roleDef) : "";
|
||||
const parts: string[] = [];
|
||||
if (ctx.outputFormatInstruction !== "") {
|
||||
parts.push(ctx.outputFormatInstruction, "");
|
||||
}
|
||||
parts.push(rolePrompt, "", "## Task", ctx.start.prompt);
|
||||
const historyBlock = buildHistorySummary(ctx.steps);
|
||||
if (historyBlock !== "") {
|
||||
parts.push("", historyBlock);
|
||||
}
|
||||
return parts.join("\n");
|
||||
}
|
||||
```
|
||||
|
||||
`buildRolePrompt` 生成 `## Goal` / `## Capabilities` / `## Prepare`(含 `generateCliReference()`)/ `## Procedure` / `## Output`。
|
||||
|
||||
`buildHistorySummary`:每步 `role`、`JSON.stringify(step.output)`、`agent`。
|
||||
|
||||
Hermes 把**整段 prompt 作为单条 user 消息**传给 `hermes chat -q`(无独立 system channel)。
|
||||
|
||||
#### Hermes CLI 参数
|
||||
|
||||
首次:
|
||||
|
||||
```88:97:packages/workflow-agent-hermes/src/hermes.ts
|
||||
spawnHermes(["chat", "-q", prompt, "--yolo", "--max-turns", "90", "--quiet"]);
|
||||
```
|
||||
|
||||
续聊:
|
||||
|
||||
```100:114:packages/workflow-agent-hermes/src/hermes.ts
|
||||
spawnHermes(["chat", "--resume", sessionId, "-q", message, "--yolo", "--max-turns", "90", "--quiet"]);
|
||||
```
|
||||
|
||||
#### Session
|
||||
|
||||
- stdout/stderr 中解析 `session_id: <id>`(`parseSessionIdFromStdout`)
|
||||
- 会话文件:`~/.hermes/sessions/session_<id>.json`
|
||||
- `loadHermesSession` → `storeHermesSessionDetail`:每 assistant/tool 消息写成 CAS turn 节点,汇总为 `detail`;**output 文本** = 最后一条非空 `assistant` 的 `content`
|
||||
|
||||
#### 与 createAgent 的衔接
|
||||
|
||||
```157:164:packages/workflow-agent-hermes/src/hermes.ts
|
||||
export function createHermesAgent(): () => Promise<void> {
|
||||
return createAgent({ name: "hermes", run: runHermes, continue: continueHermes });
|
||||
}
|
||||
```
|
||||
|
||||
`uwf-hermes` 入口:`createHermesAgent()` 即 main。
|
||||
|
||||
Claude Code 包(`workflow-agent-claude-code`)结构相同:`buildClaudeCodePrompt` 同构,`claude -p` + `--resume` + JSON stdout 解析。
|
||||
|
||||
---
|
||||
|
||||
### Q7: Toolkit 需求分析
|
||||
|
||||
要实现一个自给自足的 agent,最少需要哪些 tool?
|
||||
|
||||
**调研要点:**
|
||||
- 现有 workflow example(solve-issue.yaml)里 role 都做什么任务
|
||||
- hermes agent 在 workflow 场景下常用哪些 tool
|
||||
- 哪些 tool 是 agent loop 必须的(如 file read/write、shell exec、web fetch)
|
||||
|
||||
**答案:**
|
||||
|
||||
#### solve-issue.yaml 角色能力
|
||||
|
||||
| Role | capabilities | 隐含需求 |
|
||||
|------|----------------|----------|
|
||||
| planner | issue-analysis, planning | 读上下文/仓库、总结,通常不需写代码 |
|
||||
| developer | file-edit, shell, testing | **读文件、写文件、执行命令** |
|
||||
| reviewer | code-review, static-analysis | 读 diff/文件、静态分析(可读+可选 shell) |
|
||||
|
||||
#### Hermes 侧
|
||||
|
||||
Hermes 自带完整 agent runtime(`--yolo`、max-turns),tool 集由 Hermes 项目定义,workflow 不配置。从 session JSON 可见 `tool_calls` 被记入 detail,常见包括文件与 shell 类工具。
|
||||
|
||||
#### Builtin 最小 toolkit 建议
|
||||
|
||||
| 优先级 | Tool | 用途 |
|
||||
|--------|------|------|
|
||||
| P0 | `read_file` | 读仓库/配置/issue 上下文 |
|
||||
| P0 | `write_file` / `edit_file` | developer 改代码 |
|
||||
| P0 | `run_command` | 测试、构建、git(需 cwd + timeout + 输出截断) |
|
||||
| P1 | `list_dir` / `glob` | 导航代码库 |
|
||||
| P1 | `grep` | 搜索符号/引用 |
|
||||
| P2 | `fetch_url` | 查文档(planner 偶尔需要) |
|
||||
|
||||
**不需要**在 builtin 里实现 moderator / workflow 路由工具——仍由 `uwf thread step` + JSONata 负责。
|
||||
|
||||
#### Agent loop 必须能力
|
||||
|
||||
1. 多轮 LLM 调用 + **OpenAI-style tool_calls** 解析与执行
|
||||
2. 将 tool 结果 append 回 messages
|
||||
3. 终止条件:模型不再请求 tool,或达到 `maxTurns`
|
||||
4. 最终响应须含合法 YAML frontmatter(满足 Q4),供 `createAgent` fast-path
|
||||
|
||||
---
|
||||
|
||||
## 方案草案
|
||||
|
||||
(调研完成后基于以上答案撰写)
|
||||
|
||||
### 架构设计
|
||||
|
||||
```mermaid
|
||||
flowchart TB
|
||||
subgraph cli ["cli-workflow"]
|
||||
Step["uwf thread step"]
|
||||
Spawn["spawnAgent(uwf-builtin, threadId, role)"]
|
||||
Step --> Spawn
|
||||
end
|
||||
|
||||
subgraph builtin_pkg ["@uncaged/workflow-agent-builtin"]
|
||||
Main["createBuiltinAgent() = createAgent({...})"]
|
||||
Prompt["buildBuiltinPrompt(ctx)"]
|
||||
Loop["runBuiltinLoop(provider, messages, tools)"]
|
||||
Tools["Toolkit: read/write/exec/..."]
|
||||
Detail["storeBuiltinDetail(turns)"]
|
||||
Main --> Prompt
|
||||
Main --> Loop
|
||||
Loop --> Tools
|
||||
Loop --> Detail
|
||||
end
|
||||
|
||||
subgraph kit ["workflow-agent-kit"]
|
||||
Ctx["buildContextWithMeta"]
|
||||
FM["tryFrontmatterFastPath"]
|
||||
Persist["persistStep"]
|
||||
Ctx --> Main
|
||||
Main --> FM
|
||||
FM --> Persist
|
||||
end
|
||||
|
||||
subgraph cas ["CAS / config"]
|
||||
Config["config.yaml models/providers"]
|
||||
CAS["cas/ + threads.yaml"]
|
||||
end
|
||||
|
||||
Spawn --> Main
|
||||
Config --> Loop
|
||||
CAS --> Ctx
|
||||
Persist --> CAS
|
||||
Spawn -->|"stdout: step hash"| Step
|
||||
```
|
||||
|
||||
**新包**:`packages/workflow-agent-builtin`,bin `uwf-builtin`,仅依赖 `workflow-agent-kit`、`workflow-protocol`、`workflow-util`(可选 `@uncaged/json-cas` 写 detail schema)。
|
||||
|
||||
**分层**:
|
||||
|
||||
| 层 | 职责 |
|
||||
|----|------|
|
||||
| `createAgent`(kit) | argv、context、frontmatter extract、StepNode、stdout 协议 — **不变** |
|
||||
| `builtin/agent.ts` | `run` / `continue` 实现 |
|
||||
| `builtin/llm.ts` | OpenAI 兼容 chat + tools(可后续抽到 kit) |
|
||||
| `builtin/tools/*.ts` | 各 tool 的 JSON Schema + handler |
|
||||
| `builtin/prompt.ts` | 复用 Hermes 的 prompt 拼接逻辑(或抽到 kit 的 `buildAgentPrompt`) |
|
||||
| `builtin/detail.ts` | 类似 Hermes:每轮 assistant/tool 写入 CAS detail |
|
||||
|
||||
**配置集成**:
|
||||
|
||||
```yaml
|
||||
agents:
|
||||
builtin:
|
||||
command: "uwf-builtin"
|
||||
args: []
|
||||
defaultAgent: "builtin" # 或 agentOverrides 按 role 指定
|
||||
```
|
||||
|
||||
模型:首版 `resolveModel(config, config.defaultModel)`;后续可增加 `modelOverrides.agent` 或 per-role 映射。
|
||||
|
||||
---
|
||||
|
||||
### Agent Run Loop
|
||||
|
||||
伪代码(单次 `run(ctx)`):
|
||||
|
||||
```
|
||||
1. provider ← resolveModel(loadWorkflowConfig(), defaultModel)
|
||||
2. system ← buildBuiltinPrompt(ctx) // outputFormatInstruction + buildRolePrompt + Task + History
|
||||
3. messages ← [{ role: "system", content: system }]
|
||||
4. sessionId ← newULID() // 内存或临时目录,供 continue 使用
|
||||
5. turns ← []
|
||||
|
||||
6. for turn in 1..MAX_TURNS:
|
||||
response ← chatCompletionWithTools(provider, messages, TOOL_DEFINITIONS)
|
||||
record assistant message + tool_calls in turns
|
||||
|
||||
if response has no tool_calls:
|
||||
finalText ← response.content
|
||||
break
|
||||
|
||||
for each tool_call:
|
||||
result ← executeTool(tool_call, { cwd: process.cwd() })
|
||||
messages.push tool result
|
||||
record in turns
|
||||
|
||||
7. if no finalText with valid frontmatter after loop:
|
||||
optionally one-shot "finalize" message without tools
|
||||
|
||||
8. detailHash ← storeBuiltinDetail(store, sessionId, turns, metadata)
|
||||
9. return { output: finalText, detailHash, sessionId }
|
||||
```
|
||||
|
||||
**`continue(sessionId, message, store)`**:
|
||||
|
||||
- 从内存/磁盘恢复 `messages` + `turns`
|
||||
- `messages.push({ role: "user", content: message })`(correction 或续聊)
|
||||
- 从步骤 6 继续,步数上限可单独设小一点(如 3)
|
||||
- 返回新的 `AgentRunResult`
|
||||
|
||||
**与 frontmatter 的配合**:
|
||||
|
||||
- system prompt 已含 `outputFormatInstruction`;最后一轮可强制 user:`Now output your final answer with YAML frontmatter only if you have not yet.`
|
||||
- 仍依赖 `createAgent` 的 fast-path + 最多 2 次 continue
|
||||
|
||||
**安全**:
|
||||
|
||||
- `run_command`:白名单或需 `UWF_BUILTIN_ALLOW_SHELL=1`,默认工作区限定在 `process.cwd()` 或 `start` 中将来扩展的 `workspace` 字段
|
||||
- 路径:禁止 `..` 逃逸出 workspace root
|
||||
|
||||
---
|
||||
|
||||
### Toolkit 设计
|
||||
|
||||
统一注册表:
|
||||
|
||||
```typescript
|
||||
type BuiltinTool = {
|
||||
name: string;
|
||||
description: string;
|
||||
parameters: JSONSchema; // object type
|
||||
execute: (args: unknown, ctx: ToolContext) => Promise<string>;
|
||||
};
|
||||
|
||||
type ToolContext = {
|
||||
cwd: string;
|
||||
storageRoot: string;
|
||||
};
|
||||
```
|
||||
|
||||
| Tool name | OpenAI function | 行为摘要 |
|
||||
|-----------|-----------------|----------|
|
||||
| `read_file` | `read_file` | `{ path }` → UTF-8 文本,大小上限 |
|
||||
| `write_file` | `write_file` | `{ path, content }` → 写盘,返回确认 |
|
||||
| `edit_file` | 可选 | search/replace 块,减少 token |
|
||||
| `run_command` | `run_command` | `{ command, cwd? }` → stdout/stderr 截断 |
|
||||
| `list_dir` | `list_dir` | `{ path }` → 条目列表 |
|
||||
| `grep` | `grep` | `{ pattern, path? }` → 匹配行 |
|
||||
|
||||
**LLM 请求形状**(扩展 extract 客户端):
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "...",
|
||||
"messages": [...],
|
||||
"tools": [{ "type": "function", "function": { "name", "description", "parameters" } }],
|
||||
"tool_choice": "auto"
|
||||
}
|
||||
```
|
||||
|
||||
解析 `choices[0].message.tool_calls`,执行后以 `{ role: "tool", tool_call_id, content }` 回传。
|
||||
|
||||
**不提供** streaming 首版;detail CAS 记录每轮 tool 名/参数/结果摘要供 `uwf thread step-details` 调试。
|
||||
|
||||
---
|
||||
|
||||
### 与现有架构的集成
|
||||
|
||||
| 集成点 | 方式 |
|
||||
|--------|------|
|
||||
| CLI 协议 | 实现标准 agent CLI:`uwf-builtin <thread-id> <role>`,stdout 一行 step hash,exit 0/1 |
|
||||
| 工厂 | `export function createBuiltinAgent()` → `createAgent({ name: "builtin", run, continue })` |
|
||||
| Context / Prompt | 复用 `buildContextWithMeta`、`buildRolePrompt`、`buildOutputFormatInstruction`;prompt 布局对齐 `buildHermesPrompt` |
|
||||
| 结构化输出 | 优先 YAML frontmatter fast-path;可选后续在 `createAgent` 增加 `extract()` fallback 开关 |
|
||||
| 配置 | `config.yaml` 增加 `agents.builtin`;`uwf setup` 可选默认 agent |
|
||||
| 存储 | `resolveStorageRoot()` + `loadWorkflowConfig` + `getEnvPath`;与 Hermes 相同,**不**改 `threads.yaml` 写入方 |
|
||||
| 测试 | 单元测试:tool handlers、prompt 组装、mock LLM tool loop;集成测试:临时 storage root + fake provider |
|
||||
| 发布 | 新包 `@uncaged/workflow-agent-builtin`,bin `uwf-builtin`,加入 `scripts/publish-all.mjs` |
|
||||
|
||||
**明确不做**:
|
||||
|
||||
- 不替代 moderator / 不在 agent 内调用 `uwf thread step`
|
||||
- 不依赖 Hermes/OpenClaw/Claude Code 二进制
|
||||
- 首版不实现 streaming、不实现 MCP
|
||||
|
||||
**建议实现顺序**:
|
||||
|
||||
1. `llm.ts`:tool calling HTTP 客户端 + 单测
|
||||
2. P0 tools + `runBuiltinLoop`
|
||||
3. `createBuiltinAgent` + detail CAS
|
||||
4. `config` / docs / `examples` 可选 `agentOverrides` 演示
|
||||
5. (可选)`createAgent` 接入 `extract()` fallback
|
||||
@@ -36,6 +36,8 @@ graph:
|
||||
$START:
|
||||
- role: "analyst"
|
||||
condition: null
|
||||
prompt: "Analyze the topic in the task and produce a structured summary with key points."
|
||||
analyst:
|
||||
- role: "$END"
|
||||
condition: null
|
||||
prompt: "Analysis complete. Finish the workflow."
|
||||
|
||||
@@ -62,14 +62,19 @@ graph:
|
||||
$START:
|
||||
- role: "planner"
|
||||
condition: null
|
||||
prompt: "Analyze the issue described in the task and produce a detailed implementation plan."
|
||||
planner:
|
||||
- role: "developer"
|
||||
condition: null
|
||||
prompt: "Implement the plan from the planner. Write code, tests, and ensure existing tests pass."
|
||||
developer:
|
||||
- role: "reviewer"
|
||||
condition: null
|
||||
prompt: "Review the developer's implementation against the plan for correctness and quality."
|
||||
reviewer:
|
||||
- role: "developer"
|
||||
condition: "notApproved"
|
||||
prompt: "The reviewer rejected your implementation. Read their feedback and fix the issues."
|
||||
- role: "$END"
|
||||
condition: null
|
||||
prompt: "The review passed. Complete the workflow."
|
||||
|
||||
@@ -15,10 +15,12 @@
|
||||
"release": "bun run build && bun test && node scripts/publish-all.mjs"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@agentclientprotocol/sdk": "^0.22.1",
|
||||
"@biomejs/biome": "^2.4.14",
|
||||
"@changesets/cli": "^2.31.0",
|
||||
"@types/node": "^25.7.0",
|
||||
"@types/xxhashjs": "^0.2.4",
|
||||
"@uncaged/workflow-agent-hermes": "workspace:*",
|
||||
"bun-types": "^1.3.13"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,181 @@
|
||||
import { mkdir, readdir, rm, writeFile } from "node:fs/promises";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, test } from "vitest";
|
||||
import { cmdLogClean, cmdLogList, cmdLogShow } from "../commands/log.js";
|
||||
|
||||
let storageRoot: string;
|
||||
|
||||
beforeEach(async () => {
|
||||
storageRoot = join(tmpdir(), `uwf-log-test-${Date.now()}-${Math.random().toString(36).slice(2)}`);
|
||||
await mkdir(join(storageRoot, "logs"), { recursive: true });
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await rm(storageRoot, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
const entry1 = JSON.stringify({
|
||||
ts: "2026-05-20T10:00:00.000Z",
|
||||
pid: "1716200000000-1234",
|
||||
tag: "W9F3RK2M",
|
||||
msg: "process start",
|
||||
thread: "01J1234ABCDEF",
|
||||
workflow: "solve-issue",
|
||||
});
|
||||
|
||||
const entry2 = JSON.stringify({
|
||||
ts: "2026-05-20T10:00:01.000Z",
|
||||
pid: "1716200000000-1234",
|
||||
tag: "ABC12345",
|
||||
msg: "step executed",
|
||||
thread: "01J1234ABCDEF",
|
||||
workflow: "solve-issue",
|
||||
});
|
||||
|
||||
const entry3 = JSON.stringify({
|
||||
ts: "2026-05-20T10:00:02.000Z",
|
||||
pid: "1716200000000-5678",
|
||||
tag: "XYZ98765",
|
||||
msg: "different process",
|
||||
thread: "01JOTHER000000",
|
||||
workflow: "review-code",
|
||||
});
|
||||
|
||||
const oldEntry = JSON.stringify({
|
||||
ts: "2026-05-19T08:00:00.000Z",
|
||||
pid: "1716200000000-9999",
|
||||
tag: "OLD1TAG1",
|
||||
msg: "old entry",
|
||||
thread: "01JOLD0000000",
|
||||
workflow: "solve-issue",
|
||||
});
|
||||
|
||||
const olderEntry = JSON.stringify({
|
||||
ts: "2026-05-18T08:00:00.000Z",
|
||||
pid: "1716200000000-0001",
|
||||
tag: "OLD2TAG2",
|
||||
msg: "older entry",
|
||||
thread: "01JOLDER00000",
|
||||
workflow: "review-code",
|
||||
});
|
||||
|
||||
async function writeLogFiles(): Promise<void> {
|
||||
const logsDir = join(storageRoot, "logs");
|
||||
await writeFile(join(logsDir, "2026-05-20.jsonl"), [entry1, entry2, entry3].join("\n") + "\n");
|
||||
await writeFile(join(logsDir, "2026-05-19.jsonl"), oldEntry + "\n");
|
||||
await writeFile(join(logsDir, "2026-05-18.jsonl"), olderEntry + "\n");
|
||||
}
|
||||
|
||||
describe("cmdLogList", () => {
|
||||
test("lists log files with sizes sorted by date descending", async () => {
|
||||
await writeLogFiles();
|
||||
const result = await cmdLogList(storageRoot);
|
||||
expect(result).toHaveLength(3);
|
||||
expect(result[0].name).toBe("2026-05-20.jsonl");
|
||||
expect(result[0].date).toBe("2026-05-20");
|
||||
expect(result[0].size).toBeGreaterThan(0);
|
||||
expect(result[1].name).toBe("2026-05-19.jsonl");
|
||||
expect(result[2].name).toBe("2026-05-18.jsonl");
|
||||
});
|
||||
|
||||
test("returns empty array when no log files exist", async () => {
|
||||
const result = await cmdLogList(storageRoot);
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
|
||||
test("returns empty array when logs directory does not exist", async () => {
|
||||
const noLogsRoot = join(storageRoot, "nonexistent");
|
||||
await mkdir(noLogsRoot, { recursive: true });
|
||||
const result = await cmdLogList(noLogsRoot);
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
describe("cmdLogShow", () => {
|
||||
test("filters by thread ID", async () => {
|
||||
await writeLogFiles();
|
||||
const result = await cmdLogShow(storageRoot, {
|
||||
thread: "01J1234ABCDEF",
|
||||
process: null,
|
||||
date: null,
|
||||
});
|
||||
expect(result).toHaveLength(2);
|
||||
expect(result.every((e) => e.thread === "01J1234ABCDEF")).toBe(true);
|
||||
});
|
||||
|
||||
test("filters by process ID", async () => {
|
||||
await writeLogFiles();
|
||||
const result = await cmdLogShow(storageRoot, {
|
||||
thread: null,
|
||||
process: "1716200000000-1234",
|
||||
date: null,
|
||||
});
|
||||
expect(result).toHaveLength(2);
|
||||
expect(result.every((e) => e.pid === "1716200000000-1234")).toBe(true);
|
||||
});
|
||||
|
||||
test("filters by date", async () => {
|
||||
await writeLogFiles();
|
||||
const result = await cmdLogShow(storageRoot, {
|
||||
thread: null,
|
||||
process: null,
|
||||
date: "2026-05-19",
|
||||
});
|
||||
expect(result).toHaveLength(1);
|
||||
expect(result[0].msg).toBe("old entry");
|
||||
});
|
||||
|
||||
test("reads all files when no date filter", async () => {
|
||||
await writeLogFiles();
|
||||
const result = await cmdLogShow(storageRoot, { thread: null, process: null, date: null });
|
||||
expect(result).toHaveLength(5);
|
||||
// sorted by ts ascending
|
||||
expect(result[0].ts).toBe("2026-05-18T08:00:00.000Z");
|
||||
expect(result[4].ts).toBe("2026-05-20T10:00:02.000Z");
|
||||
});
|
||||
|
||||
test("returns empty when no matches", async () => {
|
||||
await writeLogFiles();
|
||||
const result = await cmdLogShow(storageRoot, {
|
||||
thread: "NONEXISTENT",
|
||||
process: null,
|
||||
date: null,
|
||||
});
|
||||
expect(result).toEqual([]);
|
||||
});
|
||||
|
||||
test("combined thread + date filter", async () => {
|
||||
await writeLogFiles();
|
||||
const result = await cmdLogShow(storageRoot, {
|
||||
thread: "01J1234ABCDEF",
|
||||
process: null,
|
||||
date: "2026-05-20",
|
||||
});
|
||||
expect(result).toHaveLength(2);
|
||||
expect(result.every((e) => e.thread === "01J1234ABCDEF")).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe("cmdLogClean", () => {
|
||||
test("deletes files before given date", async () => {
|
||||
await writeLogFiles();
|
||||
const result = await cmdLogClean(storageRoot, "2026-05-20");
|
||||
expect(result.deleted).toBe(2);
|
||||
const remaining = await readdir(join(storageRoot, "logs"));
|
||||
expect(remaining).toEqual(["2026-05-20.jsonl"]);
|
||||
});
|
||||
|
||||
test("deletes nothing when all files are newer", async () => {
|
||||
await writeLogFiles();
|
||||
const result = await cmdLogClean(storageRoot, "2026-05-18");
|
||||
expect(result.deleted).toBe(0);
|
||||
});
|
||||
|
||||
test("handles missing logs directory gracefully", async () => {
|
||||
const noLogsRoot = join(storageRoot, "nonexistent");
|
||||
await mkdir(noLogsRoot, { recursive: true });
|
||||
const result = await cmdLogClean(noLogsRoot, "2026-05-20");
|
||||
expect(result).toEqual({ deleted: 0 });
|
||||
});
|
||||
});
|
||||
@@ -14,6 +14,7 @@ import {
|
||||
cmdCasSchemaList,
|
||||
cmdCasWalk,
|
||||
} from "./commands/cas.js";
|
||||
import { cmdLogClean, cmdLogList, cmdLogShow } from "./commands/log.js";
|
||||
import { cmdSetup, cmdSetupInteractive } from "./commands/setup.js";
|
||||
import { cmdSkillCli } from "./commands/skill.js";
|
||||
import {
|
||||
@@ -379,6 +380,55 @@ casSchema
|
||||
});
|
||||
});
|
||||
|
||||
const log = program.command("log").description("Process-level debug logs");
|
||||
|
||||
log
|
||||
.command("list")
|
||||
.description("List log files with sizes")
|
||||
.action(() => {
|
||||
const storageRoot = resolveStorageRoot();
|
||||
runAction(async () => {
|
||||
const result = await cmdLogList(storageRoot);
|
||||
writeOutput(result);
|
||||
});
|
||||
});
|
||||
|
||||
log
|
||||
.command("show")
|
||||
.description("Show and filter log entries")
|
||||
.option("--thread <thread-id>", "Filter by thread ID")
|
||||
.option("--process <pid>", "Filter by process ID")
|
||||
.option("--date <date>", "Filter by date (YYYY-MM-DD)")
|
||||
.action(
|
||||
(opts: {
|
||||
thread: string | undefined;
|
||||
process: string | undefined;
|
||||
date: string | undefined;
|
||||
}) => {
|
||||
const storageRoot = resolveStorageRoot();
|
||||
runAction(async () => {
|
||||
const result = await cmdLogShow(storageRoot, {
|
||||
thread: opts.thread ?? null,
|
||||
process: opts.process ?? null,
|
||||
date: opts.date ?? null,
|
||||
});
|
||||
writeOutput(result);
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
log
|
||||
.command("clean")
|
||||
.description("Delete log files older than given date")
|
||||
.requiredOption("--before <date>", "Delete files before this date (YYYY-MM-DD)")
|
||||
.action((opts: { before: string }) => {
|
||||
const storageRoot = resolveStorageRoot();
|
||||
runAction(async () => {
|
||||
const result = await cmdLogClean(storageRoot, opts.before);
|
||||
writeOutput(result);
|
||||
});
|
||||
});
|
||||
|
||||
program.parseAsync(process.argv).catch((e: unknown) => {
|
||||
const message = e instanceof Error ? e.message : String(e);
|
||||
process.stderr.write(`${message}\n`);
|
||||
|
||||
@@ -0,0 +1,116 @@
|
||||
import { readdir, readFile, stat, unlink } from "node:fs/promises";
|
||||
import { join } from "node:path";
|
||||
|
||||
type LogListItem = {
|
||||
name: string;
|
||||
size: number;
|
||||
date: string;
|
||||
};
|
||||
|
||||
type LogShowFilter = {
|
||||
thread: string | null;
|
||||
process: string | null;
|
||||
date: string | null;
|
||||
};
|
||||
|
||||
type LogEntry = {
|
||||
ts: string;
|
||||
pid: string;
|
||||
tag: string;
|
||||
msg: string;
|
||||
thread: string | null;
|
||||
workflow: string | null;
|
||||
};
|
||||
|
||||
type LogCleanResult = {
|
||||
deleted: number;
|
||||
};
|
||||
|
||||
function logsDir(storageRoot: string): string {
|
||||
return join(storageRoot, "logs");
|
||||
}
|
||||
|
||||
async function listLogFiles(dir: string): Promise<Array<string>> {
|
||||
try {
|
||||
const files = await readdir(dir);
|
||||
return files.filter((f) => f.endsWith(".jsonl")).sort();
|
||||
} catch {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
function dateFromFilename(name: string): string {
|
||||
return name.replace(".jsonl", "");
|
||||
}
|
||||
|
||||
async function parseJsonlFile(path: string): Promise<Array<LogEntry>> {
|
||||
const content = await readFile(path, "utf-8");
|
||||
const lines = content
|
||||
.trim()
|
||||
.split("\n")
|
||||
.filter((l) => l.length > 0);
|
||||
return lines.map((line) => JSON.parse(line) as LogEntry);
|
||||
}
|
||||
|
||||
export async function cmdLogList(storageRoot: string): Promise<Array<LogListItem>> {
|
||||
const dir = logsDir(storageRoot);
|
||||
const files = await listLogFiles(dir);
|
||||
const items: Array<LogListItem> = [];
|
||||
for (const name of files) {
|
||||
const s = await stat(join(dir, name));
|
||||
items.push({ name, size: s.size, date: dateFromFilename(name) });
|
||||
}
|
||||
// sort by date descending
|
||||
items.sort((a, b) => (a.date > b.date ? -1 : a.date < b.date ? 1 : 0));
|
||||
return items;
|
||||
}
|
||||
|
||||
export async function cmdLogShow(
|
||||
storageRoot: string,
|
||||
filter: LogShowFilter,
|
||||
): Promise<Array<LogEntry>> {
|
||||
const dir = logsDir(storageRoot);
|
||||
let files: Array<string>;
|
||||
|
||||
if (filter.date !== null) {
|
||||
files = [`${filter.date}.jsonl`];
|
||||
} else {
|
||||
files = await listLogFiles(dir);
|
||||
}
|
||||
|
||||
let entries: Array<LogEntry> = [];
|
||||
for (const file of files) {
|
||||
try {
|
||||
const parsed = await parseJsonlFile(join(dir, file));
|
||||
entries = entries.concat(parsed);
|
||||
} catch {
|
||||
// file doesn't exist or is unreadable, skip
|
||||
}
|
||||
}
|
||||
|
||||
if (filter.thread !== null) {
|
||||
entries = entries.filter((e) => e.thread === filter.thread);
|
||||
}
|
||||
if (filter.process !== null) {
|
||||
entries = entries.filter((e) => e.pid === filter.process);
|
||||
}
|
||||
|
||||
entries.sort((a, b) => (a.ts < b.ts ? -1 : a.ts > b.ts ? 1 : 0));
|
||||
return entries;
|
||||
}
|
||||
|
||||
export async function cmdLogClean(storageRoot: string, before: string): Promise<LogCleanResult> {
|
||||
const dir = logsDir(storageRoot);
|
||||
const files = await listLogFiles(dir);
|
||||
let deleted = 0;
|
||||
|
||||
for (const name of files) {
|
||||
const date = dateFromFilename(name);
|
||||
if (date < before) {
|
||||
await unlink(join(dir, name));
|
||||
deleted++;
|
||||
}
|
||||
}
|
||||
|
||||
return { deleted };
|
||||
}
|
||||
@@ -23,7 +23,7 @@ import type {
|
||||
WorkflowConfig,
|
||||
WorkflowPayload,
|
||||
} from "@uncaged/workflow-protocol";
|
||||
import { generateUlid } from "@uncaged/workflow-util";
|
||||
import { createProcessLogger, generateUlid, type ProcessLogger } from "@uncaged/workflow-util";
|
||||
import { config as loadDotenv } from "dotenv";
|
||||
import { parse, stringify } from "yaml";
|
||||
|
||||
@@ -47,6 +47,18 @@ import { materializeWorkflowPayload } from "./workflow.js";
|
||||
const END_ROLE = "$END";
|
||||
export const THREAD_READ_DEFAULT_QUOTA = 4000;
|
||||
|
||||
const PL_THREAD_START = "7HNQ4B2X";
|
||||
const PL_MODERATOR = "M3K8V9T1";
|
||||
const PL_AGENT_SPAWN = "R5J2W8N4";
|
||||
const PL_AGENT_DONE = "C6P9E3H7";
|
||||
const PL_THREAD_ARCHIVED = "F4D8Q2K5";
|
||||
const PL_STEP_ERROR = "B8T5N1V6";
|
||||
|
||||
function failStep(plog: ProcessLogger, message: string): never {
|
||||
plog.log(PL_STEP_ERROR, message, null);
|
||||
fail(message);
|
||||
}
|
||||
|
||||
type ChainState = {
|
||||
startHash: CasRef;
|
||||
start: StartNodePayload;
|
||||
@@ -168,6 +180,10 @@ export async function cmdThreadStart(
|
||||
const workflowHash = await resolveWorkflowCasRef(uwf, storageRoot, workflowId, projectRoot);
|
||||
|
||||
const threadId = generateUlid(Date.now()) as ThreadId;
|
||||
const plog = createProcessLogger({
|
||||
storageRoot,
|
||||
context: { thread: threadId, workflow: workflowHash },
|
||||
});
|
||||
const startPayload: StartNodePayload = {
|
||||
workflow: workflowHash,
|
||||
prompt,
|
||||
@@ -183,6 +199,12 @@ export async function cmdThreadStart(
|
||||
index[threadId] = headHash;
|
||||
await saveThreadsIndex(storageRoot, index);
|
||||
|
||||
plog.log(
|
||||
PL_THREAD_START,
|
||||
`thread created workflow=${workflowHash} thread=${threadId} head=${headHash}`,
|
||||
null,
|
||||
);
|
||||
|
||||
return { workflow: workflowHash, thread: threadId };
|
||||
}
|
||||
|
||||
@@ -624,13 +646,20 @@ function resolveAgentConfig(
|
||||
return agentConfig;
|
||||
}
|
||||
|
||||
function spawnAgent(agent: AgentConfig, threadId: ThreadId, role: string): CasRef {
|
||||
function spawnAgent(
|
||||
plog: ProcessLogger,
|
||||
agent: AgentConfig,
|
||||
threadId: ThreadId,
|
||||
role: string,
|
||||
edgePrompt: string,
|
||||
): CasRef {
|
||||
const argv = [...agent.args, threadId, role];
|
||||
const env = { ...process.env, UWF_EDGE_PROMPT: edgePrompt };
|
||||
let stdout: string;
|
||||
try {
|
||||
stdout = execFileSync(agent.command, argv, {
|
||||
encoding: "utf8",
|
||||
env: process.env,
|
||||
env,
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
});
|
||||
} catch (e) {
|
||||
@@ -642,12 +671,12 @@ function spawnAgent(agent: AgentConfig, threadId: ThreadId, role: string): CasRe
|
||||
? err.stderr
|
||||
: err.stderr.toString("utf8");
|
||||
const detail = stderr.trim() !== "" ? `: ${stderr.trim()}` : "";
|
||||
fail(`agent command failed (${agent.command})${detail}`);
|
||||
failStep(plog, `agent command failed (${agent.command})${detail}`);
|
||||
}
|
||||
|
||||
const line = stdout.trim().split("\n").pop()?.trim() ?? "";
|
||||
if (!isCasRef(line)) {
|
||||
fail(`agent stdout is not a valid CAS hash: ${line || "(empty)"}`);
|
||||
failStep(plog, `agent stdout is not a valid CAS hash: ${line || "(empty)"}`);
|
||||
}
|
||||
return line;
|
||||
}
|
||||
@@ -679,9 +708,15 @@ export async function cmdThreadStep(
|
||||
fail(`--count must be a positive integer, got: ${count}`);
|
||||
}
|
||||
|
||||
const workflowHash = await resolveActiveThreadWorkflowHash(storageRoot, threadId);
|
||||
const plog = createProcessLogger({
|
||||
storageRoot,
|
||||
context: { thread: threadId, workflow: workflowHash },
|
||||
});
|
||||
|
||||
const results: StepOutput[] = [];
|
||||
for (let i = 0; i < count; i++) {
|
||||
const result = await cmdThreadStepOnce(storageRoot, threadId, agentOverride);
|
||||
const result = await cmdThreadStepOnce(storageRoot, threadId, agentOverride, plog);
|
||||
results.push(result);
|
||||
if (result.done) {
|
||||
break;
|
||||
@@ -690,16 +725,31 @@ export async function cmdThreadStep(
|
||||
return results;
|
||||
}
|
||||
|
||||
async function cmdThreadStepOnce(
|
||||
async function resolveActiveThreadWorkflowHash(
|
||||
storageRoot: string,
|
||||
threadId: ThreadId,
|
||||
agentOverride: string | null,
|
||||
): Promise<StepOutput> {
|
||||
): Promise<CasRef> {
|
||||
const index = await loadThreadsIndex(storageRoot);
|
||||
const headHash = index[threadId];
|
||||
if (headHash === undefined) {
|
||||
fail(`thread not active: ${threadId}`);
|
||||
}
|
||||
const uwf = await createUwfStore(storageRoot);
|
||||
const chain = walkChain(uwf, headHash);
|
||||
return chain.start.workflow;
|
||||
}
|
||||
|
||||
async function cmdThreadStepOnce(
|
||||
storageRoot: string,
|
||||
threadId: ThreadId,
|
||||
agentOverride: string | null,
|
||||
plog: ProcessLogger,
|
||||
): Promise<StepOutput> {
|
||||
const index = await loadThreadsIndex(storageRoot);
|
||||
const headHash = index[threadId];
|
||||
if (headHash === undefined) {
|
||||
failStep(plog, `thread not active: ${threadId}`);
|
||||
}
|
||||
|
||||
const uwf = await createUwfStore(storageRoot);
|
||||
const chain = walkChain(uwf, headHash);
|
||||
@@ -709,10 +759,17 @@ async function cmdThreadStepOnce(
|
||||
|
||||
const nextResult = await evaluate(workflow, context);
|
||||
if (!nextResult.ok) {
|
||||
fail(nextResult.error.message);
|
||||
failStep(plog, `moderator evaluate failed: ${nextResult.error.message}`);
|
||||
}
|
||||
|
||||
if (nextResult.value === END_ROLE) {
|
||||
plog.log(
|
||||
PL_MODERATOR,
|
||||
`moderator role=${nextResult.value.role} prompt=${nextResult.value.prompt}`,
|
||||
null,
|
||||
);
|
||||
|
||||
if (nextResult.value.role === END_ROLE) {
|
||||
plog.log(PL_THREAD_ARCHIVED, `thread archived head=${headHash}`, null);
|
||||
await archiveThread(storageRoot, threadId, workflowHash, headHash);
|
||||
return {
|
||||
workflow: workflowHash,
|
||||
@@ -722,18 +779,25 @@ async function cmdThreadStepOnce(
|
||||
};
|
||||
}
|
||||
|
||||
const role = nextResult.value;
|
||||
const role = nextResult.value.role;
|
||||
const edgePrompt = nextResult.value.prompt;
|
||||
const config = await loadWorkflowConfig(storageRoot);
|
||||
const agent = resolveAgentConfig(config, workflow, role, agentOverride);
|
||||
|
||||
plog.log(PL_AGENT_SPAWN, `spawning agent command=${agent.command}`, {
|
||||
args: [...agent.args, threadId, role].join(" "),
|
||||
});
|
||||
|
||||
loadDotenv({ path: getEnvPath(storageRoot) });
|
||||
const newHead = spawnAgent(agent, threadId, role);
|
||||
const newHead = spawnAgent(plog, agent, threadId, role, edgePrompt);
|
||||
|
||||
plog.log(PL_AGENT_DONE, `agent returned head=${newHead}`, null);
|
||||
|
||||
// Re-create store to pick up nodes written by the agent subprocess
|
||||
const uwfAfter = await createUwfStore(storageRoot);
|
||||
const newNode = uwfAfter.store.get(newHead);
|
||||
if (newNode === null || newNode.type !== uwfAfter.schemas.stepNode) {
|
||||
fail(`agent returned hash that is not a StepNode: ${newHead}`);
|
||||
failStep(plog, `agent returned hash that is not a StepNode: ${newHead}`);
|
||||
}
|
||||
|
||||
// Reload threads index to avoid overwriting changes made by the agent subprocess
|
||||
@@ -745,11 +809,12 @@ async function cmdThreadStepOnce(
|
||||
const contextAfter = buildModeratorContext(uwfAfter, chainAfter);
|
||||
const afterResult = await evaluate(workflow, contextAfter);
|
||||
if (!afterResult.ok) {
|
||||
fail(afterResult.error.message);
|
||||
failStep(plog, `post-step moderator evaluate failed: ${afterResult.error.message}`);
|
||||
}
|
||||
|
||||
const done = afterResult.value === END_ROLE;
|
||||
const done = afterResult.value.role === END_ROLE;
|
||||
if (done) {
|
||||
plog.log(PL_THREAD_ARCHIVED, `thread archived head=${newHead}`, null);
|
||||
await archiveThread(storageRoot, threadId, workflowHash, newHead);
|
||||
}
|
||||
|
||||
|
||||
@@ -55,10 +55,16 @@ function isJsonSchema(value: unknown): value is JSONSchema {
|
||||
function normalizeGraph(graph: Record<string, Transition[]>): Record<string, Transition[]> {
|
||||
const result: Record<string, Transition[]> = {};
|
||||
for (const [node, transitions] of Object.entries(graph)) {
|
||||
result[node] = transitions.map((t) => ({
|
||||
role: t.role,
|
||||
condition: t.condition ?? null,
|
||||
}));
|
||||
result[node] = transitions.map((t) => {
|
||||
if (typeof t.prompt !== "string" || t.prompt.trim() === "") {
|
||||
fail(`graph[${node}] transition to "${t.role}": prompt is required (non-empty string)`);
|
||||
}
|
||||
return {
|
||||
role: t.role,
|
||||
condition: t.condition ?? null,
|
||||
prompt: t.prompt,
|
||||
};
|
||||
});
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -44,6 +44,8 @@ function isTransition(value: unknown): boolean {
|
||||
const condition = value.condition;
|
||||
return (
|
||||
typeof value.role === "string" &&
|
||||
typeof value.prompt === "string" &&
|
||||
value.prompt.trim() !== "" &&
|
||||
(condition === null || condition === undefined || typeof condition === "string")
|
||||
);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
|
||||
import type { LlmToolCall } from "../src/llm/types.js";
|
||||
|
||||
/** Mirror OpenAI response shape for parser coverage via chatCompletionWithTools integration later. */
|
||||
describe("LlmToolCall shape", () => {
|
||||
test("tool call record fields", () => {
|
||||
const call: LlmToolCall = {
|
||||
id: "call_1",
|
||||
name: "read_file",
|
||||
arguments: '{"path":"README.md"}',
|
||||
};
|
||||
expect(call.name).toBe("read_file");
|
||||
expect(JSON.parse(call.arguments)).toEqual({ path: "README.md" });
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,43 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import { resolve } from "node:path";
|
||||
import { resolvePath, resolvePathInWorkspace } from "../src/tools/path.js";
|
||||
|
||||
describe("resolvePath", () => {
|
||||
test("resolves relative paths against cwd", () => {
|
||||
const root = "/workspace/project";
|
||||
const resolved = resolvePath(root, "src/foo.ts");
|
||||
expect(resolved).toBe(resolve(root, "src/foo.ts"));
|
||||
});
|
||||
|
||||
test("resolves absolute paths as-is", () => {
|
||||
const resolved = resolvePath("/workspace", "/etc/hosts");
|
||||
expect(resolved).toBe("/etc/hosts");
|
||||
});
|
||||
|
||||
test("resolves parent traversal normally", () => {
|
||||
const resolved = resolvePath("/workspace/project", "../other/file.ts");
|
||||
expect(resolved).toBe(resolve("/workspace/project", "../other/file.ts"));
|
||||
});
|
||||
});
|
||||
|
||||
describe("resolvePathInWorkspace", () => {
|
||||
test("allows relative paths within workspace", () => {
|
||||
const resolved = resolvePathInWorkspace("/workspace", "src/foo.ts");
|
||||
expect(resolved).toBe(resolve("/workspace", "src/foo.ts"));
|
||||
});
|
||||
|
||||
test("rejects path that escapes workspace root", () => {
|
||||
const resolved = resolvePathInWorkspace("/workspace", "../etc/passwd");
|
||||
expect(resolved).toBeNull();
|
||||
});
|
||||
|
||||
test("rejects absolute path escape via double-dot", () => {
|
||||
const resolved = resolvePathInWorkspace("/workspace/project", "../../outside");
|
||||
expect(resolved).toBeNull();
|
||||
});
|
||||
|
||||
test("allows deep nested path", () => {
|
||||
const resolved = resolvePathInWorkspace("/workspace", "a/b/c/file.txt");
|
||||
expect(resolved).toBe(resolve("/workspace", "a/b/c/file.txt"));
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,59 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
|
||||
import type { AgentContext } from "@uncaged/workflow-agent-kit";
|
||||
|
||||
import { buildBuiltinPrompt } from "../src/prompt.js";
|
||||
|
||||
function minimalContext(overrides: Partial<AgentContext> = {}): AgentContext {
|
||||
return {
|
||||
threadId: "00000000000000000000000000" as AgentContext["threadId"],
|
||||
role: "developer",
|
||||
store: {} as AgentContext["store"],
|
||||
workflow: {
|
||||
name: "test",
|
||||
roles: {
|
||||
developer: {
|
||||
goal: "Ship the fix",
|
||||
capabilities: ["file-edit"],
|
||||
procedure: ["Edit files"],
|
||||
output: "A patch",
|
||||
frontmatter: "schema-hash",
|
||||
},
|
||||
},
|
||||
conditions: {},
|
||||
graph: {},
|
||||
},
|
||||
start: { workflow: "wf-hash", prompt: "Fix the bug" },
|
||||
steps: [],
|
||||
outputFormatInstruction: "---\nstatus: done\n---",
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
describe("buildBuiltinPrompt", () => {
|
||||
test("includes output format, task, and role goal", () => {
|
||||
const prompt = buildBuiltinPrompt(minimalContext());
|
||||
expect(prompt).toContain("status: done");
|
||||
expect(prompt).toContain("## Goal");
|
||||
expect(prompt).toContain("Ship the fix");
|
||||
expect(prompt).toContain("## Task");
|
||||
expect(prompt).toContain("Fix the bug");
|
||||
});
|
||||
|
||||
test("includes history when steps exist", () => {
|
||||
const prompt = buildBuiltinPrompt(
|
||||
minimalContext({
|
||||
steps: [
|
||||
{
|
||||
role: "planner",
|
||||
output: { plan: "step 1" },
|
||||
agent: "uwf-builtin",
|
||||
detail: "detail-hash",
|
||||
},
|
||||
],
|
||||
}),
|
||||
);
|
||||
expect(prompt).toContain("## Previous Steps");
|
||||
expect(prompt).toContain("planner");
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,34 @@
|
||||
{
|
||||
"name": "@uncaged/workflow-agent-builtin",
|
||||
"version": "0.5.0",
|
||||
"files": [
|
||||
"src",
|
||||
"dist",
|
||||
"package.json"
|
||||
],
|
||||
"type": "module",
|
||||
"bin": {
|
||||
"uwf-builtin": "./src/cli.ts"
|
||||
},
|
||||
"exports": {
|
||||
".": {
|
||||
"bun": "./src/index.ts",
|
||||
"types": "./dist/index.d.ts",
|
||||
"import": "./dist/index.js"
|
||||
}
|
||||
},
|
||||
"scripts": {
|
||||
"test": "bun test"
|
||||
},
|
||||
"dependencies": {
|
||||
"@uncaged/json-cas": "^0.4.0",
|
||||
"@uncaged/workflow-agent-kit": "workspace:^",
|
||||
"@uncaged/workflow-util": "workspace:^"
|
||||
},
|
||||
"devDependencies": {
|
||||
"typescript": "^5.8.3"
|
||||
},
|
||||
"publishConfig": {
|
||||
"access": "public"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,123 @@
|
||||
import type { Store } from "@uncaged/json-cas";
|
||||
import {
|
||||
type AgentContext,
|
||||
type AgentRunResult,
|
||||
createAgent,
|
||||
loadWorkflowConfig,
|
||||
resolveModel,
|
||||
resolveStorageRoot,
|
||||
} from "@uncaged/workflow-agent-kit";
|
||||
import { generateUlid } from "@uncaged/workflow-util";
|
||||
|
||||
import { storeBuiltinDetail } from "./detail.js";
|
||||
import type { ChatMessage } from "./llm/index.js";
|
||||
import { BUILTIN_CONTINUE_MAX_TURNS, BUILTIN_MAX_TURNS, runBuiltinLoop } from "./loop.js";
|
||||
import { buildBuiltinPrompt } from "./prompt.js";
|
||||
import type { BuiltinSessionState } from "./types.js";
|
||||
|
||||
const sessions = new Map<string, BuiltinSessionState>();
|
||||
|
||||
function getSession(sessionId: string): BuiltinSessionState {
|
||||
const session = sessions.get(sessionId);
|
||||
if (session === undefined) {
|
||||
throw new Error(`builtin session not found: ${sessionId}`);
|
||||
}
|
||||
return session;
|
||||
}
|
||||
|
||||
function buildToolContext(storageRoot: string): { cwd: string; storageRoot: string } {
|
||||
return {
|
||||
cwd: process.cwd(),
|
||||
storageRoot,
|
||||
};
|
||||
}
|
||||
|
||||
async function runBuiltinWithMessages(
|
||||
storageRoot: string,
|
||||
provider: ReturnType<typeof resolveModel>,
|
||||
messages: ChatMessage[],
|
||||
session: BuiltinSessionState,
|
||||
store: Store,
|
||||
maxTurns: number,
|
||||
): Promise<AgentRunResult> {
|
||||
const loopResult = await runBuiltinLoop({
|
||||
provider,
|
||||
messages,
|
||||
toolCtx: buildToolContext(storageRoot),
|
||||
maxTurns,
|
||||
existingTurns: session.turns,
|
||||
});
|
||||
|
||||
session.messages = loopResult.messages;
|
||||
session.turns = loopResult.turns;
|
||||
|
||||
const { detailHash, output } = await storeBuiltinDetail(
|
||||
store,
|
||||
session.sessionId,
|
||||
session.model,
|
||||
session.startedAtMs,
|
||||
session.turns,
|
||||
);
|
||||
|
||||
const finalOutput = output !== "" ? output : loopResult.finalText;
|
||||
return { output: finalOutput, detailHash, sessionId: session.sessionId };
|
||||
}
|
||||
|
||||
async function runBuiltin(ctx: AgentContext): Promise<AgentRunResult> {
|
||||
const storageRoot = resolveStorageRoot();
|
||||
const config = await loadWorkflowConfig(storageRoot);
|
||||
const provider = resolveModel(config, config.defaultModel);
|
||||
|
||||
const sessionId = generateUlid(Date.now());
|
||||
const systemPrompt = buildBuiltinPrompt(ctx);
|
||||
const messages: ChatMessage[] = [{ role: "system", content: systemPrompt }];
|
||||
|
||||
const session: BuiltinSessionState = {
|
||||
sessionId,
|
||||
model: provider.model,
|
||||
startedAtMs: Date.now(),
|
||||
messages,
|
||||
turns: [],
|
||||
};
|
||||
sessions.set(sessionId, session);
|
||||
|
||||
return runBuiltinWithMessages(
|
||||
storageRoot,
|
||||
provider,
|
||||
messages,
|
||||
session,
|
||||
ctx.store,
|
||||
BUILTIN_MAX_TURNS,
|
||||
);
|
||||
}
|
||||
|
||||
async function continueBuiltin(
|
||||
sessionId: string,
|
||||
message: string,
|
||||
store: Store,
|
||||
): Promise<AgentRunResult> {
|
||||
const session = getSession(sessionId);
|
||||
const storageRoot = resolveStorageRoot();
|
||||
const config = await loadWorkflowConfig(storageRoot);
|
||||
const provider = resolveModel(config, config.defaultModel);
|
||||
|
||||
const messages: ChatMessage[] = [...session.messages, { role: "user", content: message }];
|
||||
|
||||
return runBuiltinWithMessages(
|
||||
storageRoot,
|
||||
provider,
|
||||
messages,
|
||||
session,
|
||||
store,
|
||||
BUILTIN_CONTINUE_MAX_TURNS,
|
||||
);
|
||||
}
|
||||
|
||||
/** Agent CLI factory: built-in LLM loop with file/shell tools. */
|
||||
export function createBuiltinAgent(): () => Promise<void> {
|
||||
return createAgent({
|
||||
name: "builtin",
|
||||
run: runBuiltin,
|
||||
continue: continueBuiltin,
|
||||
});
|
||||
}
|
||||
@@ -0,0 +1,6 @@
|
||||
#!/usr/bin/env bun
|
||||
|
||||
import { createBuiltinAgent } from "./agent.js";
|
||||
|
||||
const main = createBuiltinAgent();
|
||||
void main();
|
||||
@@ -0,0 +1,115 @@
|
||||
import { bootstrap, putSchema, type Store } from "@uncaged/json-cas";
|
||||
|
||||
import { BUILTIN_DETAIL_SCHEMA, BUILTIN_TURN_SCHEMA } from "./schemas.js";
|
||||
import type {
|
||||
BuiltinDetailPayload,
|
||||
BuiltinLoopTurn,
|
||||
BuiltinToolCall,
|
||||
BuiltinTurnPayload,
|
||||
BuiltinTurnRole,
|
||||
} from "./types.js";
|
||||
|
||||
function mapToolCalls(calls: NonNullable<BuiltinLoopTurn["toolCalls"]>): BuiltinToolCall[] {
|
||||
return calls.map((call) => ({
|
||||
name: call.name,
|
||||
args: call.args,
|
||||
}));
|
||||
}
|
||||
|
||||
function loopTurnToAssistantPayload(turn: BuiltinLoopTurn, index: number): BuiltinTurnPayload {
|
||||
return {
|
||||
index,
|
||||
role: "assistant",
|
||||
content: turn.assistantContent ?? "",
|
||||
toolCalls:
|
||||
turn.toolCalls !== null && turn.toolCalls.length > 0 ? mapToolCalls(turn.toolCalls) : null,
|
||||
reasoning: null,
|
||||
};
|
||||
}
|
||||
|
||||
function loopTurnToToolPayloads(turn: BuiltinLoopTurn, startIndex: number): BuiltinTurnPayload[] {
|
||||
if (turn.toolResults === null || turn.toolResults.length === 0) {
|
||||
return [];
|
||||
}
|
||||
const payloads: BuiltinTurnPayload[] = [];
|
||||
let index = startIndex;
|
||||
for (const result of turn.toolResults) {
|
||||
payloads.push({
|
||||
index,
|
||||
role: "tool" as BuiltinTurnRole,
|
||||
content: result.content,
|
||||
toolCalls: null,
|
||||
reasoning: null,
|
||||
});
|
||||
index += 1;
|
||||
}
|
||||
return payloads;
|
||||
}
|
||||
|
||||
/** Last assistant message with non-empty text. */
|
||||
export function extractFinalAssistantText(turns: BuiltinLoopTurn[]): string {
|
||||
for (let i = turns.length - 1; i >= 0; i--) {
|
||||
const turn = turns[i];
|
||||
if (turn === undefined) {
|
||||
continue;
|
||||
}
|
||||
const text = turn.assistantContent;
|
||||
if (text !== null && text.trim() !== "") {
|
||||
return text;
|
||||
}
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
type BuiltinSchemaHashes = {
|
||||
turn: string;
|
||||
detail: string;
|
||||
};
|
||||
|
||||
async function registerBuiltinSchemas(store: Store): Promise<BuiltinSchemaHashes> {
|
||||
await bootstrap(store);
|
||||
const [turn, detail] = await Promise.all([
|
||||
putSchema(store, BUILTIN_TURN_SCHEMA),
|
||||
putSchema(store, BUILTIN_DETAIL_SCHEMA),
|
||||
]);
|
||||
return { turn, detail };
|
||||
}
|
||||
|
||||
export async function storeBuiltinDetail(
|
||||
store: Store,
|
||||
sessionId: string,
|
||||
model: string,
|
||||
startedAtMs: number,
|
||||
turns: BuiltinLoopTurn[],
|
||||
nowMs: number = Date.now(),
|
||||
): Promise<{ detailHash: string; output: string }> {
|
||||
const schemas = await registerBuiltinSchemas(store);
|
||||
const turnHashes: string[] = [];
|
||||
let turnIndex = 0;
|
||||
|
||||
for (const loopTurn of turns) {
|
||||
const assistant = loopTurnToAssistantPayload(loopTurn, turnIndex);
|
||||
const assistantHash = await store.put(schemas.turn, assistant);
|
||||
turnHashes.push(assistantHash);
|
||||
turnIndex += 1;
|
||||
|
||||
const toolPayloads = loopTurnToToolPayloads(loopTurn, turnIndex);
|
||||
for (const toolPayload of toolPayloads) {
|
||||
const toolHash = await store.put(schemas.turn, toolPayload);
|
||||
turnHashes.push(toolHash);
|
||||
turnIndex += 1;
|
||||
}
|
||||
}
|
||||
|
||||
const duration = Math.max(0, nowMs - startedAtMs);
|
||||
const detail: BuiltinDetailPayload = {
|
||||
sessionId,
|
||||
model,
|
||||
duration,
|
||||
turnCount: turnHashes.length,
|
||||
turns: turnHashes,
|
||||
};
|
||||
const detailHash = await store.put(schemas.detail, detail);
|
||||
const output = extractFinalAssistantText(turns);
|
||||
return { detailHash, output };
|
||||
}
|
||||
@@ -0,0 +1,14 @@
|
||||
export { createBuiltinAgent } from "./agent.js";
|
||||
export { extractFinalAssistantText, storeBuiltinDetail } from "./detail.js";
|
||||
export type { ChatMessage, LlmAssistantResponse, LlmToolCall } from "./llm/index.js";
|
||||
export { chatCompletionWithTools } from "./llm/index.js";
|
||||
export { BUILTIN_CONTINUE_MAX_TURNS, BUILTIN_MAX_TURNS, runBuiltinLoop } from "./loop.js";
|
||||
export { buildBuiltinPrompt } from "./prompt.js";
|
||||
export type { BuiltinTool, ToolContext } from "./tools/index.js";
|
||||
export { executeBuiltinTool, getBuiltinTools } from "./tools/index.js";
|
||||
export type {
|
||||
BuiltinDetailPayload,
|
||||
BuiltinLoopTurn,
|
||||
BuiltinSessionState,
|
||||
BuiltinTurnPayload,
|
||||
} from "./types.js";
|
||||
@@ -0,0 +1,7 @@
|
||||
export { chatCompletionWithTools } from "./llm.js";
|
||||
export type {
|
||||
ChatMessage,
|
||||
LlmAssistantResponse,
|
||||
LlmToolCall,
|
||||
OpenAiToolDefinition,
|
||||
} from "./types.js";
|
||||
@@ -0,0 +1,135 @@
|
||||
import type { ResolvedLlmProvider } from "@uncaged/workflow-agent-kit";
|
||||
|
||||
import type {
|
||||
ChatMessage,
|
||||
LlmAssistantResponse,
|
||||
LlmToolCall,
|
||||
OpenAiToolDefinition,
|
||||
} from "./types.js";
|
||||
|
||||
function isRecord(value: unknown): value is Record<string, unknown> {
|
||||
return typeof value === "object" && value !== null && !Array.isArray(value);
|
||||
}
|
||||
|
||||
function chatUrl(baseUrl: string): string {
|
||||
const trimmed = baseUrl.replace(/\/+$/, "");
|
||||
return `${trimmed}/chat/completions`;
|
||||
}
|
||||
|
||||
function parseToolCalls(raw: unknown): LlmToolCall[] | null {
|
||||
if (!Array.isArray(raw) || raw.length === 0) {
|
||||
return null;
|
||||
}
|
||||
const calls: LlmToolCall[] = [];
|
||||
for (const entry of raw) {
|
||||
if (!isRecord(entry)) {
|
||||
continue;
|
||||
}
|
||||
const id = entry.id;
|
||||
const fn = entry.function;
|
||||
if (typeof id !== "string" || !isRecord(fn)) {
|
||||
continue;
|
||||
}
|
||||
const name = fn.name;
|
||||
const args = fn.arguments;
|
||||
if (typeof name !== "string" || typeof args !== "string") {
|
||||
continue;
|
||||
}
|
||||
calls.push({ id, name, arguments: args });
|
||||
}
|
||||
return calls.length > 0 ? calls : null;
|
||||
}
|
||||
|
||||
function parseAssistantMessage(parsed: unknown): LlmAssistantResponse {
|
||||
if (!isRecord(parsed)) {
|
||||
throw new Error("LLM response is not an object");
|
||||
}
|
||||
const choices = parsed.choices;
|
||||
if (!Array.isArray(choices) || choices.length === 0) {
|
||||
throw new Error("LLM response has no choices");
|
||||
}
|
||||
const c0 = choices[0];
|
||||
if (!isRecord(c0)) {
|
||||
throw new Error("LLM choice is not an object");
|
||||
}
|
||||
const messageObj = c0.message;
|
||||
if (!isRecord(messageObj)) {
|
||||
throw new Error("LLM message is not an object");
|
||||
}
|
||||
const contentRaw = messageObj.content;
|
||||
const content =
|
||||
typeof contentRaw === "string"
|
||||
? contentRaw
|
||||
: contentRaw === null || contentRaw === undefined
|
||||
? null
|
||||
: null;
|
||||
const toolCalls = parseToolCalls(messageObj.tool_calls);
|
||||
return { content, toolCalls };
|
||||
}
|
||||
|
||||
function serializeMessage(message: ChatMessage): Record<string, unknown> {
|
||||
if (message.role === "tool") {
|
||||
return {
|
||||
role: "tool",
|
||||
tool_call_id: message.tool_call_id,
|
||||
content: message.content,
|
||||
};
|
||||
}
|
||||
if (message.role === "assistant") {
|
||||
const base: Record<string, unknown> = {
|
||||
role: "assistant",
|
||||
content: message.content,
|
||||
};
|
||||
if (message.tool_calls !== null && message.tool_calls.length > 0) {
|
||||
base.tool_calls = message.tool_calls.map((call) => ({
|
||||
id: call.id,
|
||||
type: "function",
|
||||
function: { name: call.name, arguments: call.arguments },
|
||||
}));
|
||||
}
|
||||
return base;
|
||||
}
|
||||
return { role: message.role, content: message.content };
|
||||
}
|
||||
|
||||
/** OpenAI-compatible chat completion with tool calling (non-streaming). */
|
||||
export async function chatCompletionWithTools(
|
||||
provider: ResolvedLlmProvider,
|
||||
messages: ChatMessage[],
|
||||
tools: OpenAiToolDefinition[],
|
||||
): Promise<LlmAssistantResponse> {
|
||||
let response: Response;
|
||||
try {
|
||||
response = await fetch(chatUrl(provider.baseUrl), {
|
||||
method: "POST",
|
||||
headers: {
|
||||
Authorization: `Bearer ${provider.apiKey}`,
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: provider.model,
|
||||
messages: messages.map(serializeMessage),
|
||||
tools,
|
||||
tool_choice: "auto",
|
||||
}),
|
||||
});
|
||||
} catch (cause) {
|
||||
const message = cause instanceof Error ? cause.message : String(cause);
|
||||
throw new Error(`LLM network error: ${message}`);
|
||||
}
|
||||
|
||||
const responseText = await response.text();
|
||||
if (!response.ok) {
|
||||
throw new Error(`LLM HTTP ${response.status}: ${responseText.slice(0, 2000)}`);
|
||||
}
|
||||
|
||||
let parsed: unknown;
|
||||
try {
|
||||
parsed = JSON.parse(responseText) as unknown;
|
||||
} catch (cause) {
|
||||
const message = cause instanceof Error ? cause.message : String(cause);
|
||||
throw new Error(`LLM invalid JSON response: ${message}`);
|
||||
}
|
||||
|
||||
return parseAssistantMessage(parsed);
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
export type LlmToolCall = {
|
||||
id: string;
|
||||
name: string;
|
||||
arguments: string;
|
||||
};
|
||||
|
||||
export type LlmAssistantResponse = {
|
||||
content: string | null;
|
||||
toolCalls: LlmToolCall[] | null;
|
||||
};
|
||||
|
||||
export type ChatMessage =
|
||||
| { role: "system"; content: string }
|
||||
| { role: "user"; content: string }
|
||||
| {
|
||||
role: "assistant";
|
||||
content: string | null;
|
||||
tool_calls: LlmToolCall[] | null;
|
||||
}
|
||||
| { role: "tool"; tool_call_id: string; content: string };
|
||||
|
||||
export type OpenAiToolDefinition = {
|
||||
type: "function";
|
||||
function: {
|
||||
name: string;
|
||||
description: string;
|
||||
parameters: Record<string, unknown>;
|
||||
};
|
||||
};
|
||||
@@ -0,0 +1,110 @@
|
||||
import type { ResolvedLlmProvider } from "@uncaged/workflow-agent-kit";
|
||||
import { createLogger } from "@uncaged/workflow-util";
|
||||
|
||||
import { type ChatMessage, chatCompletionWithTools, type LlmToolCall } from "./llm/index.js";
|
||||
import {
|
||||
builtinToolsToOpenAi,
|
||||
executeBuiltinTool,
|
||||
getBuiltinTools,
|
||||
type ToolContext,
|
||||
} from "./tools/index.js";
|
||||
import type { BuiltinLoopTurn, BuiltinToolCallRecord, BuiltinToolResultRecord } from "./types.js";
|
||||
|
||||
const log = createLogger({ sink: { kind: "stderr" } });
|
||||
|
||||
export const BUILTIN_MAX_TURNS = 30;
|
||||
export const BUILTIN_CONTINUE_MAX_TURNS = 5;
|
||||
|
||||
export type RunBuiltinLoopOptions = {
|
||||
provider: ResolvedLlmProvider;
|
||||
messages: ChatMessage[];
|
||||
toolCtx: ToolContext;
|
||||
maxTurns: number;
|
||||
existingTurns: BuiltinLoopTurn[];
|
||||
};
|
||||
|
||||
export type RunBuiltinLoopResult = {
|
||||
finalText: string;
|
||||
messages: ChatMessage[];
|
||||
turns: BuiltinLoopTurn[];
|
||||
};
|
||||
|
||||
function mapToolCalls(calls: LlmToolCall[]): BuiltinToolCallRecord[] {
|
||||
return calls.map((call) => ({
|
||||
id: call.id,
|
||||
name: call.name,
|
||||
args: call.arguments,
|
||||
}));
|
||||
}
|
||||
|
||||
/** Agent run loop: LLM ↔ tools until no tool_calls or maxTurns. */
|
||||
export async function runBuiltinLoop(
|
||||
options: RunBuiltinLoopOptions,
|
||||
): Promise<RunBuiltinLoopResult> {
|
||||
const messages = [...options.messages];
|
||||
const turns = [...options.existingTurns];
|
||||
const openAiTools = builtinToolsToOpenAi(getBuiltinTools());
|
||||
let finalText = "";
|
||||
|
||||
for (let turn = 0; turn < options.maxTurns; turn++) {
|
||||
log("8K2M4N7P", `builtin loop turn ${turn + 1}/${options.maxTurns}`);
|
||||
const response = await chatCompletionWithTools(options.provider, messages, openAiTools);
|
||||
|
||||
const assistantMessage: ChatMessage = {
|
||||
role: "assistant",
|
||||
content: response.content,
|
||||
tool_calls: response.toolCalls,
|
||||
};
|
||||
messages.push(assistantMessage);
|
||||
|
||||
if (response.toolCalls === null || response.toolCalls.length === 0) {
|
||||
finalText = response.content ?? "";
|
||||
turns.push({
|
||||
assistantContent: response.content,
|
||||
toolCalls: null,
|
||||
toolResults: null,
|
||||
});
|
||||
break;
|
||||
}
|
||||
|
||||
const toolCallRecords = mapToolCalls(response.toolCalls);
|
||||
const toolResults: BuiltinToolResultRecord[] = [];
|
||||
|
||||
for (const call of response.toolCalls) {
|
||||
const result = await executeBuiltinTool(call.name, call.arguments, options.toolCtx);
|
||||
toolResults.push({
|
||||
toolCallId: call.id,
|
||||
name: call.name,
|
||||
content: result,
|
||||
});
|
||||
messages.push({
|
||||
role: "tool",
|
||||
tool_call_id: call.id,
|
||||
content: result,
|
||||
});
|
||||
}
|
||||
|
||||
turns.push({
|
||||
assistantContent: response.content,
|
||||
toolCalls: toolCallRecords,
|
||||
toolResults,
|
||||
});
|
||||
}
|
||||
|
||||
if (finalText === "" && messages.length > 0) {
|
||||
for (let i = messages.length - 1; i >= 0; i--) {
|
||||
const msg = messages[i];
|
||||
if (
|
||||
msg !== undefined &&
|
||||
msg.role === "assistant" &&
|
||||
msg.content !== null &&
|
||||
msg.content.trim() !== ""
|
||||
) {
|
||||
finalText = msg.content;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return { finalText, messages, turns };
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
import { type AgentContext, buildRolePrompt } from "@uncaged/workflow-agent-kit";
|
||||
|
||||
function buildHistorySummary(steps: AgentContext["steps"]): string {
|
||||
if (steps.length === 0) {
|
||||
return "";
|
||||
}
|
||||
|
||||
const lines: string[] = ["## Previous Steps"];
|
||||
for (let i = 0; i < steps.length; i++) {
|
||||
const step = steps[i];
|
||||
if (step === undefined) {
|
||||
continue;
|
||||
}
|
||||
lines.push("");
|
||||
lines.push(`### Step ${i + 1}: ${step.role}`);
|
||||
lines.push(`Output: ${JSON.stringify(step.output)}`);
|
||||
lines.push(`Agent: ${step.agent}`);
|
||||
}
|
||||
return lines.join("\n");
|
||||
}
|
||||
|
||||
/** Assemble output format, role prompt, task, and history (aligned with buildHermesPrompt). */
|
||||
export function buildBuiltinPrompt(ctx: AgentContext): string {
|
||||
const roleDef = ctx.workflow.roles[ctx.role];
|
||||
const rolePrompt = roleDef !== undefined ? buildRolePrompt(roleDef) : "";
|
||||
const parts: string[] = [];
|
||||
if (ctx.outputFormatInstruction !== "") {
|
||||
parts.push(ctx.outputFormatInstruction, "");
|
||||
}
|
||||
parts.push(rolePrompt, "", "## Task", ctx.start.prompt);
|
||||
const historyBlock = buildHistorySummary(ctx.steps);
|
||||
if (historyBlock !== "") {
|
||||
parts.push("", historyBlock);
|
||||
}
|
||||
return parts.join("\n");
|
||||
}
|
||||
@@ -0,0 +1,46 @@
|
||||
import type { JSONSchema } from "@uncaged/json-cas";
|
||||
|
||||
const BUILTIN_TOOL_CALL_SCHEMA: JSONSchema = {
|
||||
type: "object",
|
||||
required: ["name", "args"],
|
||||
properties: {
|
||||
name: { type: "string" },
|
||||
args: { type: "string" },
|
||||
},
|
||||
additionalProperties: false,
|
||||
};
|
||||
|
||||
export const BUILTIN_TURN_SCHEMA: JSONSchema = {
|
||||
title: "builtin-turn",
|
||||
type: "object",
|
||||
required: ["index", "role", "content"],
|
||||
properties: {
|
||||
index: { type: "integer" },
|
||||
role: { type: "string", enum: ["assistant", "tool"] },
|
||||
content: { type: "string" },
|
||||
toolCalls: {
|
||||
anyOf: [{ type: "array", items: BUILTIN_TOOL_CALL_SCHEMA }, { type: "null" }],
|
||||
},
|
||||
reasoning: {
|
||||
anyOf: [{ type: "string" }, { type: "null" }],
|
||||
},
|
||||
},
|
||||
additionalProperties: false,
|
||||
};
|
||||
|
||||
export const BUILTIN_DETAIL_SCHEMA: JSONSchema = {
|
||||
title: "builtin-detail",
|
||||
type: "object",
|
||||
required: ["sessionId", "model", "duration", "turnCount", "turns"],
|
||||
properties: {
|
||||
sessionId: { type: "string" },
|
||||
model: { type: "string" },
|
||||
duration: { type: "integer" },
|
||||
turnCount: { type: "integer" },
|
||||
turns: {
|
||||
type: "array",
|
||||
items: { type: "string", format: "cas_ref" },
|
||||
},
|
||||
},
|
||||
additionalProperties: false,
|
||||
};
|
||||
@@ -0,0 +1,44 @@
|
||||
import type { OpenAiToolDefinition } from "../llm/index.js";
|
||||
|
||||
import { readFileTool } from "./read-file.js";
|
||||
import { runCommandTool } from "./run-command.js";
|
||||
import type { BuiltinTool, ToolContext } from "./types.js";
|
||||
import { writeFileTool } from "./write-file.js";
|
||||
|
||||
export { resolvePath } from "./path.js";
|
||||
export type { BuiltinTool, ToolContext } from "./types.js";
|
||||
|
||||
const BUILTIN_TOOLS: BuiltinTool[] = [readFileTool, writeFileTool, runCommandTool];
|
||||
|
||||
export function getBuiltinTools(): readonly BuiltinTool[] {
|
||||
return BUILTIN_TOOLS;
|
||||
}
|
||||
|
||||
export function builtinToolsToOpenAi(tools: readonly BuiltinTool[]): OpenAiToolDefinition[] {
|
||||
return tools.map((tool) => ({
|
||||
type: "function",
|
||||
function: {
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
parameters: tool.parameters as Record<string, unknown>,
|
||||
},
|
||||
}));
|
||||
}
|
||||
|
||||
export async function executeBuiltinTool(
|
||||
name: string,
|
||||
argsJson: string,
|
||||
ctx: ToolContext,
|
||||
): Promise<string> {
|
||||
const tool = BUILTIN_TOOLS.find((t) => t.name === name);
|
||||
if (tool === undefined) {
|
||||
return `Error: unknown tool ${name}`;
|
||||
}
|
||||
let args: unknown;
|
||||
try {
|
||||
args = JSON.parse(argsJson) as unknown;
|
||||
} catch {
|
||||
return "Error: tool arguments must be valid JSON";
|
||||
}
|
||||
return tool.execute(args, ctx);
|
||||
}
|
||||
@@ -0,0 +1,17 @@
|
||||
import { isAbsolute, relative, resolve } from "node:path";
|
||||
|
||||
/** Resolve a path relative to the working directory. */
|
||||
export function resolvePath(cwd: string, inputPath: string): string {
|
||||
return resolve(cwd, inputPath);
|
||||
}
|
||||
|
||||
/** Reject paths that escape the workspace root via `..` segments. */
|
||||
export function resolvePathInWorkspace(cwd: string, inputPath: string): string | null {
|
||||
const root = resolve(cwd);
|
||||
const target = resolve(root, inputPath);
|
||||
const rel = relative(root, target);
|
||||
if (rel.startsWith("..") || isAbsolute(rel)) {
|
||||
return null;
|
||||
}
|
||||
return target;
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
import { readFile, stat } from "node:fs/promises";
|
||||
import { resolvePathInWorkspace } from "./path.js";
|
||||
import type { BuiltinTool } from "./types.js";
|
||||
|
||||
const MAX_READ_BYTES = 512 * 1024;
|
||||
|
||||
function isRecord(value: unknown): value is Record<string, unknown> {
|
||||
return typeof value === "object" && value !== null && !Array.isArray(value);
|
||||
}
|
||||
|
||||
export const readFileTool: BuiltinTool = {
|
||||
name: "read_file",
|
||||
description: "Read a UTF-8 text file from the workspace.",
|
||||
parameters: {
|
||||
type: "object",
|
||||
required: ["path"],
|
||||
properties: {
|
||||
path: { type: "string", description: "Relative or absolute path within the workspace." },
|
||||
},
|
||||
additionalProperties: false,
|
||||
},
|
||||
execute: async (args, ctx) => {
|
||||
if (!isRecord(args) || typeof args.path !== "string") {
|
||||
return "Error: path must be a string";
|
||||
}
|
||||
const resolved = resolvePathInWorkspace(ctx.cwd, args.path);
|
||||
if (resolved === null) {
|
||||
return "Error: path escapes workspace root";
|
||||
}
|
||||
try {
|
||||
const info = await stat(resolved);
|
||||
if (!info.isFile()) {
|
||||
return "Error: not a file";
|
||||
}
|
||||
if (info.size > MAX_READ_BYTES) {
|
||||
return `Error: file exceeds ${MAX_READ_BYTES} byte limit`;
|
||||
}
|
||||
return await readFile(resolved, "utf8");
|
||||
} catch (cause) {
|
||||
const message = cause instanceof Error ? cause.message : String(cause);
|
||||
return `Error: ${message}`;
|
||||
}
|
||||
},
|
||||
};
|
||||
@@ -0,0 +1,103 @@
|
||||
import { spawn } from "node:child_process";
|
||||
import { resolvePathInWorkspace } from "./path.js";
|
||||
import type { BuiltinTool } from "./types.js";
|
||||
|
||||
const COMMAND_TIMEOUT_MS = 60_000;
|
||||
const MAX_OUTPUT_CHARS = 32_000;
|
||||
|
||||
function isRecord(value: unknown): value is Record<string, unknown> {
|
||||
return typeof value === "object" && value !== null && !Array.isArray(value);
|
||||
}
|
||||
|
||||
function truncate(text: string, maxChars: number): string {
|
||||
if (text.length <= maxChars) {
|
||||
return text;
|
||||
}
|
||||
return `${text.slice(0, maxChars)}\n...(truncated)`;
|
||||
}
|
||||
|
||||
function runShell(
|
||||
command: string,
|
||||
cwd: string,
|
||||
): Promise<{ stdout: string; stderr: string; code: number }> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const child = spawn(command, {
|
||||
cwd,
|
||||
env: process.env,
|
||||
shell: true,
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
});
|
||||
|
||||
let stdout = "";
|
||||
let stderr = "";
|
||||
child.stdout?.on("data", (chunk: Buffer) => {
|
||||
stdout += chunk.toString();
|
||||
});
|
||||
child.stderr?.on("data", (chunk: Buffer) => {
|
||||
stderr += chunk.toString();
|
||||
});
|
||||
|
||||
const timer = setTimeout(() => {
|
||||
child.kill("SIGTERM");
|
||||
}, COMMAND_TIMEOUT_MS);
|
||||
|
||||
child.on("error", (cause) => {
|
||||
clearTimeout(timer);
|
||||
const message = cause instanceof Error ? cause.message : String(cause);
|
||||
reject(new Error(message));
|
||||
});
|
||||
|
||||
child.on("close", (code) => {
|
||||
clearTimeout(timer);
|
||||
resolve({ stdout, stderr, code: code ?? 1 });
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
export const runCommandTool: BuiltinTool = {
|
||||
name: "run_command",
|
||||
description:
|
||||
"Run a shell command in the workspace. Requires UWF_BUILTIN_ALLOW_SHELL=1. Output is truncated.",
|
||||
parameters: {
|
||||
type: "object",
|
||||
required: ["command"],
|
||||
properties: {
|
||||
command: { type: "string", description: "Shell command to execute." },
|
||||
cwd: {
|
||||
type: "string",
|
||||
description: "Optional working directory relative to workspace root.",
|
||||
},
|
||||
},
|
||||
additionalProperties: false,
|
||||
},
|
||||
execute: async (args, ctx) => {
|
||||
if (process.env.UWF_BUILTIN_ALLOW_SHELL !== "1") {
|
||||
return "Error: run_command disabled. Set UWF_BUILTIN_ALLOW_SHELL=1 to enable.";
|
||||
}
|
||||
if (!isRecord(args) || typeof args.command !== "string") {
|
||||
return "Error: command must be a string";
|
||||
}
|
||||
let workDir = ctx.cwd;
|
||||
if (args.cwd !== undefined && args.cwd !== null) {
|
||||
if (typeof args.cwd !== "string") {
|
||||
return "Error: cwd must be a string";
|
||||
}
|
||||
const resolved = resolvePathInWorkspace(ctx.cwd, args.cwd);
|
||||
if (resolved === null) {
|
||||
return "Error: cwd escapes workspace root";
|
||||
}
|
||||
workDir = resolved;
|
||||
}
|
||||
try {
|
||||
const { stdout, stderr, code } = await runShell(args.command, workDir);
|
||||
const out = truncate(
|
||||
`exit_code: ${code}\n--- stdout ---\n${stdout}\n--- stderr ---\n${stderr}`,
|
||||
MAX_OUTPUT_CHARS,
|
||||
);
|
||||
return out;
|
||||
} catch (cause) {
|
||||
const message = cause instanceof Error ? cause.message : String(cause);
|
||||
return `Error: ${message}`;
|
||||
}
|
||||
},
|
||||
};
|
||||
@@ -0,0 +1,13 @@
|
||||
import type { JSONSchema } from "@uncaged/json-cas";
|
||||
|
||||
export type ToolContext = {
|
||||
cwd: string;
|
||||
storageRoot: string;
|
||||
};
|
||||
|
||||
export type BuiltinTool = {
|
||||
name: string;
|
||||
description: string;
|
||||
parameters: JSONSchema;
|
||||
execute: (args: unknown, ctx: ToolContext) => Promise<string>;
|
||||
};
|
||||
@@ -0,0 +1,39 @@
|
||||
import { mkdir, writeFile } from "node:fs/promises";
|
||||
import { dirname } from "node:path";
|
||||
import { resolvePathInWorkspace } from "./path.js";
|
||||
import type { BuiltinTool } from "./types.js";
|
||||
|
||||
function isRecord(value: unknown): value is Record<string, unknown> {
|
||||
return typeof value === "object" && value !== null && !Array.isArray(value);
|
||||
}
|
||||
|
||||
export const writeFileTool: BuiltinTool = {
|
||||
name: "write_file",
|
||||
description: "Write UTF-8 text to a file in the workspace (creates parent directories).",
|
||||
parameters: {
|
||||
type: "object",
|
||||
required: ["path", "content"],
|
||||
properties: {
|
||||
path: { type: "string", description: "Relative or absolute path within the workspace." },
|
||||
content: { type: "string", description: "File contents to write." },
|
||||
},
|
||||
additionalProperties: false,
|
||||
},
|
||||
execute: async (args, ctx) => {
|
||||
if (!isRecord(args) || typeof args.path !== "string" || typeof args.content !== "string") {
|
||||
return "Error: path and content must be strings";
|
||||
}
|
||||
const resolved = resolvePathInWorkspace(ctx.cwd, args.path);
|
||||
if (resolved === null) {
|
||||
return "Error: path escapes workspace root";
|
||||
}
|
||||
try {
|
||||
await mkdir(dirname(resolved), { recursive: true });
|
||||
await writeFile(resolved, args.content, "utf8");
|
||||
return `Wrote ${args.content.length} bytes to ${args.path}`;
|
||||
} catch (cause) {
|
||||
const message = cause instanceof Error ? cause.message : String(cause);
|
||||
return `Error: ${message}`;
|
||||
}
|
||||
},
|
||||
};
|
||||
@@ -0,0 +1,50 @@
|
||||
import type { ChatMessage } from "./llm/index.js";
|
||||
|
||||
export type BuiltinToolCallRecord = {
|
||||
id: string;
|
||||
name: string;
|
||||
args: string;
|
||||
};
|
||||
|
||||
export type BuiltinToolResultRecord = {
|
||||
toolCallId: string;
|
||||
name: string;
|
||||
content: string;
|
||||
};
|
||||
|
||||
export type BuiltinLoopTurn = {
|
||||
assistantContent: string | null;
|
||||
toolCalls: BuiltinToolCallRecord[] | null;
|
||||
toolResults: BuiltinToolResultRecord[] | null;
|
||||
};
|
||||
|
||||
export type BuiltinSessionState = {
|
||||
sessionId: string;
|
||||
model: string;
|
||||
startedAtMs: number;
|
||||
messages: ChatMessage[];
|
||||
turns: BuiltinLoopTurn[];
|
||||
};
|
||||
|
||||
export type BuiltinTurnRole = "assistant" | "tool";
|
||||
|
||||
export type BuiltinToolCall = {
|
||||
name: string;
|
||||
args: string;
|
||||
};
|
||||
|
||||
export type BuiltinTurnPayload = {
|
||||
index: number;
|
||||
role: BuiltinTurnRole;
|
||||
content: string;
|
||||
toolCalls: BuiltinToolCall[] | null;
|
||||
reasoning: string | null;
|
||||
};
|
||||
|
||||
export type BuiltinDetailPayload = {
|
||||
sessionId: string;
|
||||
model: string;
|
||||
duration: number;
|
||||
turnCount: number;
|
||||
turns: string[];
|
||||
};
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"extends": "../../tsconfig.json",
|
||||
"compilerOptions": {
|
||||
"rootDir": "src",
|
||||
"outDir": "dist"
|
||||
},
|
||||
"include": ["src"],
|
||||
"references": [{ "path": "../workflow-agent-kit" }, { "path": "../workflow-util" }]
|
||||
}
|
||||
@@ -1,9 +1,13 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import type { AgentContext } from "@uncaged/workflow-agent-kit";
|
||||
import type { ThreadId } from "@uncaged/workflow-protocol";
|
||||
import { buildClaudeCodePrompt } from "../src/claude-code.js";
|
||||
|
||||
function makeCtx(overrides: Partial<AgentContext> = {}): AgentContext {
|
||||
return {
|
||||
threadId: "01JTEST0000000000000000000" as ThreadId,
|
||||
edgePrompt: "Proceed with the assigned role.",
|
||||
isFirstVisit: true,
|
||||
workflow: {
|
||||
roles: {
|
||||
developer: {
|
||||
|
||||
@@ -0,0 +1,77 @@
|
||||
import { afterEach, beforeEach, describe, expect, it } from "bun:test";
|
||||
|
||||
import { HermesAcpClient } from "../src/acp-client.js";
|
||||
|
||||
const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
|
||||
|
||||
describe("HermesAcpClient", () => {
|
||||
let client: HermesAcpClient;
|
||||
|
||||
beforeEach(() => {
|
||||
client = new HermesAcpClient();
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await client.close();
|
||||
});
|
||||
|
||||
it(
|
||||
"connect() returns a UUID sessionId",
|
||||
async () => {
|
||||
const sessionId = await client.connect(process.cwd());
|
||||
expect(typeof sessionId).toBe("string");
|
||||
expect(sessionId).toMatch(UUID_RE);
|
||||
},
|
||||
{ timeout: 2 * 60 * 1000 },
|
||||
);
|
||||
|
||||
it(
|
||||
"prompt() returns a non-empty text response",
|
||||
async () => {
|
||||
await client.connect(process.cwd());
|
||||
const result = await client.prompt("Reply with exactly the word: PONG");
|
||||
expect(typeof result.text).toBe("string");
|
||||
expect(result.text.length).toBeGreaterThan(0);
|
||||
expect(typeof result.sessionId).toBe("string");
|
||||
expect(result.sessionId).toMatch(UUID_RE);
|
||||
},
|
||||
{ timeout: 2 * 60 * 1000 },
|
||||
);
|
||||
|
||||
it(
|
||||
"prompt() can be called twice on the same session (resume)",
|
||||
async () => {
|
||||
await client.connect(process.cwd());
|
||||
|
||||
const first = await client.prompt("Say the word ALPHA and nothing else.");
|
||||
expect(first.text.length).toBeGreaterThan(0);
|
||||
|
||||
const second = await client.prompt("Now say the word BETA and nothing else.");
|
||||
expect(second.text.length).toBeGreaterThan(0);
|
||||
|
||||
expect(first.sessionId).toBe(second.sessionId);
|
||||
},
|
||||
{ timeout: 2 * 60 * 1000 },
|
||||
);
|
||||
|
||||
it(
|
||||
"prompt() collects structured messages including tool calls",
|
||||
async () => {
|
||||
await client.connect(process.cwd());
|
||||
const result = await client.prompt("Run this command: echo TOOL_DETAIL_TEST");
|
||||
expect(result.messages.length).toBeGreaterThan(0);
|
||||
// Should have at least one tool message (the echo command)
|
||||
const toolMessages = result.messages.filter((m) => m.role === "tool");
|
||||
expect(toolMessages.length).toBeGreaterThan(0);
|
||||
// Tool message should contain the output
|
||||
const toolContent = toolMessages[0]?.content ?? "";
|
||||
expect(toolContent).toContain("TOOL_DETAIL_TEST");
|
||||
// Should have assistant messages with tool_calls
|
||||
const assistantWithTools = result.messages.filter(
|
||||
(m) => m.role === "assistant" && m.tool_calls !== null,
|
||||
);
|
||||
expect(assistantWithTools.length).toBeGreaterThan(0);
|
||||
},
|
||||
{ timeout: 2 * 60 * 1000 },
|
||||
);
|
||||
});
|
||||
@@ -0,0 +1,78 @@
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import type { AgentContext } from "@uncaged/workflow-agent-kit";
|
||||
import type { ThreadId } from "@uncaged/workflow-protocol";
|
||||
import { buildHermesPrompt } from "../src/hermes.js";
|
||||
|
||||
function makeCtx(overrides: Partial<AgentContext> = {}): AgentContext {
|
||||
return {
|
||||
threadId: "01JTEST0000000000000000000" as ThreadId,
|
||||
edgePrompt: "Proceed with the assigned role.",
|
||||
isFirstVisit: true,
|
||||
workflow: {
|
||||
roles: {
|
||||
developer: {
|
||||
description: "TDD implementation per test spec",
|
||||
goal: "Write code",
|
||||
capabilities: ["coding"],
|
||||
procedure: "1. Read spec\n2. Write code",
|
||||
output: "List files changed",
|
||||
frontmatter: "",
|
||||
},
|
||||
},
|
||||
conditions: {},
|
||||
graph: {},
|
||||
},
|
||||
role: "developer",
|
||||
start: { prompt: "Fix the bug", workflowHash: "abc123", threadId: "t1" },
|
||||
steps: [],
|
||||
store: {} as AgentContext["store"],
|
||||
outputFormatInstruction: "Use YAML frontmatter",
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
describe("buildHermesPrompt", () => {
|
||||
test("first visit uses full role prompt and includes moderator instruction", () => {
|
||||
const result = buildHermesPrompt(
|
||||
makeCtx({ edgePrompt: "Focus on the failing test.", isFirstVisit: true }),
|
||||
);
|
||||
|
||||
expect(result).toMatch(/^Use YAML frontmatter/);
|
||||
expect(result).toContain("Write code");
|
||||
expect(result).toContain("## Task\nFix the bug");
|
||||
expect(result).toContain("## Moderator Instruction");
|
||||
expect(result).toContain("Focus on the failing test.");
|
||||
});
|
||||
|
||||
test("re-entry uses continuation prompt with edge instruction", () => {
|
||||
const ctx = makeCtx({
|
||||
isFirstVisit: false,
|
||||
edgePrompt: "The reviewer rejected your work. Fix the issues.",
|
||||
steps: [
|
||||
{ role: "developer", output: { summary: "Initial fix" }, agent: "uwf-hermes" },
|
||||
{ role: "reviewer", output: { approved: false }, agent: "uwf-hermes" },
|
||||
],
|
||||
});
|
||||
|
||||
const result = buildHermesPrompt(ctx);
|
||||
|
||||
expect(result).not.toContain("## Task");
|
||||
expect(result).toContain("## What Happened Since Your Last Turn");
|
||||
expect(result).toContain("## Moderator Instruction");
|
||||
expect(result).toContain("The reviewer rejected your work.");
|
||||
});
|
||||
|
||||
test("forced first visit via isFirstVisit uses initial prompt even when role appears in history", () => {
|
||||
const result = buildHermesPrompt(
|
||||
makeCtx({
|
||||
isFirstVisit: true,
|
||||
steps: [{ role: "developer", output: { done: true }, agent: "uwf-hermes" }],
|
||||
edgePrompt: "Retry with a fresh approach.",
|
||||
}),
|
||||
);
|
||||
|
||||
expect(result).toContain("## Task");
|
||||
expect(result).toContain("Retry with a fresh approach.");
|
||||
expect(result).not.toContain("## What Happened Since Your Last Turn");
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,56 @@
|
||||
import { afterEach, describe, expect, it } from "bun:test";
|
||||
|
||||
import { HermesAcpClient } from "../src/acp-client.js";
|
||||
|
||||
/**
|
||||
* E2E test for cross-process session resume.
|
||||
*
|
||||
* Simulates the workflow re-entry scenario:
|
||||
* 1. Client A: connect → prompt → close (developer first run)
|
||||
* 2. Client B: resume(sessionId) → prompt (developer re-entry after reviewer reject)
|
||||
*
|
||||
* This is what happens when uwf thread step spawns uwf-hermes twice for the same role.
|
||||
*/
|
||||
describe("HermesAcpClient cross-process resume", () => {
|
||||
const clients: HermesAcpClient[] = [];
|
||||
|
||||
afterEach(async () => {
|
||||
for (const c of clients) {
|
||||
await c.close();
|
||||
}
|
||||
clients.length = 0;
|
||||
});
|
||||
|
||||
it(
|
||||
"resume() after close — second prompt returns non-empty text",
|
||||
async () => {
|
||||
// --- Client A: first run ---
|
||||
const clientA = new HermesAcpClient();
|
||||
clients.push(clientA);
|
||||
|
||||
await clientA.connect(process.cwd());
|
||||
const first = await clientA.prompt(
|
||||
"Remember the secret code: WATERMELON. Reply with exactly: ACKNOWLEDGED",
|
||||
);
|
||||
expect(first.text.length).toBeGreaterThan(0);
|
||||
const sessionId = first.sessionId;
|
||||
|
||||
// Close client A (simulates uwf-hermes process exit)
|
||||
await clientA.close();
|
||||
|
||||
// --- Client B: resume (simulates re-entry) ---
|
||||
const clientB = new HermesAcpClient();
|
||||
clients.push(clientB);
|
||||
|
||||
await clientB.resume(sessionId, process.cwd());
|
||||
const second = await clientB.prompt(
|
||||
"What was the secret code I told you earlier? Reply with just the code word.",
|
||||
);
|
||||
|
||||
// The critical assertion: resumed session produces non-empty output
|
||||
expect(second.text.length).toBeGreaterThan(0);
|
||||
expect(second.sessionId).toBe(sessionId);
|
||||
},
|
||||
{ timeout: 3 * 60 * 1000 },
|
||||
);
|
||||
});
|
||||
@@ -22,7 +22,9 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"@uncaged/json-cas": "^0.4.0",
|
||||
"@uncaged/workflow-agent-kit": "workspace:^"
|
||||
"@uncaged/workflow-agent-kit": "workspace:^",
|
||||
"@uncaged/workflow-protocol": "workspace:^",
|
||||
"@uncaged/workflow-util": "workspace:^"
|
||||
},
|
||||
"devDependencies": {
|
||||
"typescript": "^5.8.3"
|
||||
|
||||
@@ -0,0 +1,393 @@
|
||||
import type { ChildProcess } from "node:child_process";
|
||||
import { spawn } from "node:child_process";
|
||||
import { createInterface } from "node:readline";
|
||||
|
||||
import type { HermesSessionMessage } from "./types.js";
|
||||
|
||||
const HERMES_COMMAND = "hermes";
|
||||
const PROTOCOL_VERSION = 1;
|
||||
|
||||
type JsonRpcResponse = {
|
||||
jsonrpc: "2.0";
|
||||
id: number;
|
||||
result?: unknown;
|
||||
error?: { code: number; message: string };
|
||||
};
|
||||
|
||||
type PendingRequest = {
|
||||
resolve: (value: JsonRpcResponse) => void;
|
||||
reject: (reason: Error) => void;
|
||||
};
|
||||
|
||||
/** Tracks in-flight tool calls so we can build complete messages when they finish. */
|
||||
type PendingToolCall = {
|
||||
name: string;
|
||||
args: string;
|
||||
};
|
||||
|
||||
export type AcpPromptResult = {
|
||||
text: string;
|
||||
sessionId: string;
|
||||
messages: HermesSessionMessage[];
|
||||
};
|
||||
|
||||
export class HermesAcpClient {
|
||||
private process: ChildProcess | null = null;
|
||||
private nextId = 1;
|
||||
private sessionId: string | null = null;
|
||||
private stderrBuffer = "";
|
||||
private pending = new Map<number, PendingRequest>();
|
||||
|
||||
// Message collection state
|
||||
private messageChunks: string[] = [];
|
||||
private reasoningChunks: string[] = [];
|
||||
private pendingTools = new Map<string, PendingToolCall>();
|
||||
messages: HermesSessionMessage[] = [];
|
||||
|
||||
/** Spawn hermes acp, initialize, create session */
|
||||
async connect(cwd: string): Promise<string> {
|
||||
await this.ensureProcess();
|
||||
await this.initialize();
|
||||
|
||||
const sessionResponse = (await this.sendRequest("session/new", {
|
||||
cwd,
|
||||
mcpServers: [],
|
||||
})) as { result: { sessionId: string } };
|
||||
|
||||
const sessionId = sessionResponse.result?.sessionId;
|
||||
if (typeof sessionId !== "string" || sessionId === "") {
|
||||
throw new Error(`session/new did not return a sessionId: ${JSON.stringify(sessionResponse)}`);
|
||||
}
|
||||
|
||||
this.sessionId = sessionId;
|
||||
return sessionId;
|
||||
}
|
||||
|
||||
/** Spawn hermes acp, initialize, resume an existing session */
|
||||
async resume(sessionId: string, cwd: string): Promise<string> {
|
||||
await this.ensureProcess();
|
||||
await this.initialize();
|
||||
|
||||
const response = await this.sendRequest("session/resume", {
|
||||
cwd,
|
||||
sessionId,
|
||||
mcpServers: [],
|
||||
});
|
||||
|
||||
if ((response as { error?: unknown }).error !== undefined) {
|
||||
throw new Error(
|
||||
`session/resume failed: ${JSON.stringify((response as { error: unknown }).error)}`,
|
||||
);
|
||||
}
|
||||
|
||||
this.sessionId = sessionId;
|
||||
return sessionId;
|
||||
}
|
||||
|
||||
/** Send prompt and collect full response text + structured messages. */
|
||||
async prompt(text: string): Promise<AcpPromptResult> {
|
||||
if (this.sessionId === null) {
|
||||
throw new Error("Not connected — call connect() first");
|
||||
}
|
||||
|
||||
this.messageChunks = [];
|
||||
this.reasoningChunks = [];
|
||||
|
||||
const response = await this.sendRequest("session/prompt", {
|
||||
sessionId: this.sessionId,
|
||||
prompt: [{ type: "text", text }],
|
||||
});
|
||||
|
||||
if ((response as { error?: unknown }).error !== undefined) {
|
||||
throw new Error(
|
||||
`session/prompt failed: ${JSON.stringify((response as { error: unknown }).error)}`,
|
||||
);
|
||||
}
|
||||
|
||||
// Flush any trailing assistant text that wasn't followed by a tool call.
|
||||
this.flushAssistantMessage();
|
||||
|
||||
// Extract the final assistant text from collected messages.
|
||||
let finalText = "";
|
||||
for (let i = this.messages.length - 1; i >= 0; i--) {
|
||||
const msg = this.messages[i];
|
||||
if (
|
||||
msg !== undefined &&
|
||||
msg.role === "assistant" &&
|
||||
msg.content !== null &&
|
||||
msg.content.trim() !== ""
|
||||
) {
|
||||
finalText = msg.content;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
text: finalText,
|
||||
sessionId: this.sessionId,
|
||||
messages: this.messages,
|
||||
};
|
||||
}
|
||||
|
||||
/** Close the connection */
|
||||
async close(): Promise<void> {
|
||||
if (this.process === null) {
|
||||
return;
|
||||
}
|
||||
this.sessionId = null;
|
||||
this.process.stdin?.end();
|
||||
const proc = this.process;
|
||||
await new Promise<void>((resolve) => {
|
||||
proc.on("close", () => resolve());
|
||||
setTimeout(resolve, 5000);
|
||||
});
|
||||
this.process = null;
|
||||
}
|
||||
|
||||
// ---- JSON-RPC transport ----
|
||||
|
||||
private sendRequest(
|
||||
method: string,
|
||||
params: Record<string, unknown>,
|
||||
timeoutMs = 10 * 60 * 1000,
|
||||
): Promise<JsonRpcResponse> {
|
||||
const id = this.nextId++;
|
||||
return new Promise<JsonRpcResponse>((resolve, reject) => {
|
||||
const timer = setTimeout(() => {
|
||||
this.pending.delete(id);
|
||||
reject(new Error(`Timeout waiting for response to ${method} (id=${id})`));
|
||||
}, timeoutMs);
|
||||
|
||||
this.pending.set(id, {
|
||||
resolve: (value) => {
|
||||
clearTimeout(timer);
|
||||
resolve(value);
|
||||
},
|
||||
reject: (err) => {
|
||||
clearTimeout(timer);
|
||||
reject(err);
|
||||
},
|
||||
});
|
||||
|
||||
this.writeLine(JSON.stringify({ jsonrpc: "2.0", id, method, params }));
|
||||
});
|
||||
}
|
||||
|
||||
private sendNotification(method: string, params?: Record<string, unknown>): void {
|
||||
const message: Record<string, unknown> = { jsonrpc: "2.0", method };
|
||||
if (params !== undefined) {
|
||||
message.params = params;
|
||||
}
|
||||
this.writeLine(JSON.stringify(message));
|
||||
}
|
||||
|
||||
private writeLine(line: string): void {
|
||||
if (this.process?.stdin === null || this.process?.stdin === undefined) {
|
||||
throw new Error("Cannot write: hermes acp process stdin not available");
|
||||
}
|
||||
this.process.stdin.write(`${line}\n`);
|
||||
}
|
||||
|
||||
private handleLine(line: string): void {
|
||||
if (line === "") {
|
||||
return;
|
||||
}
|
||||
|
||||
let parsed: unknown;
|
||||
try {
|
||||
parsed = JSON.parse(line);
|
||||
} catch {
|
||||
return;
|
||||
}
|
||||
|
||||
const msg = parsed as Record<string, unknown>;
|
||||
|
||||
const hasId = "id" in msg && msg.id !== undefined && msg.id !== null;
|
||||
const hasMethod = typeof msg.method === "string";
|
||||
|
||||
// JSON-RPC response to one of our requests (has "id" but no "method")
|
||||
if (hasId && !hasMethod) {
|
||||
const response = msg as unknown as JsonRpcResponse;
|
||||
const handler = this.pending.get(response.id);
|
||||
if (handler !== undefined) {
|
||||
this.pending.delete(response.id);
|
||||
handler.resolve(response);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Server-initiated JSON-RPC request: session/request_permission (has "id" + "method")
|
||||
if (msg.method === "session/request_permission" && hasId) {
|
||||
const params = msg.params as Record<string, unknown> | undefined;
|
||||
const options = (params?.options ?? []) as Array<{ optionId?: string }>;
|
||||
const firstOptionId = options[0]?.optionId ?? "";
|
||||
this.writeLine(
|
||||
JSON.stringify({
|
||||
jsonrpc: "2.0",
|
||||
id: msg.id,
|
||||
result: { outcome: { outcome: "selected", optionId: firstOptionId } },
|
||||
}),
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// JSON-RPC notification — session/update (no "id")
|
||||
if (msg.method === "session/update") {
|
||||
const params = msg.params as Record<string, unknown> | undefined;
|
||||
const update = params?.update as Record<string, unknown> | undefined;
|
||||
if (update !== undefined) {
|
||||
this.handleSessionUpdate(update);
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// ---- Session update → structured messages ----
|
||||
|
||||
private handleSessionUpdate(update: Record<string, unknown>): void {
|
||||
const updateType = update.sessionUpdate as string;
|
||||
|
||||
switch (updateType) {
|
||||
case "agent_message_chunk": {
|
||||
const content = update.content as { type?: string; text?: string } | undefined;
|
||||
if (content?.type === "text" && typeof content.text === "string") {
|
||||
this.messageChunks.push(content.text);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case "agent_thought_chunk": {
|
||||
const content = update.content as { type?: string; text?: string } | undefined;
|
||||
if (content?.type === "text" && typeof content.text === "string") {
|
||||
this.reasoningChunks.push(content.text);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case "tool_call": {
|
||||
const title = (update.title as string) ?? "";
|
||||
const rawInput = update.rawInput;
|
||||
const args =
|
||||
rawInput !== undefined && rawInput !== null ? JSON.stringify(rawInput) : "";
|
||||
const toolCallId = update.toolCallId as string;
|
||||
this.pendingTools.set(toolCallId, { name: title, args });
|
||||
|
||||
// Flush accumulated assistant text before tool call
|
||||
this.flushAssistantMessage();
|
||||
break;
|
||||
}
|
||||
|
||||
case "tool_call_update": {
|
||||
const status = update.status as string | undefined;
|
||||
if (status === "completed" || status === "failed") {
|
||||
const toolCallId = update.toolCallId as string;
|
||||
const pending = this.pendingTools.get(toolCallId);
|
||||
const toolName = pending?.name ?? toolCallId;
|
||||
const rawOutput = update.rawOutput;
|
||||
const outputStr =
|
||||
rawOutput !== undefined && rawOutput !== null
|
||||
? typeof rawOutput === "string"
|
||||
? rawOutput
|
||||
: JSON.stringify(rawOutput)
|
||||
: "";
|
||||
this.messages.push({
|
||||
role: "assistant",
|
||||
content: null,
|
||||
reasoning: null,
|
||||
tool_calls: [{ function: { name: toolName, arguments: pending?.args ?? "" } }],
|
||||
});
|
||||
this.messages.push({
|
||||
role: "tool",
|
||||
content: outputStr,
|
||||
reasoning: null,
|
||||
tool_calls: null,
|
||||
});
|
||||
this.pendingTools.delete(toolCallId);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/** Flush any accumulated text/reasoning into an assistant message. */
|
||||
private flushAssistantMessage(): void {
|
||||
const text = this.messageChunks.join("");
|
||||
const reasoning = this.reasoningChunks.join("");
|
||||
if (text !== "" || reasoning !== "") {
|
||||
this.messages.push({
|
||||
role: "assistant",
|
||||
content: text || null,
|
||||
reasoning: reasoning || null,
|
||||
tool_calls: null,
|
||||
});
|
||||
}
|
||||
this.messageChunks = [];
|
||||
this.reasoningChunks = [];
|
||||
}
|
||||
|
||||
private rejectAll(err: Error): void {
|
||||
for (const handler of this.pending.values()) {
|
||||
handler.reject(err);
|
||||
}
|
||||
this.pending.clear();
|
||||
}
|
||||
|
||||
private async ensureProcess(): Promise<void> {
|
||||
if (this.process !== null) {
|
||||
return;
|
||||
}
|
||||
|
||||
const child = spawn(HERMES_COMMAND, ["acp"], {
|
||||
env: process.env,
|
||||
shell: false,
|
||||
stdio: ["pipe", "pipe", "pipe"],
|
||||
});
|
||||
|
||||
this.process = child;
|
||||
|
||||
child.stderr?.on("data", (chunk: Buffer) => {
|
||||
this.stderrBuffer += chunk.toString();
|
||||
});
|
||||
|
||||
child.on("error", (cause) => {
|
||||
const message = cause instanceof Error ? cause.message : String(cause);
|
||||
this.rejectAll(new Error(`hermes acp spawn failed: ${message}`));
|
||||
});
|
||||
|
||||
child.on("close", (code) => {
|
||||
if (code !== 0 && this.pending.size > 0) {
|
||||
const detail = this.stderrBuffer.trim() !== "" ? ` stderr=${this.stderrBuffer.trim()}` : "";
|
||||
this.rejectAll(
|
||||
new Error(`hermes acp exited unexpectedly with code ${code ?? "null"}${detail}`),
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
if (child.stdout === null) {
|
||||
throw new Error("hermes acp process stdout is not available");
|
||||
}
|
||||
const rl = createInterface({ input: child.stdout });
|
||||
rl.on("line", (line) => {
|
||||
this.handleLine(line.trim());
|
||||
});
|
||||
}
|
||||
|
||||
private async initialize(): Promise<void> {
|
||||
const initResponse = await this.sendRequest("initialize", {
|
||||
protocolVersion: PROTOCOL_VERSION,
|
||||
clientInfo: { name: "uwf", version: "0.1.0" },
|
||||
capabilities: {},
|
||||
});
|
||||
|
||||
if ((initResponse as { error?: unknown }).error !== undefined) {
|
||||
throw new Error(
|
||||
`initialize failed: ${JSON.stringify((initResponse as { error: unknown }).error)}`,
|
||||
);
|
||||
}
|
||||
|
||||
this.sendNotification("initialized");
|
||||
}
|
||||
}
|
||||
@@ -1,21 +1,18 @@
|
||||
import { spawn } from "node:child_process";
|
||||
import type { Store } from "@uncaged/json-cas";
|
||||
|
||||
import {
|
||||
type AgentContext,
|
||||
type AgentRunResult,
|
||||
buildContinuationPrompt,
|
||||
buildRolePrompt,
|
||||
createAgent,
|
||||
} from "@uncaged/workflow-agent-kit";
|
||||
import { createLogger } from "@uncaged/workflow-util";
|
||||
|
||||
import {
|
||||
loadHermesSession,
|
||||
parseSessionIdFromStdout,
|
||||
storeHermesSessionDetail,
|
||||
} from "./session-detail.js";
|
||||
import { HermesAcpClient } from "./acp-client.js";
|
||||
import { getCachedSessionId, isResumeDisabled, setCachedSessionId } from "./session-cache.js";
|
||||
import { storeHermesSessionDetail } from "./session-detail.js";
|
||||
|
||||
const HERMES_COMMAND = "hermes";
|
||||
const HERMES_MAX_TURNS = 90;
|
||||
const log = createLogger({ sink: { kind: "stderr" } });
|
||||
|
||||
function buildHistorySummary(steps: AgentContext["steps"]): string {
|
||||
if (steps.length === 0) {
|
||||
@@ -36,12 +33,11 @@ function buildHistorySummary(steps: AgentContext["steps"]): string {
|
||||
return lines.join("\n");
|
||||
}
|
||||
|
||||
/** Assemble system prompt, task, and prior step outputs for Hermes. */
|
||||
export function buildHermesPrompt(ctx: AgentContext): string {
|
||||
function buildInitialPrompt(ctx: AgentContext): string {
|
||||
const roleDef = ctx.workflow.roles[ctx.role];
|
||||
const rolePrompt = roleDef !== undefined ? buildRolePrompt(roleDef) : "";
|
||||
const parts: string[] = [];
|
||||
if (ctx.outputFormatInstruction !== undefined && ctx.outputFormatInstruction !== "") {
|
||||
if (ctx.outputFormatInstruction !== "") {
|
||||
parts.push(ctx.outputFormatInstruction, "");
|
||||
}
|
||||
parts.push(rolePrompt, "", "## Task", ctx.start.prompt);
|
||||
@@ -49,116 +45,145 @@ export function buildHermesPrompt(ctx: AgentContext): string {
|
||||
if (historyBlock !== "") {
|
||||
parts.push("", historyBlock);
|
||||
}
|
||||
parts.push("", "## Moderator Instruction", "", ctx.edgePrompt);
|
||||
return parts.join("\n");
|
||||
}
|
||||
|
||||
function spawnHermes(args: string[]): Promise<{ stdout: string; stderr: string }> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const child = spawn(HERMES_COMMAND, args, {
|
||||
env: process.env,
|
||||
shell: false,
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
});
|
||||
|
||||
let stdout = "";
|
||||
let stderr = "";
|
||||
child.stdout?.on("data", (chunk: Buffer) => {
|
||||
stdout += chunk.toString();
|
||||
});
|
||||
child.stderr?.on("data", (chunk: Buffer) => {
|
||||
stderr += chunk.toString();
|
||||
});
|
||||
|
||||
child.on("error", (cause) => {
|
||||
const message = cause instanceof Error ? cause.message : String(cause);
|
||||
reject(new Error(`hermes spawn failed: ${message}`));
|
||||
});
|
||||
|
||||
child.on("close", (code) => {
|
||||
if (code === 0) {
|
||||
resolve({ stdout, stderr });
|
||||
return;
|
||||
}
|
||||
const detail = stderr.trim() !== "" ? ` stderr=${stderr.trim()}` : "";
|
||||
reject(new Error(`hermes exited with code ${code ?? "null"}${detail}`));
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
function spawnHermesChat(prompt: string): Promise<{ stdout: string; stderr: string }> {
|
||||
return spawnHermes([
|
||||
"chat",
|
||||
"-q",
|
||||
prompt,
|
||||
"--yolo",
|
||||
"--max-turns",
|
||||
String(HERMES_MAX_TURNS),
|
||||
"--quiet",
|
||||
]);
|
||||
}
|
||||
|
||||
function spawnHermesResume(
|
||||
sessionId: string,
|
||||
message: string,
|
||||
): Promise<{ stdout: string; stderr: string }> {
|
||||
return spawnHermes([
|
||||
"chat",
|
||||
"--resume",
|
||||
sessionId,
|
||||
"-q",
|
||||
message,
|
||||
"--yolo",
|
||||
"--max-turns",
|
||||
String(HERMES_MAX_TURNS),
|
||||
"--quiet",
|
||||
]);
|
||||
}
|
||||
|
||||
function parseSessionId(stdout: string, stderr: string): string {
|
||||
const sessionId = parseSessionIdFromStdout(stderr) ?? parseSessionIdFromStdout(stdout);
|
||||
if (sessionId === null) {
|
||||
throw new Error(
|
||||
"Failed to parse session_id from hermes output.\n" +
|
||||
`stderr (first 200 chars): ${stderr.slice(0, 200)}\n` +
|
||||
`stdout (first 200 chars): ${stdout.slice(0, 200)}`,
|
||||
);
|
||||
/** Assemble system prompt, task, and prior step outputs for Hermes. */
|
||||
export function buildHermesPrompt(ctx: AgentContext): string {
|
||||
if (!ctx.isFirstVisit) {
|
||||
const parts: string[] = [];
|
||||
if (ctx.outputFormatInstruction !== "") {
|
||||
parts.push(ctx.outputFormatInstruction, "");
|
||||
}
|
||||
parts.push(buildContinuationPrompt(ctx.steps, ctx.role, ctx.edgePrompt));
|
||||
return parts.join("\n");
|
||||
}
|
||||
return sessionId;
|
||||
|
||||
return buildInitialPrompt(ctx);
|
||||
}
|
||||
|
||||
async function buildResultFromSession(sessionId: string, store: Store): Promise<AgentRunResult> {
|
||||
const session = await loadHermesSession(sessionId);
|
||||
if (session === null) {
|
||||
throw new Error(`Failed to load hermes session file for session_id: ${sessionId}`);
|
||||
}
|
||||
const { detailHash, output } = await storeHermesSessionDetail(store, session);
|
||||
return { output, detailHash, sessionId };
|
||||
}
|
||||
|
||||
async function runHermes(ctx: AgentContext): Promise<AgentRunResult> {
|
||||
const fullPrompt = buildHermesPrompt(ctx);
|
||||
const { stdout, stderr } = await spawnHermesChat(fullPrompt);
|
||||
const sessionId = parseSessionId(stdout, stderr);
|
||||
return buildResultFromSession(sessionId, ctx.store);
|
||||
}
|
||||
|
||||
async function continueHermes(
|
||||
sessionId: string,
|
||||
message: string,
|
||||
async function storePromptResult(
|
||||
store: Store,
|
||||
): Promise<AgentRunResult> {
|
||||
const { stdout, stderr } = await spawnHermesResume(sessionId, message);
|
||||
// Resume may return a new session_id
|
||||
const newSessionId = parseSessionIdFromStdout(stderr) ?? parseSessionIdFromStdout(stdout);
|
||||
const resolvedId = newSessionId ?? sessionId;
|
||||
return buildResultFromSession(resolvedId, store);
|
||||
sessionId: string,
|
||||
messages: Awaited<ReturnType<HermesAcpClient["prompt"]>>["messages"],
|
||||
): Promise<{ detailHash: string }> {
|
||||
const session = {
|
||||
session_id: sessionId,
|
||||
model: "",
|
||||
session_start: new Date().toISOString(),
|
||||
messages,
|
||||
};
|
||||
return storeHermesSessionDetail(store, session);
|
||||
}
|
||||
|
||||
/** Agent CLI factory: parses argv, runs Hermes, extracts output, writes StepNode. */
|
||||
type PromptAttempt = {
|
||||
useContinuation: boolean;
|
||||
resumed: boolean;
|
||||
};
|
||||
|
||||
async function prepareSession(
|
||||
client: HermesAcpClient,
|
||||
ctx: AgentContext,
|
||||
cwd: string,
|
||||
): Promise<PromptAttempt> {
|
||||
if (ctx.isFirstVisit || isResumeDisabled()) {
|
||||
await client.connect(cwd);
|
||||
return { useContinuation: false, resumed: false };
|
||||
}
|
||||
|
||||
const cachedSessionId = await getCachedSessionId(ctx.threadId, ctx.role);
|
||||
if (cachedSessionId === null) {
|
||||
log("6RWK3N8Q", `no cached session for ${ctx.threadId}:${ctx.role}, starting new session`);
|
||||
await client.connect(cwd);
|
||||
return { useContinuation: false, resumed: false };
|
||||
}
|
||||
|
||||
try {
|
||||
await client.resume(cachedSessionId, cwd);
|
||||
log("9MHT4V2P", `resumed hermes session ${cachedSessionId} for ${ctx.threadId}:${ctx.role}`);
|
||||
return { useContinuation: true, resumed: true };
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
log("3XPN7K4W", `session resume failed, falling back to new session: ${message}`);
|
||||
await client.close();
|
||||
await client.connect(cwd);
|
||||
return { useContinuation: false, resumed: false };
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Agent CLI factory: parses argv, runs Hermes, extracts output, writes StepNode.
|
||||
*
|
||||
* A single ACP client is shared across run() and continue() calls so that
|
||||
* frontmatter retry loops keep the same Hermes session context. The client
|
||||
* is closed once the agent process exits (via process.on("exit")).
|
||||
*/
|
||||
export function createHermesAgent(): () => Promise<void> {
|
||||
return createAgent({
|
||||
const client = new HermesAcpClient();
|
||||
|
||||
// Ensure cleanup regardless of how the process exits.
|
||||
process.on("exit", () => {
|
||||
void client.close();
|
||||
});
|
||||
|
||||
async function runPrompt(ctx: AgentContext, useContinuation: boolean): Promise<AgentRunResult> {
|
||||
const effectiveCtx = useContinuation ? ctx : { ...ctx, isFirstVisit: true };
|
||||
const fullPrompt = buildHermesPrompt(effectiveCtx);
|
||||
const { text, sessionId, messages } = await client.prompt(fullPrompt);
|
||||
const { detailHash } = await storePromptResult(ctx.store, sessionId, messages);
|
||||
|
||||
if (!isResumeDisabled()) {
|
||||
await setCachedSessionId(ctx.threadId, ctx.role, sessionId);
|
||||
}
|
||||
|
||||
return { output: text, detailHash, sessionId };
|
||||
}
|
||||
|
||||
async function runHermes(ctx: AgentContext): Promise<AgentRunResult> {
|
||||
const cwd = process.cwd();
|
||||
const attempt = await prepareSession(client, ctx, cwd);
|
||||
|
||||
try {
|
||||
return await runPrompt(ctx, attempt.useContinuation);
|
||||
} catch (error) {
|
||||
if (!attempt.resumed) {
|
||||
throw error;
|
||||
}
|
||||
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
log("8FQW2R6N", `continuation prompt failed, retrying with initial prompt: ${message}`);
|
||||
await client.close();
|
||||
await client.connect(cwd);
|
||||
return runPrompt(ctx, false);
|
||||
}
|
||||
}
|
||||
|
||||
async function continueHermes(
|
||||
_sessionId: string,
|
||||
message: string,
|
||||
store: Store,
|
||||
): Promise<AgentRunResult> {
|
||||
// Client is already connected from runHermes — same ACP session,
|
||||
// so the agent sees the full conversation history (crucial for retries).
|
||||
const { text, sessionId, messages } = await client.prompt(message);
|
||||
const { detailHash } = await storePromptResult(store, sessionId, messages);
|
||||
return { output: text, detailHash, sessionId };
|
||||
}
|
||||
|
||||
const agentMain = createAgent({
|
||||
name: "hermes",
|
||||
run: runHermes,
|
||||
continue: continueHermes,
|
||||
});
|
||||
|
||||
// Wrap to ensure ACP client is closed after agent completes,
|
||||
// so the hermes subprocess exits and bun can terminate.
|
||||
return async () => {
|
||||
try {
|
||||
await agentMain();
|
||||
} finally {
|
||||
await client.close();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
export { HermesAcpClient } from "./acp-client.js";
|
||||
export { buildHermesPrompt, createHermesAgent } from "./hermes.js";
|
||||
|
||||
@@ -0,0 +1,70 @@
|
||||
import { mkdir, readFile, writeFile } from "node:fs/promises";
|
||||
import { dirname, join } from "node:path";
|
||||
|
||||
import { resolveStorageRoot } from "@uncaged/workflow-agent-kit";
|
||||
import type { ThreadId } from "@uncaged/workflow-protocol";
|
||||
|
||||
type HermesSessionCache = Record<string, string>;
|
||||
|
||||
function getCachePath(): string {
|
||||
return join(resolveStorageRoot(), "cache", "hermes-sessions.json");
|
||||
}
|
||||
|
||||
function cacheKey(threadId: ThreadId, role: string): string {
|
||||
return `${threadId}:${role}`;
|
||||
}
|
||||
|
||||
function isRecord(value: unknown): value is Record<string, unknown> {
|
||||
return typeof value === "object" && value !== null && !Array.isArray(value);
|
||||
}
|
||||
|
||||
async function readCache(): Promise<HermesSessionCache> {
|
||||
const path = getCachePath();
|
||||
try {
|
||||
const text = await readFile(path, "utf8");
|
||||
const raw = JSON.parse(text) as unknown;
|
||||
if (!isRecord(raw)) {
|
||||
return {};
|
||||
}
|
||||
const cache: HermesSessionCache = {};
|
||||
for (const [key, value] of Object.entries(raw)) {
|
||||
if (typeof value === "string" && value !== "") {
|
||||
cache[key] = value;
|
||||
}
|
||||
}
|
||||
return cache;
|
||||
} catch (e) {
|
||||
const err = e as NodeJS.ErrnoException;
|
||||
if (err.code === "ENOENT") {
|
||||
return {};
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
async function writeCache(cache: HermesSessionCache): Promise<void> {
|
||||
const path = getCachePath();
|
||||
await mkdir(dirname(path), { recursive: true });
|
||||
await writeFile(path, `${JSON.stringify(cache, null, 2)}\n`, "utf8");
|
||||
}
|
||||
|
||||
export function isResumeDisabled(): boolean {
|
||||
const flag = process.env.UWF_NO_RESUME;
|
||||
return flag !== undefined && flag !== "";
|
||||
}
|
||||
|
||||
export async function getCachedSessionId(threadId: ThreadId, role: string): Promise<string | null> {
|
||||
const cache = await readCache();
|
||||
const sessionId = cache[cacheKey(threadId, role)];
|
||||
return sessionId ?? null;
|
||||
}
|
||||
|
||||
export async function setCachedSessionId(
|
||||
threadId: ThreadId,
|
||||
role: string,
|
||||
sessionId: string,
|
||||
): Promise<void> {
|
||||
const cache = await readCache();
|
||||
cache[cacheKey(threadId, role)] = sessionId;
|
||||
await writeCache(cache);
|
||||
}
|
||||
@@ -0,0 +1,70 @@
|
||||
import type { StepContext } from "@uncaged/workflow-protocol";
|
||||
import { describe, expect, test } from "vitest";
|
||||
import { buildContinuationPrompt } from "../src/build-continuation-prompt.js";
|
||||
|
||||
const reviewerStep: StepContext = {
|
||||
role: "reviewer",
|
||||
output: { approved: false, comments: "Missing tests" },
|
||||
detail: "2MXBG6PN4A8JR",
|
||||
agent: "uwf-hermes",
|
||||
};
|
||||
|
||||
const developerStep: StepContext = {
|
||||
role: "developer",
|
||||
output: { filesChanged: ["src/app.ts"], summary: "Initial fix" },
|
||||
detail: "1VPBG9SM5E7WK",
|
||||
agent: "uwf-hermes",
|
||||
};
|
||||
|
||||
describe("buildContinuationPrompt", () => {
|
||||
test("includes steps after the last matching role and the edge prompt", () => {
|
||||
const steps: StepContext[] = [
|
||||
developerStep,
|
||||
reviewerStep,
|
||||
{
|
||||
role: "planner",
|
||||
output: { plan: "revise approach" },
|
||||
detail: "7BQST3VW9F2MA",
|
||||
agent: "uwf-hermes",
|
||||
},
|
||||
];
|
||||
|
||||
const result = buildContinuationPrompt(
|
||||
steps,
|
||||
"developer",
|
||||
"The reviewer rejected your implementation. Read their feedback and fix the issues.",
|
||||
);
|
||||
|
||||
expect(result).toContain("## What Happened Since Your Last Turn");
|
||||
expect(result).toContain("### Step 2: reviewer");
|
||||
expect(result).toContain("Missing tests");
|
||||
expect(result).toContain("### Step 3: planner");
|
||||
expect(result).toContain("## Moderator Instruction");
|
||||
expect(result).toContain("The reviewer rejected your implementation.");
|
||||
expect(result).not.toContain("Initial fix");
|
||||
});
|
||||
|
||||
test("uses all steps when the role has not run before", () => {
|
||||
const result = buildContinuationPrompt(
|
||||
[developerStep, reviewerStep],
|
||||
"planner",
|
||||
"Continue from the reviewer feedback.",
|
||||
);
|
||||
|
||||
expect(result).toContain("### Step 1: developer");
|
||||
expect(result).toContain("### Step 2: reviewer");
|
||||
expect(result).toContain("Continue from the reviewer feedback.");
|
||||
});
|
||||
|
||||
test("still includes moderator instruction when there are no intervening steps", () => {
|
||||
const result = buildContinuationPrompt(
|
||||
[developerStep],
|
||||
"developer",
|
||||
"Please revise your work.",
|
||||
);
|
||||
|
||||
expect(result).not.toContain("## What Happened Since Your Last Turn");
|
||||
expect(result).toContain("## Moderator Instruction");
|
||||
expect(result).toContain("Please revise your work.");
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,53 @@
|
||||
import type { StepContext } from "@uncaged/workflow-protocol";
|
||||
|
||||
function formatStep(step: StepContext, stepNumber: number): string {
|
||||
return [
|
||||
`### Step ${stepNumber}: ${step.role}`,
|
||||
`Output: ${JSON.stringify(step.output)}`,
|
||||
`Agent: ${step.agent}`,
|
||||
].join("\n");
|
||||
}
|
||||
|
||||
function findLastRoleIndex(steps: StepContext[], role: string): number {
|
||||
for (let i = steps.length - 1; i >= 0; i--) {
|
||||
const step = steps[i];
|
||||
if (step !== undefined && step.role === role) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a continuation prompt for a role re-entry.
|
||||
*
|
||||
* Finds the most recent step for `role`, collects everything after it as context,
|
||||
* and appends the moderator edge prompt as the instruction.
|
||||
*/
|
||||
export function buildContinuationPrompt(
|
||||
steps: StepContext[],
|
||||
role: string,
|
||||
edgePrompt: string,
|
||||
): string {
|
||||
const lastIndex = findLastRoleIndex(steps, role);
|
||||
const sinceSteps = lastIndex >= 0 ? steps.slice(lastIndex + 1) : steps;
|
||||
|
||||
const parts: string[] = [];
|
||||
|
||||
if (sinceSteps.length > 0) {
|
||||
parts.push("## What Happened Since Your Last Turn");
|
||||
const baseStepNumber = lastIndex >= 0 ? lastIndex + 2 : 1;
|
||||
for (let i = 0; i < sinceSteps.length; i++) {
|
||||
const step = sinceSteps[i];
|
||||
if (step === undefined) {
|
||||
continue;
|
||||
}
|
||||
parts.push("");
|
||||
parts.push(formatStep(step, baseStepNumber + i));
|
||||
}
|
||||
parts.push("");
|
||||
}
|
||||
|
||||
parts.push("## Moderator Instruction", "", edgePrompt);
|
||||
return parts.join("\n");
|
||||
}
|
||||
@@ -21,6 +21,14 @@ function fail(message: string): never {
|
||||
throw new Error(message);
|
||||
}
|
||||
|
||||
function readEdgePrompt(): string {
|
||||
const value = process.env.UWF_EDGE_PROMPT;
|
||||
if (value === undefined || value === "") {
|
||||
fail("UWF_EDGE_PROMPT environment variable is required");
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
function walkChain(store: Store, schemas: AgentStore["schemas"], headHash: CasRef): ChainState {
|
||||
const headNode = store.get(headHash);
|
||||
if (headNode === null) {
|
||||
@@ -133,6 +141,8 @@ export async function buildContext(threadId: ThreadId, role: string): Promise<Ag
|
||||
}
|
||||
|
||||
const steps = await buildHistory(store, chain.stepsNewestFirst);
|
||||
const edgePrompt = readEdgePrompt();
|
||||
const isFirstVisit = !steps.some((s) => s.role === role);
|
||||
|
||||
return {
|
||||
threadId,
|
||||
@@ -142,6 +152,8 @@ export async function buildContext(threadId: ThreadId, role: string): Promise<Ag
|
||||
workflow,
|
||||
store,
|
||||
outputFormatInstruction: "",
|
||||
edgePrompt,
|
||||
isFirstVisit,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -178,6 +190,8 @@ export async function buildContextWithMeta(
|
||||
}
|
||||
|
||||
const steps = await buildHistory(store, chain.stepsNewestFirst);
|
||||
const edgePrompt = readEdgePrompt();
|
||||
const isFirstVisit = !steps.some((s) => s.role === role);
|
||||
|
||||
return {
|
||||
threadId,
|
||||
@@ -187,6 +201,8 @@ export async function buildContextWithMeta(
|
||||
workflow,
|
||||
store,
|
||||
outputFormatInstruction: "",
|
||||
edgePrompt,
|
||||
isFirstVisit,
|
||||
meta: { storageRoot, store, schemas, headHash, chain },
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
export { buildContinuationPrompt } from "./build-continuation-prompt.js";
|
||||
export { buildOutputFormatInstruction } from "./build-output-format-instruction.js";
|
||||
export { buildRolePrompt } from "./build-role-prompt.js";
|
||||
export type { BuildContextMeta } from "./context.js";
|
||||
@@ -11,7 +12,7 @@ export {
|
||||
export type { FrontmatterFastPathResult } from "./frontmatter.js";
|
||||
export { tryFrontmatterFastPath } from "./frontmatter.js";
|
||||
export { createAgent } from "./run.js";
|
||||
export { getConfigPath, getEnvPath, loadWorkflowConfig } from "./storage.js";
|
||||
export { getConfigPath, getEnvPath, loadWorkflowConfig, resolveStorageRoot } from "./storage.js";
|
||||
export type {
|
||||
AgentContext,
|
||||
AgentContinueFn,
|
||||
|
||||
@@ -12,6 +12,15 @@ export type AgentContext = ModeratorContext & {
|
||||
* role's output schema. Populated by `createAgent` at run time.
|
||||
*/
|
||||
outputFormatInstruction: string;
|
||||
/**
|
||||
* Edge prompt from the graph transition that led to this role (UWF_EDGE_PROMPT).
|
||||
* Always the real moderator instruction for this step.
|
||||
*/
|
||||
edgePrompt: string;
|
||||
/**
|
||||
* True when the current role has not appeared in steps history before this invocation.
|
||||
*/
|
||||
isFirstVisit: boolean;
|
||||
};
|
||||
|
||||
export type AgentRunResult = {
|
||||
|
||||
@@ -77,9 +77,11 @@ function stepsToPayload(name: string, description: string, steps: WorkFlowSteps)
|
||||
};
|
||||
}
|
||||
}
|
||||
const targetRole = t.target === "END" ? "$END" : t.target;
|
||||
return {
|
||||
role: t.target === "END" ? "$END" : t.target,
|
||||
role: targetRole,
|
||||
condition: condName,
|
||||
prompt: `Transition to ${targetRole}.`,
|
||||
};
|
||||
});
|
||||
|
||||
@@ -87,7 +89,14 @@ function stepsToPayload(name: string, description: string, steps: WorkFlowSteps)
|
||||
}
|
||||
|
||||
if (steps.length > 0) {
|
||||
graph["$START"] = [{ role: steps[0].role.name, condition: null }];
|
||||
const firstRole = steps[0].role.name;
|
||||
graph["$START"] = [
|
||||
{
|
||||
role: firstRole,
|
||||
condition: null,
|
||||
prompt: `Begin workflow at role ${firstRole}.`,
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
return { name, description, roles, conditions, graph };
|
||||
|
||||
@@ -9,27 +9,27 @@ const solveIssueWorkflow: WorkflowPayload = {
|
||||
roles: {
|
||||
planner: {
|
||||
description: "Creates implementation plan",
|
||||
identity: "You are a planning agent.",
|
||||
prepare: "Review the issue context.",
|
||||
execute: "Create a step-by-step plan.",
|
||||
report: "Output the plan and steps.",
|
||||
outputSchema: "5GWKR8TN1V3JA",
|
||||
goal: "You are a planning agent.",
|
||||
capabilities: ["planning"],
|
||||
procedure: "Create a step-by-step plan.",
|
||||
output: "Output the plan and steps.",
|
||||
frontmatter: "5GWKR8TN1V3JA",
|
||||
},
|
||||
developer: {
|
||||
description: "Implements code changes",
|
||||
identity: "You are a developer agent.",
|
||||
prepare: "Load coding tools.",
|
||||
execute: "Implement the plan.",
|
||||
report: "List files changed and summary.",
|
||||
outputSchema: "8CNWT4KR6D1HV",
|
||||
goal: "You are a developer agent.",
|
||||
capabilities: ["coding"],
|
||||
procedure: "Implement the plan.",
|
||||
output: "List files changed and summary.",
|
||||
frontmatter: "8CNWT4KR6D1HV",
|
||||
},
|
||||
reviewer: {
|
||||
description: "Reviews code changes",
|
||||
identity: "You are a code reviewer.",
|
||||
prepare: "Review project conventions.",
|
||||
execute: "Review the implementation.",
|
||||
report: "Approve or reject with comments.",
|
||||
outputSchema: "1VPBG9SM5E7WK",
|
||||
goal: "You are a code reviewer.",
|
||||
capabilities: ["code-review"],
|
||||
procedure: "Review the implementation.",
|
||||
output: "Approve or reject with comments.",
|
||||
frontmatter: "1VPBG9SM5E7WK",
|
||||
},
|
||||
},
|
||||
conditions: {
|
||||
@@ -43,15 +43,35 @@ const solveIssueWorkflow: WorkflowPayload = {
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: [{ role: "planner", condition: null }],
|
||||
planner: [
|
||||
{ role: "developer", condition: "needsClarification" },
|
||||
{ role: "$END", condition: null },
|
||||
$START: [
|
||||
{
|
||||
role: "planner",
|
||||
condition: null,
|
||||
prompt: "Start planning from the issue in the task.",
|
||||
},
|
||||
],
|
||||
planner: [
|
||||
{
|
||||
role: "developer",
|
||||
condition: "needsClarification",
|
||||
prompt: "Clarification is needed; hand off to developer.",
|
||||
},
|
||||
{ role: "$END", condition: null, prompt: "Planning complete; end workflow." },
|
||||
],
|
||||
developer: [
|
||||
{
|
||||
role: "reviewer",
|
||||
condition: null,
|
||||
prompt: "Implementation done; send to reviewer.",
|
||||
},
|
||||
],
|
||||
developer: [{ role: "reviewer", condition: null }],
|
||||
reviewer: [
|
||||
{ role: "developer", condition: "rejected" },
|
||||
{ role: "$END", condition: null },
|
||||
{
|
||||
role: "developer",
|
||||
condition: "rejected",
|
||||
prompt: "Reviewer rejected; return to developer.",
|
||||
},
|
||||
{ role: "$END", condition: null, prompt: "Review passed; end workflow." },
|
||||
],
|
||||
},
|
||||
};
|
||||
@@ -69,7 +89,10 @@ function makeContext(steps: ModeratorContext["steps"]): ModeratorContext {
|
||||
describe("evaluate", () => {
|
||||
test("$START → first role (fallback)", async () => {
|
||||
const result = await evaluate(solveIssueWorkflow, makeContext([]));
|
||||
expect(result).toEqual({ ok: true, value: "planner" });
|
||||
expect(result).toEqual({
|
||||
ok: true,
|
||||
value: { role: "planner", prompt: "Start planning from the issue in the task." },
|
||||
});
|
||||
});
|
||||
|
||||
test("condition match (rejected → developer)", async () => {
|
||||
@@ -82,7 +105,10 @@ describe("evaluate", () => {
|
||||
},
|
||||
]);
|
||||
const result = await evaluate(solveIssueWorkflow, context);
|
||||
expect(result).toEqual({ ok: true, value: "developer" });
|
||||
expect(result).toEqual({
|
||||
ok: true,
|
||||
value: { role: "developer", prompt: "Reviewer rejected; return to developer." },
|
||||
});
|
||||
});
|
||||
|
||||
test("fallback when condition does not match → $END", async () => {
|
||||
@@ -95,7 +121,10 @@ describe("evaluate", () => {
|
||||
},
|
||||
]);
|
||||
const result = await evaluate(solveIssueWorkflow, context);
|
||||
expect(result).toEqual({ ok: true, value: "$END" });
|
||||
expect(result).toEqual({
|
||||
ok: true,
|
||||
value: { role: "$END", prompt: "Review passed; end workflow." },
|
||||
});
|
||||
});
|
||||
|
||||
test("missing role in graph → error", async () => {
|
||||
@@ -124,7 +153,10 @@ describe("evaluate", () => {
|
||||
},
|
||||
]);
|
||||
const result = await evaluate(solveIssueWorkflow, context);
|
||||
expect(result).toEqual({ ok: true, value: "developer" });
|
||||
expect(result).toEqual({
|
||||
ok: true,
|
||||
value: { role: "developer", prompt: "Clarification is needed; hand off to developer." },
|
||||
});
|
||||
});
|
||||
|
||||
test("$last returns most recent matching role's frontmatter", async () => {
|
||||
@@ -137,10 +169,20 @@ describe("evaluate", () => {
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: [{ role: "developer", condition: null }],
|
||||
$START: [
|
||||
{
|
||||
role: "developer",
|
||||
condition: null,
|
||||
prompt: "Begin development.",
|
||||
},
|
||||
],
|
||||
developer: [
|
||||
{ role: "$END", condition: "devFailed" },
|
||||
{ role: "reviewer", condition: null },
|
||||
{ role: "$END", condition: "devFailed", prompt: "Development failed; end." },
|
||||
{
|
||||
role: "reviewer",
|
||||
condition: null,
|
||||
prompt: "Development succeeded; review.",
|
||||
},
|
||||
],
|
||||
},
|
||||
};
|
||||
@@ -165,7 +207,10 @@ describe("evaluate", () => {
|
||||
},
|
||||
]);
|
||||
const result = await evaluate(workflow, context);
|
||||
expect(result).toEqual({ ok: true, value: "$END" });
|
||||
expect(result).toEqual({
|
||||
ok: true,
|
||||
value: { role: "$END", prompt: "Development failed; end." },
|
||||
});
|
||||
});
|
||||
|
||||
test("$first returns earliest matching role's frontmatter", async () => {
|
||||
@@ -178,10 +223,20 @@ describe("evaluate", () => {
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: [{ role: "planner", condition: null }],
|
||||
$START: [
|
||||
{
|
||||
role: "planner",
|
||||
condition: null,
|
||||
prompt: "Begin planning.",
|
||||
},
|
||||
],
|
||||
planner: [
|
||||
{ role: "$END", condition: "firstPlanReady" },
|
||||
{ role: "developer", condition: null },
|
||||
{ role: "$END", condition: "firstPlanReady", prompt: "First plan was ready; end." },
|
||||
{
|
||||
role: "developer",
|
||||
condition: null,
|
||||
prompt: "Plan not ready on first pass; implement.",
|
||||
},
|
||||
],
|
||||
},
|
||||
};
|
||||
@@ -206,7 +261,10 @@ describe("evaluate", () => {
|
||||
},
|
||||
]);
|
||||
const result = await evaluate(workflow, context);
|
||||
expect(result).toEqual({ ok: true, value: "$END" });
|
||||
expect(result).toEqual({
|
||||
ok: true,
|
||||
value: { role: "$END", prompt: "First plan was ready; end." },
|
||||
});
|
||||
});
|
||||
|
||||
test("$last returns undefined for unmatched role", async () => {
|
||||
@@ -219,10 +277,20 @@ describe("evaluate", () => {
|
||||
},
|
||||
},
|
||||
graph: {
|
||||
$START: [{ role: "planner", condition: null }],
|
||||
$START: [
|
||||
{
|
||||
role: "planner",
|
||||
condition: null,
|
||||
prompt: "Begin planning.",
|
||||
},
|
||||
],
|
||||
planner: [
|
||||
{ role: "$END", condition: "hasReviewer" },
|
||||
{ role: "developer", condition: null },
|
||||
{ role: "$END", condition: "hasReviewer", prompt: "Reviewer already ran; end." },
|
||||
{
|
||||
role: "developer",
|
||||
condition: null,
|
||||
prompt: "No reviewer yet; implement.",
|
||||
},
|
||||
],
|
||||
},
|
||||
};
|
||||
@@ -236,6 +304,9 @@ describe("evaluate", () => {
|
||||
]);
|
||||
const result = await evaluate(workflow, context);
|
||||
// no reviewer step → $exists returns false → fallback to developer
|
||||
expect(result).toEqual({ ok: true, value: "developer" });
|
||||
expect(result).toEqual({
|
||||
ok: true,
|
||||
value: { role: "developer", prompt: "No reviewer yet; implement." },
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import type { ModeratorContext, WorkflowPayload } from "@uncaged/workflow-protocol";
|
||||
import jsonata from "jsonata";
|
||||
|
||||
import type { Result } from "./types.js";
|
||||
import type { EvaluateResult, Result } from "./types.js";
|
||||
|
||||
const START_ROLE = "$START";
|
||||
|
||||
@@ -78,7 +78,7 @@ function currentRole(context: ModeratorContext): string {
|
||||
export async function evaluate(
|
||||
workflow: WorkflowPayload,
|
||||
context: ModeratorContext,
|
||||
): Promise<Result<string, Error>> {
|
||||
): Promise<Result<EvaluateResult, Error>> {
|
||||
const role = currentRole(context);
|
||||
const transitions = workflow.graph[role];
|
||||
if (transitions === undefined) {
|
||||
@@ -90,7 +90,7 @@ export async function evaluate(
|
||||
|
||||
for (const transition of transitions) {
|
||||
if (transition.condition === null) {
|
||||
return { ok: true, value: transition.role };
|
||||
return { ok: true, value: { role: transition.role, prompt: transition.prompt } };
|
||||
}
|
||||
|
||||
const conditionDef = workflow.conditions[transition.condition];
|
||||
@@ -106,7 +106,7 @@ export async function evaluate(
|
||||
return evalResult;
|
||||
}
|
||||
if (isTruthy(evalResult.value)) {
|
||||
return { ok: true, value: transition.role };
|
||||
return { ok: true, value: { role: transition.role, prompt: transition.prompt } };
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
export { evaluate } from "./evaluate.js";
|
||||
export type { EvaluateResult } from "./types.js";
|
||||
|
||||
@@ -1 +1,7 @@
|
||||
export type Result<T, E> = { ok: true; value: T } | { ok: false; error: E };
|
||||
|
||||
/** The result of moderator evaluation — which role to go to, and the edge prompt. */
|
||||
export type EvaluateResult = {
|
||||
role: string;
|
||||
prompt: string;
|
||||
};
|
||||
|
||||
@@ -26,10 +26,11 @@ const CONDITION_DEFINITION: JSONSchema = {
|
||||
|
||||
const TRANSITION: JSONSchema = {
|
||||
type: "object",
|
||||
required: ["role", "condition"],
|
||||
required: ["role", "condition", "prompt"],
|
||||
properties: {
|
||||
role: { type: "string" },
|
||||
condition: { anyOf: [{ type: "string" }, { type: "null" }] },
|
||||
prompt: { type: "string" },
|
||||
},
|
||||
additionalProperties: false,
|
||||
};
|
||||
|
||||
@@ -28,6 +28,7 @@ export type RoleDefinition = {
|
||||
export type Transition = {
|
||||
role: string;
|
||||
condition: string | null;
|
||||
prompt: string;
|
||||
};
|
||||
|
||||
export type ConditionDefinition = {
|
||||
|
||||
@@ -0,0 +1,81 @@
|
||||
import { mkdirSync, mkdtempSync, readFileSync, rmSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, describe, expect, test } from "bun:test";
|
||||
|
||||
import { createProcessLogger } from "../src/process-logger/index.js";
|
||||
|
||||
function logDateKey(date: Date): string {
|
||||
return date.toISOString().slice(0, 10);
|
||||
}
|
||||
|
||||
describe("createProcessLogger", () => {
|
||||
let tmpDir: string;
|
||||
|
||||
afterEach(() => {
|
||||
if (tmpDir !== undefined) {
|
||||
rmSync(tmpDir, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
|
||||
test("writes init and log lines to dated JSONL under storage root", () => {
|
||||
tmpDir = mkdtempSync(join(tmpdir(), "uwf-process-log-"));
|
||||
const plog = createProcessLogger({
|
||||
storageRoot: tmpDir,
|
||||
context: { thread: "THREAD01", workflow: "WORKFLOW01" },
|
||||
});
|
||||
|
||||
expect(plog.pid).toMatch(/^\d+-\d+$/);
|
||||
|
||||
plog.log("7NQW4HBT", "moderator selected role=planner", null);
|
||||
|
||||
const logPath = join(tmpDir, "logs", `${logDateKey(new Date())}.jsonl`);
|
||||
const lines = readFileSync(logPath, "utf8")
|
||||
.trim()
|
||||
.split("\n")
|
||||
.map((line) => JSON.parse(line) as Record<string, string>);
|
||||
|
||||
expect(lines).toHaveLength(2);
|
||||
expect(lines[0]?.tag).toBe("W9F3RK2M");
|
||||
expect(lines[0]?.pid).toBe(plog.pid);
|
||||
expect(lines[0]?.thread).toBe("THREAD01");
|
||||
expect(lines[0]?.workflow).toBe("WORKFLOW01");
|
||||
expect(lines[0]?.msg).toContain("process start");
|
||||
expect(lines[0]?.msg).toContain("node=");
|
||||
|
||||
expect(lines[1]?.tag).toBe("7NQW4HBT");
|
||||
expect(lines[1]?.msg).toBe("moderator selected role=planner");
|
||||
expect(lines[1]?.thread).toBe("THREAD01");
|
||||
expect(lines[1]?.workflow).toBe("WORKFLOW01");
|
||||
});
|
||||
|
||||
test("creates logs directory when missing", () => {
|
||||
tmpDir = mkdtempSync(join(tmpdir(), "uwf-process-log-"));
|
||||
createProcessLogger({
|
||||
storageRoot: tmpDir,
|
||||
context: { thread: null, workflow: null },
|
||||
});
|
||||
mkdirSync(join(tmpDir, "logs"), { recursive: true });
|
||||
expect(() =>
|
||||
readFileSync(join(tmpDir, "logs", `${logDateKey(new Date())}.jsonl`), "utf8"),
|
||||
).not.toThrow();
|
||||
});
|
||||
|
||||
test("merges per-call context into the JSONL entry", () => {
|
||||
tmpDir = mkdtempSync(join(tmpdir(), "uwf-process-log-"));
|
||||
const plog = createProcessLogger({
|
||||
storageRoot: tmpDir,
|
||||
context: { thread: "T1", workflow: null },
|
||||
});
|
||||
plog.log("M3K8V9T1", "spawn agent", { command: "uwf-hermes", args: "tid role" });
|
||||
|
||||
const logPath = join(tmpDir, "logs", `${logDateKey(new Date())}.jsonl`);
|
||||
const lines = readFileSync(logPath, "utf8")
|
||||
.trim()
|
||||
.split("\n")
|
||||
.map((line) => JSON.parse(line) as Record<string, string>);
|
||||
const last = lines[lines.length - 1];
|
||||
expect(last?.command).toBe("uwf-hermes");
|
||||
expect(last?.args).toBe("tid role");
|
||||
});
|
||||
});
|
||||
@@ -26,6 +26,7 @@ uwf workflow list # list all registered workflows
|
||||
uwf thread start <workflow> -p <prompt> # create a thread (no execution)
|
||||
uwf thread step <thread-id> # execute one moderator→agent→extract cycle
|
||||
[--agent <cmd>] # override agent command
|
||||
[-c, --count <number>] # run multiple steps (default: 1)
|
||||
uwf thread show <thread-id> # show thread head pointer
|
||||
uwf thread list # list active threads
|
||||
[--all] # include archived threads
|
||||
@@ -56,6 +57,17 @@ uwf cas schema list # list all registered schemas
|
||||
uwf cas schema get <hash> # show a schema by its type hash
|
||||
\`\`\`
|
||||
|
||||
## Log Commands
|
||||
|
||||
\`\`\`
|
||||
uwf log list # list log files with sizes
|
||||
uwf log show # show all log entries
|
||||
[--thread <thread-id>] # filter by thread ID
|
||||
[--process <pid>] # filter by process ID
|
||||
[--date <YYYY-MM-DD>] # filter by date
|
||||
uwf log clean --before <date> # delete log files before given date
|
||||
\`\`\`
|
||||
|
||||
## Global Options
|
||||
|
||||
\`\`\`
|
||||
@@ -69,6 +81,7 @@ uwf -V, --version # print version
|
||||
- **Thread**: A single workflow execution (ULID). State is an immutable CAS chain; active threads are indexed in \`threads.yaml\`.
|
||||
- **Step**: One moderator→agent→extract cycle. Run \`uwf thread step\` repeatedly until \`$END\`.
|
||||
- **CAS**: Content-Addressed Storage — all nodes are immutable and identified by hash.
|
||||
- **Role**: Named actor with goal, capabilities, procedure, output, and meta; the moderator routes between roles.
|
||||
- **Role**: Named actor with goal, capabilities, procedure, output, and frontmatter schema; the moderator routes between roles.
|
||||
- **Edge Prompt**: Required instruction on each graph edge — the moderator's dispatch message to the agent.
|
||||
`;
|
||||
}
|
||||
|
||||
@@ -13,6 +13,13 @@ export {
|
||||
validateFrontmatter,
|
||||
} from "./frontmatter-markdown/index.js";
|
||||
export { createLogger } from "./logger.js";
|
||||
export { createProcessLogger } from "./process-logger/index.js";
|
||||
export type {
|
||||
CreateProcessLoggerOptions,
|
||||
ProcessLogFn,
|
||||
ProcessLogger,
|
||||
ProcessLoggerContext,
|
||||
} from "./process-logger/index.js";
|
||||
export { normalizeRefsField } from "./refs-field.js";
|
||||
export { err, ok } from "./result.js";
|
||||
export { getDefaultWorkflowStorageRoot, getGlobalCasDir } from "./storage-root.js";
|
||||
|
||||
@@ -1,28 +1,8 @@
|
||||
import { appendFileSync } from "node:fs";
|
||||
|
||||
import { CROCKFORD_BASE32_ALPHABET } from "./base32.js";
|
||||
import { assertValidLogTag } from "./process-logger/log-tag.js";
|
||||
import type { CreateLoggerOptions, LogFn } from "./types.js";
|
||||
|
||||
const TAG_LENGTH = 8;
|
||||
|
||||
const TAG_CHAR_SET: ReadonlySet<string> = new Set(CROCKFORD_BASE32_ALPHABET.split(""));
|
||||
|
||||
function assertValidLogTag(tag: string): void {
|
||||
if (tag.length !== TAG_LENGTH) {
|
||||
throw new Error(`log tag must be exactly ${TAG_LENGTH} characters`);
|
||||
}
|
||||
for (let i = 0; i < tag.length; i++) {
|
||||
const ch = tag[i];
|
||||
if (ch === undefined) {
|
||||
throw new Error("log tag validation failed");
|
||||
}
|
||||
const upper = ch.toUpperCase();
|
||||
if (!TAG_CHAR_SET.has(upper)) {
|
||||
throw new Error(`invalid Crockford Base32 character in log tag: ${ch}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Append one JSONL log record: `{ tag, content, timestamp }` per RFC-001. */
|
||||
export function createLogger(options: CreateLoggerOptions): LogFn {
|
||||
if (options.sink.kind === "stderr") {
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
export { createProcessLogger } from "./process-logger.js";
|
||||
export type {
|
||||
CreateProcessLoggerOptions,
|
||||
ProcessLogFn,
|
||||
ProcessLogger,
|
||||
ProcessLoggerContext,
|
||||
} from "./types.js";
|
||||
@@ -0,0 +1,21 @@
|
||||
import { CROCKFORD_BASE32_ALPHABET } from "../base32.js";
|
||||
|
||||
const TAG_LENGTH = 8;
|
||||
|
||||
const TAG_CHAR_SET: ReadonlySet<string> = new Set(CROCKFORD_BASE32_ALPHABET.split(""));
|
||||
|
||||
export function assertValidLogTag(tag: string): void {
|
||||
if (tag.length !== TAG_LENGTH) {
|
||||
throw new Error(`log tag must be exactly ${TAG_LENGTH} characters`);
|
||||
}
|
||||
for (let i = 0; i < tag.length; i++) {
|
||||
const ch = tag[i];
|
||||
if (ch === undefined) {
|
||||
throw new Error("log tag validation failed");
|
||||
}
|
||||
const upper = ch.toUpperCase();
|
||||
if (!TAG_CHAR_SET.has(upper)) {
|
||||
throw new Error(`invalid Crockford Base32 character in log tag: ${ch}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,78 @@
|
||||
import { appendFileSync, mkdirSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
|
||||
import { getDefaultWorkflowStorageRoot } from "../storage-root.js";
|
||||
import { assertValidLogTag } from "./log-tag.js";
|
||||
import type { CreateProcessLoggerOptions, ProcessLogger, ProcessLoggerContext } from "./types.js";
|
||||
|
||||
const INIT_TAG = "W9F3RK2M";
|
||||
|
||||
function logDateKey(date: Date): string {
|
||||
return date.toISOString().slice(0, 10);
|
||||
}
|
||||
|
||||
function getProcessLogsDir(storageRoot: string): string {
|
||||
return join(storageRoot, "logs");
|
||||
}
|
||||
|
||||
function getProcessLogFilePath(storageRoot: string, date: Date): string {
|
||||
return join(getProcessLogsDir(storageRoot), `${logDateKey(date)}.jsonl`);
|
||||
}
|
||||
|
||||
function buildEntry(
|
||||
processId: string,
|
||||
tag: string,
|
||||
msg: string,
|
||||
baseContext: ProcessLoggerContext,
|
||||
extra: Record<string, string> | null,
|
||||
): Record<string, string> {
|
||||
const entry: Record<string, string> = {
|
||||
ts: new Date().toISOString(),
|
||||
pid: processId,
|
||||
tag: tag.toUpperCase(),
|
||||
msg,
|
||||
};
|
||||
if (baseContext.thread !== null) {
|
||||
entry.thread = baseContext.thread;
|
||||
}
|
||||
if (baseContext.workflow !== null) {
|
||||
entry.workflow = baseContext.workflow;
|
||||
}
|
||||
if (extra !== null) {
|
||||
for (const [key, value] of Object.entries(extra)) {
|
||||
entry[key] = value;
|
||||
}
|
||||
}
|
||||
return entry;
|
||||
}
|
||||
|
||||
function appendEntry(filePath: string, entry: Record<string, string>): void {
|
||||
appendFileSync(filePath, `${JSON.stringify(entry)}\n`, "utf8");
|
||||
}
|
||||
|
||||
/** Process-scoped debug logger — append-only JSONL under `<storageRoot>/logs/YYYY-MM-DD.jsonl`. */
|
||||
export function createProcessLogger(options: CreateProcessLoggerOptions): ProcessLogger {
|
||||
const storageRoot = options.storageRoot ?? getDefaultWorkflowStorageRoot();
|
||||
const processId = `${Date.now()}-${process.pid}`;
|
||||
const baseContext = options.context;
|
||||
const logFilePath = getProcessLogFilePath(storageRoot, new Date());
|
||||
|
||||
mkdirSync(getProcessLogsDir(storageRoot), { recursive: true });
|
||||
|
||||
const log: ProcessLogger["log"] = (tag, msg, context = null) => {
|
||||
assertValidLogTag(tag);
|
||||
appendEntry(logFilePath, buildEntry(processId, tag, msg, baseContext, context));
|
||||
};
|
||||
|
||||
const argvSummary = JSON.stringify(process.argv);
|
||||
const initParts = [`argv=${argvSummary}`, `node=${process.version}`];
|
||||
if (baseContext.thread !== null) {
|
||||
initParts.push(`thread=${baseContext.thread}`);
|
||||
}
|
||||
if (baseContext.workflow !== null) {
|
||||
initParts.push(`workflow=${baseContext.workflow}`);
|
||||
}
|
||||
log(INIT_TAG, `process start ${initParts.join(" ")}`, null);
|
||||
|
||||
return { pid: processId, log };
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
export type ProcessLoggerContext = {
|
||||
thread: string | null;
|
||||
workflow: string | null;
|
||||
};
|
||||
|
||||
export type CreateProcessLoggerOptions = {
|
||||
storageRoot: string | null;
|
||||
context: ProcessLoggerContext;
|
||||
};
|
||||
|
||||
export type ProcessLogFn = (
|
||||
tag: string,
|
||||
msg: string,
|
||||
context: Record<string, string> | null,
|
||||
) => void;
|
||||
|
||||
export type ProcessLogger = {
|
||||
pid: string;
|
||||
log: ProcessLogFn;
|
||||
};
|
||||
@@ -21,6 +21,7 @@ const publishOrder = [
|
||||
"workflow-moderator",
|
||||
"workflow-agent-kit",
|
||||
"workflow-agent-hermes",
|
||||
"workflow-agent-builtin",
|
||||
"cli-workflow",
|
||||
];
|
||||
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
{ "path": "packages/workflow-moderator" },
|
||||
{ "path": "packages/workflow-agent-kit" },
|
||||
{ "path": "packages/workflow-agent-hermes" },
|
||||
{ "path": "packages/workflow-agent-builtin" },
|
||||
{ "path": "packages/cli-workflow" }
|
||||
]
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user