Compare commits
25 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| e8dd398f28 | |||
| 61d95cc47f | |||
| 577fb27470 | |||
| 5475dd3f5c | |||
| 09b7ddf6d0 | |||
| c4e94bbe56 | |||
| dbefe793f2 | |||
| 6483bc4861 | |||
| fecb02b115 | |||
| 87938c1886 | |||
| 95a130136b | |||
| aba5642908 | |||
| 168e604602 | |||
| d50159c5a7 | |||
| 9a7ad34e55 | |||
| 4193157124 | |||
| 6ff1414cf0 | |||
| 37f4203b40 | |||
| c4ec22bb4f | |||
| 427f47d72c | |||
| 9f25745e1e | |||
| 82247c86ce | |||
| 0ef2d8fec2 | |||
| aa14fd08e0 | |||
| e43d4f3bbf |
@@ -1,5 +0,0 @@
|
||||
---
|
||||
"@uncaged/workflow-util": patch
|
||||
---
|
||||
|
||||
Replace optionalEnv/requireEnv with unified env(name, fallback) API
|
||||
@@ -1,5 +0,0 @@
|
||||
---
|
||||
"@uncaged/workflow-protocol": patch
|
||||
---
|
||||
|
||||
fix: correct internal dependency versions for prerelease
|
||||
@@ -1,5 +0,0 @@
|
||||
---
|
||||
"@uncaged/workflow-util-agent": patch
|
||||
---
|
||||
|
||||
fix: include create-agent-adapter.ts in published src
|
||||
@@ -1,5 +0,0 @@
|
||||
---
|
||||
"@uncaged/workflow-protocol": patch
|
||||
---
|
||||
|
||||
fix: use npm publish with pinned deps instead of bun publish (workspace:^ resolution bug)
|
||||
@@ -1,30 +0,0 @@
|
||||
{
|
||||
"mode": "pre",
|
||||
"tag": "alpha",
|
||||
"initialVersions": {
|
||||
"@uncaged/cli-workflow": "0.4.5",
|
||||
"@uncaged/workflow-agent-cursor": "0.4.5",
|
||||
"@uncaged/workflow-agent-hermes": "0.4.5",
|
||||
"@uncaged/workflow-agent-llm": "0.4.5",
|
||||
"@uncaged/workflow-agent-react": "0.4.5",
|
||||
"@uncaged/workflow-cas": "0.4.5",
|
||||
"@uncaged/workflow-dashboard": "0.1.0",
|
||||
"@uncaged/workflow-execute": "0.4.5",
|
||||
"@uncaged/workflow-gateway": "0.4.5",
|
||||
"@uncaged/workflow-protocol": "0.4.5",
|
||||
"@uncaged/workflow-reactor": "0.4.5",
|
||||
"@uncaged/workflow-register": "0.4.5",
|
||||
"@uncaged/workflow-runtime": "0.4.5",
|
||||
"@uncaged/workflow-template-develop": "0.4.5",
|
||||
"@uncaged/workflow-template-solve-issue": "0.4.5",
|
||||
"@uncaged/workflow-util": "0.4.5",
|
||||
"@uncaged/workflow-util-agent": "0.4.5"
|
||||
},
|
||||
"changesets": [
|
||||
"env-api-unify",
|
||||
"fix-internal-deps",
|
||||
"fix-publish-src",
|
||||
"fix-workspace-deps",
|
||||
"rfc-252-agent-fn"
|
||||
]
|
||||
}
|
||||
@@ -1,5 +0,0 @@
|
||||
---
|
||||
"@uncaged/workflow-protocol": minor
|
||||
---
|
||||
|
||||
feat: AgentFn<Opt> type boundary and createAgentAdapter bridging function (RFC #252)
|
||||
@@ -22,4 +22,4 @@ jobs:
|
||||
run: bun run check
|
||||
|
||||
- name: Test
|
||||
run: bun test
|
||||
run: bun run test:ci
|
||||
|
||||
@@ -1,83 +0,0 @@
|
||||
# Test Spec: uwf setup model connectivity validation (#335)
|
||||
|
||||
## Context
|
||||
|
||||
File: `packages/cli-workflow/src/commands/setup.ts`
|
||||
Test file: `packages/cli-workflow/src/__tests__/setup-validate.test.ts`
|
||||
|
||||
After `cmdSetup` writes config, it should send a test chat completion request to verify the configured model is reachable. If validation fails, warn the user (don't abort — config is already saved).
|
||||
|
||||
## Implementation Notes
|
||||
|
||||
- Add a `validateModel(baseUrl, apiKey, model)` function that sends a minimal chat completion request (`POST /chat/completions` with `messages: [{role:"user",content:"hi"}]`, `max_tokens: 1`)
|
||||
- Returns `Result<void, string>` — ok if 2xx response, error with reason string otherwise
|
||||
- Use `AbortSignal.timeout(15_000)` for the request
|
||||
- Both `cmdSetup` and `cmdSetupInteractive` should call it after saving config
|
||||
- `cmdSetup` returns validation result in its return object: `{ ...existing, validation: { ok: true } | { ok: false, error: string } }`
|
||||
- `cmdSetupInteractive` prints a warning to console if validation fails, success message if it passes
|
||||
- Use the project logger (`createLogger`) — no raw `console.log` except in interactive CLI output (per CLAUDE.md)
|
||||
|
||||
## Test Cases (vitest)
|
||||
|
||||
### 1. `validateModel` — success path
|
||||
- Mock `fetch` to return `{ status: 200, ok: true, json: () => ({}) }`
|
||||
- Call `validateModel(baseUrl, apiKey, model)`
|
||||
- Assert returns `{ ok: true, value: undefined }`
|
||||
- Assert fetch was called with correct URL (`${baseUrl}/chat/completions`), correct headers (`Authorization: Bearer ${apiKey}`), correct body (model, messages, max_tokens: 1)
|
||||
|
||||
### 2. `validateModel` — HTTP error (401 unauthorized)
|
||||
- Mock `fetch` to return `{ status: 401, ok: false, statusText: "Unauthorized" }`
|
||||
- Call `validateModel(baseUrl, apiKey, model)`
|
||||
- Assert returns `{ ok: false, error: <string containing "401"> }`
|
||||
|
||||
### 3. `validateModel` — HTTP error (404 model not found)
|
||||
- Mock `fetch` to return `{ status: 404, ok: false, statusText: "Not Found" }`
|
||||
- Assert returns `{ ok: false, error: <string containing "404"> }`
|
||||
|
||||
### 4. `validateModel` — network timeout
|
||||
- Mock `fetch` to throw `DOMException` with name `AbortError`
|
||||
- Assert returns `{ ok: false, error: <string containing "timeout" or "unreachable"> }`
|
||||
|
||||
### 5. `validateModel` — network error (DNS failure, connection refused)
|
||||
- Mock `fetch` to throw `TypeError("fetch failed")`
|
||||
- Assert returns `{ ok: false, error: <string mentioning connectivity> }`
|
||||
|
||||
### 6. `cmdSetup` — includes validation result on success
|
||||
- Mock global `fetch` for `/chat/completions` to succeed
|
||||
- Call `cmdSetup({ provider, baseUrl, apiKey, model, storageRoot })`
|
||||
- Assert returned object has `validation: { ok: true, value: undefined }`
|
||||
- Assert config files are still written (existing behavior preserved)
|
||||
|
||||
### 7. `cmdSetup` — includes validation result on failure (config still saved)
|
||||
- Mock global `fetch` for `/chat/completions` to return 401
|
||||
- Call `cmdSetup({ ... })`
|
||||
- Assert returned object has `validation: { ok: false, error: ... }`
|
||||
- Assert `config.yaml` and `.env` are still written (validation failure doesn't prevent saving)
|
||||
|
||||
### 8. `cmdSetupInteractive` — prints success message on validation pass
|
||||
- Mock `fetch` for both `/models` and `/chat/completions` to succeed
|
||||
- Mock stdin to provide valid selections
|
||||
- Capture console output
|
||||
- Assert output contains a success message like "Model verified" or "✓"
|
||||
|
||||
### 9. `cmdSetupInteractive` — prints warning on validation failure
|
||||
- Mock `fetch`: `/models` succeeds, `/chat/completions` returns 401
|
||||
- Mock stdin for valid selections
|
||||
- Capture console output
|
||||
- Assert output contains a warning about model not being reachable and suggests trying a different model
|
||||
|
||||
### 10. `validateModel` — request body correctness
|
||||
- Mock `fetch` to capture the request body
|
||||
- Call `validateModel(baseUrl, apiKey, "test-model")`
|
||||
- Assert body is `{ model: "test-model", messages: [{role: "user", content: "hi"}], max_tokens: 1 }`
|
||||
|
||||
## Export Requirements
|
||||
|
||||
- `validateModel` must be exported (for direct unit testing)
|
||||
- Signature: `async function validateModel(baseUrl: string, apiKey: string, model: string): Promise<Result<void, string>>`
|
||||
- `Result` type: `{ ok: true; value: T } | { ok: false; error: E }` (project convention)
|
||||
|
||||
## Files to Create/Modify
|
||||
|
||||
- **New**: `packages/cli-workflow/src/__tests__/setup-validate.test.ts` — all test cases above
|
||||
- **Modify**: `packages/cli-workflow/src/commands/setup.ts` — add `validateModel`, integrate into `cmdSetup` and `cmdSetupInteractive`
|
||||
@@ -0,0 +1,269 @@
|
||||
name: "e2e-walkthrough"
|
||||
description: "End-to-end walkthrough of uwf CLI. Dogfooding: uwf tests uwf. Each role validates a phase of the CLI surface inside an isolated Docker container."
|
||||
roles:
|
||||
bootstrap:
|
||||
description: "Start Docker container with isolated storage, verify uwf is runnable"
|
||||
goal: "You are an E2E test runner. Set up an isolated Docker environment and verify basic uwf functionality."
|
||||
capabilities:
|
||||
- docker
|
||||
- shell
|
||||
procedure: |
|
||||
1. Start a Docker container with isolated storage:
|
||||
```
|
||||
docker run -d --name uwf-e2e-$$ \
|
||||
-v $HOME:$HOME \
|
||||
-e HOME=$HOME \
|
||||
-e UNCAGED_WORKFLOW_STORAGE_ROOT=/tmp/uwf-e2e-storage \
|
||||
-w ~/repos/workflow \
|
||||
node:22-bookworm \
|
||||
sleep infinity
|
||||
```
|
||||
2. Inside the container, install bun, install deps, then `bun link` all packages
|
||||
so that `uwf`, `uwf-hermes`, `uwf-builtin` are on PATH (from source):
|
||||
```
|
||||
docker exec uwf-e2e-$$ bash -c '
|
||||
# Install bun
|
||||
curl -fsSL https://bun.sh/install | bash
|
||||
export PATH="$HOME/.bun/bin:$PATH"
|
||||
|
||||
# Isolated storage
|
||||
mkdir -p $UNCAGED_WORKFLOW_STORAGE_ROOT
|
||||
|
||||
# Install workspace deps
|
||||
cd ~/repos/workflow && bun install --frozen-lockfile
|
||||
|
||||
# bun link each package that has a bin entry
|
||||
cd packages/cli-workflow && bun link && cd ../..
|
||||
cd packages/workflow-agent-hermes && bun link && cd ../..
|
||||
cd packages/workflow-agent-builtin && bun link && cd ../..
|
||||
'
|
||||
```
|
||||
3. Verify all three commands are available inside the container:
|
||||
```
|
||||
docker exec uwf-e2e-$$ bash -c 'export PATH="$HOME/.bun/bin:$PATH" && uwf --version'
|
||||
docker exec uwf-e2e-$$ bash -c 'export PATH="$HOME/.bun/bin:$PATH" && uwf-hermes --help'
|
||||
docker exec uwf-e2e-$$ bash -c 'export PATH="$HOME/.bun/bin:$PATH" && uwf-builtin --help'
|
||||
```
|
||||
4. Copy host config if it exists:
|
||||
```
|
||||
docker exec uwf-e2e-$$ bash -c '
|
||||
if [ -f $HOME/.uncaged/workflow/config.yaml ]; then
|
||||
cp $HOME/.uncaged/workflow/config.yaml $UNCAGED_WORKFLOW_STORAGE_ROOT/config.yaml
|
||||
fi
|
||||
'
|
||||
```
|
||||
|
||||
Report the container name and confirm uwf + agents are working.
|
||||
Set containerName to the Docker container name for subsequent roles.
|
||||
output: "Report uwf version and container readiness. Set $status to pass with containerName, or fail with error."
|
||||
frontmatter:
|
||||
oneOf:
|
||||
- properties:
|
||||
$status: { const: "pass" }
|
||||
containerName: { type: string }
|
||||
required: [$status, containerName]
|
||||
- properties:
|
||||
$status: { const: "fail" }
|
||||
error: { type: string }
|
||||
required: [$status, error]
|
||||
|
||||
config-and-registry:
|
||||
description: "Validate uwf config commands and workflow registration"
|
||||
goal: "You are an E2E test runner. Validate uwf config operations and workflow registration inside the Docker container."
|
||||
capabilities:
|
||||
- docker
|
||||
- shell
|
||||
procedure: |
|
||||
Use the container from the previous step (containerName is in your prompt).
|
||||
All commands run via: `docker exec <containerName> bash -c '...'`
|
||||
All commands use `uwf` (installed via `bun link` inside the container).
|
||||
Remember to set env vars in each exec:
|
||||
export PATH="$HOME/.bun/bin:$PATH"
|
||||
export UNCAGED_WORKFLOW_STORAGE_ROOT=/tmp/uwf-e2e-storage
|
||||
|
||||
Config tests:
|
||||
1. `uwf config list` — verify it returns valid JSON
|
||||
2. `uwf config set models.test.name test-model` — set a test key
|
||||
3. `uwf config get models.test.name` — verify it returns "test-model"
|
||||
|
||||
Workflow registration tests:
|
||||
4. `uwf workflow add ~/repos/workflow/examples/solve-issue.yaml` — register workflow
|
||||
5. Verify the output contains a hash
|
||||
6. `uwf workflow list` — verify non-empty array
|
||||
7. Capture the workflow name from the list
|
||||
8. `uwf workflow show <name>` — verify it returns roles
|
||||
|
||||
Report all test results with pass/fail counts.
|
||||
output: "Report test results. Set $status to pass (with workflowName and containerName) or fail."
|
||||
frontmatter:
|
||||
oneOf:
|
||||
- properties:
|
||||
$status: { const: "pass" }
|
||||
workflowName: { type: string }
|
||||
containerName: { type: string }
|
||||
required: [$status, workflowName, containerName]
|
||||
- properties:
|
||||
$status: { const: "fail" }
|
||||
error: { type: string }
|
||||
containerName: { type: string }
|
||||
required: [$status, error, containerName]
|
||||
|
||||
thread-ops:
|
||||
description: "Test thread start, list, show, and exec"
|
||||
goal: "You are an E2E test runner. Validate thread creation and execution inside the Docker container."
|
||||
capabilities:
|
||||
- docker
|
||||
- shell
|
||||
procedure: |
|
||||
Use the container (containerName) and workflow (workflowName) from your prompt.
|
||||
All commands via: `docker exec <containerName> bash -c '...'`
|
||||
Set env: PATH="$HOME/.bun/bin:$PATH" UNCAGED_WORKFLOW_STORAGE_ROOT=/tmp/uwf-e2e-storage
|
||||
|
||||
1. `uwf thread start <workflowName> -p 'E2E test: what is 2+2?'` — capture thread ID from JSON output
|
||||
2. `uwf thread list` — verify the thread appears in the list
|
||||
3. `uwf thread show <threadId>` — verify head pointer exists
|
||||
4. `uwf thread exec <threadId> --agent uwf-builtin` — execute one step
|
||||
5. Verify exec returns JSON with a head field
|
||||
|
||||
Report results. Pass threadId and containerName forward.
|
||||
output: "Report test results. Set $status to pass (with threadId, workflowName, containerName) or fail."
|
||||
frontmatter:
|
||||
oneOf:
|
||||
- properties:
|
||||
$status: { const: "pass" }
|
||||
threadId: { type: string }
|
||||
workflowName: { type: string }
|
||||
containerName: { type: string }
|
||||
required: [$status, threadId, workflowName, containerName]
|
||||
- properties:
|
||||
$status: { const: "fail" }
|
||||
error: { type: string }
|
||||
containerName: { type: string }
|
||||
required: [$status, error, containerName]
|
||||
|
||||
inspect:
|
||||
description: "Test step list/show, thread read, and CAS operations"
|
||||
goal: "You are an E2E test runner. Validate read and inspect operations inside the Docker container."
|
||||
capabilities:
|
||||
- docker
|
||||
- shell
|
||||
procedure: |
|
||||
Use the container (containerName) and threadId from your prompt.
|
||||
All commands via: `docker exec <containerName> bash -c '...'`
|
||||
Set env: PATH="$HOME/.bun/bin:$PATH" UNCAGED_WORKFLOW_STORAGE_ROOT=/tmp/uwf-e2e-storage
|
||||
|
||||
Step inspection:
|
||||
1. `uwf step list <threadId>` — verify steps array has length > 1
|
||||
2. Capture the last step hash from the output
|
||||
3. `uwf step show <lastStepHash>` — verify it returns a role field
|
||||
|
||||
Thread read:
|
||||
4. `uwf thread read <threadId>` — verify non-empty output
|
||||
|
||||
CAS operations:
|
||||
5. `uwf cas get <lastStepHash>` — verify returns a type field
|
||||
6. `uwf cas has <lastStepHash>` — verify exits 0
|
||||
7. `uwf cas refs <lastStepHash>` — list refs (may be empty)
|
||||
8. `uwf cas walk <lastStepHash>` — verify returns non-empty array
|
||||
|
||||
Report results. Pass threadId, lastStepHash, workflowName, containerName forward.
|
||||
output: "Report test results. Set $status to pass (with threadId, lastStepHash, workflowName, containerName) or fail."
|
||||
frontmatter:
|
||||
oneOf:
|
||||
- properties:
|
||||
$status: { const: "pass" }
|
||||
threadId: { type: string }
|
||||
lastStepHash: { type: string }
|
||||
workflowName: { type: string }
|
||||
containerName: { type: string }
|
||||
required: [$status, threadId, lastStepHash, workflowName, containerName]
|
||||
- properties:
|
||||
$status: { const: "fail" }
|
||||
error: { type: string }
|
||||
containerName: { type: string }
|
||||
required: [$status, error, containerName]
|
||||
|
||||
cancel-and-fork:
|
||||
description: "Test thread cancel, step fork, and log inspection"
|
||||
goal: "You are an E2E test runner. Validate cancel, fork, and log operations inside the Docker container."
|
||||
capabilities:
|
||||
- docker
|
||||
- shell
|
||||
procedure: |
|
||||
Use containerName, threadId, lastStepHash, and workflowName from your prompt.
|
||||
All commands via: `docker exec <containerName> bash -c '...'`
|
||||
Set env: PATH="$HOME/.bun/bin:$PATH" UNCAGED_WORKFLOW_STORAGE_ROOT=/tmp/uwf-e2e-storage
|
||||
|
||||
Cancel:
|
||||
1. Start a second thread: `uwf thread start <workflowName> -p 'E2E cancel test'`
|
||||
2. Cancel it: `uwf thread cancel <secondThreadId>`
|
||||
3. Verify it appears in completed list: `uwf thread list --status completed`
|
||||
|
||||
Fork:
|
||||
4. Fork from the first thread's last step: `uwf step fork <lastStepHash>`
|
||||
5. Verify fork creates a new thread with a different ID
|
||||
|
||||
Logs:
|
||||
6. `uwf log list` — verify output (may be empty)
|
||||
7. `uwf log show --thread <threadId>` — verify runs without error
|
||||
|
||||
Report results with summary.
|
||||
output: "Report test results with summary. Set $status to pass or fail."
|
||||
frontmatter:
|
||||
oneOf:
|
||||
- properties:
|
||||
$status: { const: "pass" }
|
||||
containerName: { type: string }
|
||||
summary: { type: string }
|
||||
required: [$status, containerName, summary]
|
||||
- properties:
|
||||
$status: { const: "fail" }
|
||||
error: { type: string }
|
||||
containerName: { type: string }
|
||||
required: [$status, error, containerName]
|
||||
|
||||
cleanup:
|
||||
description: "Remove Docker container"
|
||||
goal: "You are an E2E test runner. Clean up the Docker container used for testing."
|
||||
capabilities:
|
||||
- docker
|
||||
- shell
|
||||
procedure: |
|
||||
Remove the Docker container (containerName is in your prompt):
|
||||
1. `docker rm -f <containerName>`
|
||||
2. Verify the container is gone: `docker ps -a --filter name=<containerName> --format '{{.Names}}'` should return empty
|
||||
|
||||
Report cleanup result.
|
||||
output: "Report cleanup result. Set $status to pass or fail."
|
||||
frontmatter:
|
||||
oneOf:
|
||||
- properties:
|
||||
$status: { const: "pass" }
|
||||
summary: { type: string }
|
||||
required: [$status, summary]
|
||||
- properties:
|
||||
$status: { const: "fail" }
|
||||
error: { type: string }
|
||||
required: [$status, error]
|
||||
|
||||
graph:
|
||||
$START:
|
||||
_: { role: "bootstrap", prompt: "Set up the Docker container and verify uwf is runnable." }
|
||||
bootstrap:
|
||||
pass: { role: "config-and-registry", prompt: "Container {{{containerName}}} is ready. Validate config and workflow registration." }
|
||||
fail: { role: "$END", prompt: "Bootstrap failed: {{{error}}}. No container was created." }
|
||||
config-and-registry:
|
||||
pass: { role: "thread-ops", prompt: "Config and registry OK. Workflow '{{{workflowName}}}' registered. Container: {{{containerName}}}. Now test thread operations." }
|
||||
fail: { role: "cleanup", prompt: "Config/registry failed: {{{error}}}. Clean up container {{{containerName}}}." }
|
||||
thread-ops:
|
||||
pass: { role: "inspect", prompt: "Thread ops OK. threadId={{{threadId}}}, workflowName={{{workflowName}}}, containerName={{{containerName}}}. Now test inspect operations." }
|
||||
fail: { role: "cleanup", prompt: "Thread ops failed: {{{error}}}. Clean up container {{{containerName}}}." }
|
||||
inspect:
|
||||
pass: { role: "cancel-and-fork", prompt: "Inspect OK. threadId={{{threadId}}}, lastStepHash={{{lastStepHash}}}, workflowName={{{workflowName}}}, containerName={{{containerName}}}. Now test cancel, fork, and logs." }
|
||||
fail: { role: "cleanup", prompt: "Inspect failed: {{{error}}}. Clean up container {{{containerName}}}." }
|
||||
cancel-and-fork:
|
||||
pass: { role: "cleanup", prompt: "All tests passed! {{{summary}}}. Clean up container {{{containerName}}}." }
|
||||
fail: { role: "cleanup", prompt: "Cancel/fork failed: {{{error}}}. Clean up container {{{containerName}}}." }
|
||||
cleanup:
|
||||
pass: { role: "$END", prompt: "E2E walkthrough complete. {{{summary}}}" }
|
||||
fail: { role: "$END", prompt: "Cleanup failed: {{{error}}}. Manual cleanup may be needed." }
|
||||
+1
-1
@@ -14,7 +14,7 @@
|
||||
"test:ci": "bun run --filter './packages/*' test:ci",
|
||||
"changeset": "bunx changeset",
|
||||
"version": "bunx changeset version",
|
||||
"release": "bun run build && bun test && node scripts/publish-all.mjs"
|
||||
"release": "bun run build && bun run test && node scripts/publish-all.mjs"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@agentclientprotocol/sdk": "^0.22.1",
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
# @uncaged/cli-workflow
|
||||
|
||||
## 0.5.1
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Add 5 persona-based skills (actor, user, author, developer, adapter) and fix skill CLI description truncation
|
||||
- Updated dependencies
|
||||
- @uncaged/workflow-util@0.5.1
|
||||
- @uncaged/workflow-protocol@0.5.1
|
||||
- @uncaged/workflow-util-agent@0.5.1
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@uncaged/cli-workflow",
|
||||
"version": "0.5.0",
|
||||
"version": "0.5.1",
|
||||
"files": [
|
||||
"src",
|
||||
"dist",
|
||||
|
||||
@@ -6,10 +6,15 @@ import { describe, expect, test } from "vitest";
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
import {
|
||||
cmdSkillActor,
|
||||
cmdSkillAdapter,
|
||||
cmdSkillArchitecture,
|
||||
cmdSkillAuthor,
|
||||
cmdSkillCli,
|
||||
cmdSkillDeveloper,
|
||||
cmdSkillList,
|
||||
cmdSkillModerator,
|
||||
cmdSkillUser,
|
||||
cmdSkillYaml,
|
||||
} from "../commands/skill.js";
|
||||
|
||||
@@ -21,8 +26,12 @@ describe("skill commands", () => {
|
||||
expect(result).toContain("architecture");
|
||||
expect(result).toContain("yaml");
|
||||
expect(result).toContain("moderator");
|
||||
expect(result).toContain("actor");
|
||||
expect(result).toContain("user");
|
||||
expect(result).toContain("author");
|
||||
expect(result).toContain("developer");
|
||||
expect(result).toContain("adapter");
|
||||
for (const name of result) {
|
||||
expect(typeof name).toBe("string");
|
||||
expect(name).toMatch(/^\S+$/);
|
||||
}
|
||||
});
|
||||
@@ -62,6 +71,54 @@ describe("skill commands", () => {
|
||||
expect(result).toContain("uwf");
|
||||
});
|
||||
|
||||
test("skill actor returns non-empty markdown string", () => {
|
||||
const result = cmdSkillActor();
|
||||
expect(typeof result).toBe("string");
|
||||
expect(result).toContain("frontmatter");
|
||||
expect(result).toContain("CAS");
|
||||
expect(result).toContain("status");
|
||||
expect(result.length).toBeGreaterThan(200);
|
||||
});
|
||||
|
||||
test("skill user returns non-empty markdown string", () => {
|
||||
const result = cmdSkillUser();
|
||||
expect(typeof result).toBe("string");
|
||||
expect(result).toContain("uwf");
|
||||
expect(result).toContain("thread");
|
||||
expect(result).toContain("workflow");
|
||||
expect(result).toContain("Quick Start");
|
||||
expect(result.length).toBeGreaterThan(500);
|
||||
});
|
||||
|
||||
test("skill author returns non-empty markdown string", () => {
|
||||
const result = cmdSkillAuthor();
|
||||
expect(typeof result).toBe("string");
|
||||
expect(result).toContain("frontmatter");
|
||||
expect(result).toContain("graph");
|
||||
expect(result).toContain("$START");
|
||||
expect(result).toContain("$END");
|
||||
expect(result).toContain("$status");
|
||||
expect(result.length).toBeGreaterThan(500);
|
||||
});
|
||||
|
||||
test("skill developer returns non-empty markdown string", () => {
|
||||
const result = cmdSkillDeveloper();
|
||||
expect(typeof result).toBe("string");
|
||||
expect(result).toContain("Monorepo");
|
||||
expect(result).toContain("CAS");
|
||||
expect(result).toContain("Biome");
|
||||
expect(result.length).toBeGreaterThan(500);
|
||||
});
|
||||
|
||||
test("skill adapter returns non-empty markdown string", () => {
|
||||
const result = cmdSkillAdapter();
|
||||
expect(typeof result).toBe("string");
|
||||
expect(result).toContain("createAgent");
|
||||
expect(result).toContain("AgentContext");
|
||||
expect(result).toContain("frontmatter");
|
||||
expect(result.length).toBeGreaterThan(500);
|
||||
});
|
||||
|
||||
test("skill help subcommand is suppressed", () => {
|
||||
const output = execFileSync("bun", ["src/cli.ts", "skill", "--help"], {
|
||||
cwd: join(__dirname, "..", ".."),
|
||||
@@ -73,6 +130,11 @@ describe("skill commands", () => {
|
||||
expect(output).toContain("architecture");
|
||||
expect(output).toContain("yaml");
|
||||
expect(output).toContain("moderator");
|
||||
expect(output).toContain("actor");
|
||||
expect(output).toContain("user");
|
||||
expect(output).toContain("author");
|
||||
expect(output).toContain("developer");
|
||||
expect(output).toContain("adapter");
|
||||
expect(output).toContain("list");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -17,10 +17,15 @@ import { cmdConfigGet, cmdConfigList, cmdConfigSet } from "./commands/config.js"
|
||||
import { cmdLogClean, cmdLogList, cmdLogShow } from "./commands/log.js";
|
||||
import { cmdSetup, cmdSetupInteractive } from "./commands/setup.js";
|
||||
import {
|
||||
cmdSkillActor,
|
||||
cmdSkillAdapter,
|
||||
cmdSkillArchitecture,
|
||||
cmdSkillAuthor,
|
||||
cmdSkillCli,
|
||||
cmdSkillDeveloper,
|
||||
cmdSkillList,
|
||||
cmdSkillModerator,
|
||||
cmdSkillUser,
|
||||
cmdSkillYaml,
|
||||
} from "./commands/skill.js";
|
||||
import { cmdStepFork, cmdStepList, cmdStepRead, cmdStepShow } from "./commands/step.js";
|
||||
@@ -503,6 +508,34 @@ skill
|
||||
console.log(cmdSkillYaml());
|
||||
});
|
||||
|
||||
skill
|
||||
.command("actor")
|
||||
.description("Print the actor reference (frontmatter protocol + CAS)")
|
||||
.action(() => {
|
||||
console.log(cmdSkillActor());
|
||||
});
|
||||
|
||||
skill
|
||||
.command("adapter")
|
||||
.description("Print the adapter reference (building agent adapters)")
|
||||
.action(() => {
|
||||
console.log(cmdSkillAdapter());
|
||||
});
|
||||
|
||||
skill
|
||||
.command("author")
|
||||
.description("Print the author reference (workflow YAML design guide)")
|
||||
.action(() => {
|
||||
console.log(cmdSkillAuthor());
|
||||
});
|
||||
|
||||
skill
|
||||
.command("developer")
|
||||
.description("Print the developer reference (coding conventions + architecture)")
|
||||
.action(() => {
|
||||
console.log(cmdSkillDeveloper());
|
||||
});
|
||||
|
||||
skill
|
||||
.command("moderator")
|
||||
.description("Print the moderator reference")
|
||||
@@ -510,6 +543,13 @@ skill
|
||||
console.log(cmdSkillModerator());
|
||||
});
|
||||
|
||||
skill
|
||||
.command("user")
|
||||
.description("Print the user reference (CLI guide + typical workflows)")
|
||||
.action(() => {
|
||||
console.log(cmdSkillUser());
|
||||
});
|
||||
|
||||
skill
|
||||
.command("list")
|
||||
.description("List all available skill names")
|
||||
|
||||
@@ -1,11 +1,26 @@
|
||||
export {
|
||||
generateActorReference as cmdSkillActor,
|
||||
generateAdapterReference as cmdSkillAdapter,
|
||||
generateArchitectureReference as cmdSkillArchitecture,
|
||||
generateAuthorReference as cmdSkillAuthor,
|
||||
generateCliReference as cmdSkillCli,
|
||||
generateDeveloperReference as cmdSkillDeveloper,
|
||||
generateModeratorReference as cmdSkillModerator,
|
||||
generateUserReference as cmdSkillUser,
|
||||
generateYamlReference as cmdSkillYaml,
|
||||
} from "@uncaged/workflow-util";
|
||||
|
||||
const SKILL_NAMES = ["cli", "architecture", "yaml", "moderator"] as const;
|
||||
const SKILL_NAMES = [
|
||||
"cli",
|
||||
"architecture",
|
||||
"yaml",
|
||||
"moderator",
|
||||
"actor",
|
||||
"user",
|
||||
"author",
|
||||
"developer",
|
||||
"adapter",
|
||||
] as const;
|
||||
|
||||
export function cmdSkillList(): ReadonlyArray<string> {
|
||||
return [...SKILL_NAMES];
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
# @uncaged/workflow-agent-builtin
|
||||
|
||||
## 0.5.1
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies
|
||||
- @uncaged/workflow-util@0.5.1
|
||||
- @uncaged/workflow-util-agent@0.5.1
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@uncaged/workflow-agent-builtin",
|
||||
"version": "0.5.0",
|
||||
"version": "0.5.1",
|
||||
"files": [
|
||||
"src",
|
||||
"dist",
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
# @uncaged/workflow-agent-claude-code
|
||||
|
||||
## 0.5.1
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies
|
||||
- @uncaged/workflow-util@0.5.1
|
||||
- @uncaged/workflow-util-agent@0.5.1
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@uncaged/workflow-agent-claude-code",
|
||||
"version": "0.1.0",
|
||||
"version": "0.5.1",
|
||||
"files": [
|
||||
"src",
|
||||
"dist",
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
# @uncaged/workflow-agent-hermes
|
||||
|
||||
## 0.5.1
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies
|
||||
- @uncaged/workflow-util@0.5.1
|
||||
- @uncaged/workflow-protocol@0.5.1
|
||||
- @uncaged/workflow-util-agent@0.5.1
|
||||
@@ -1,9 +1,15 @@
|
||||
import { Database } from "bun:sqlite";
|
||||
import { describe, expect, test } from "bun:test";
|
||||
import { mkdtemp, rm, writeFile } from "node:fs/promises";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { createMemoryStore, refs, validate, walk } from "@uncaged/json-cas";
|
||||
|
||||
import {
|
||||
computeDurationMs,
|
||||
extractLastAssistantContent,
|
||||
getHermesDbPath,
|
||||
loadHermesSessionFromDb,
|
||||
messageToTurnPayload,
|
||||
parseSessionIdFromStdout,
|
||||
storeHermesSessionDetail,
|
||||
@@ -124,3 +130,236 @@ describe("storeHermesSessionDetail", () => {
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// ── SQLite fallback tests ──────────────────────────────────────────
|
||||
|
||||
function createTestDb(dbPath: string): Database {
|
||||
const db = new Database(dbPath);
|
||||
db.run(`CREATE TABLE sessions (
|
||||
id TEXT PRIMARY KEY,
|
||||
model TEXT NOT NULL,
|
||||
started_at INTEGER NOT NULL
|
||||
)`);
|
||||
db.run(`CREATE TABLE messages (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
session_id TEXT NOT NULL,
|
||||
role TEXT NOT NULL,
|
||||
content TEXT,
|
||||
reasoning TEXT,
|
||||
tool_calls TEXT,
|
||||
FOREIGN KEY (session_id) REFERENCES sessions(id)
|
||||
)`);
|
||||
return db;
|
||||
}
|
||||
|
||||
describe("getHermesDbPath", () => {
|
||||
test("returns correct path", () => {
|
||||
const { homedir } = require("node:os");
|
||||
const { join } = require("node:path");
|
||||
expect(getHermesDbPath()).toBe(join(homedir(), ".hermes", "state.db"));
|
||||
});
|
||||
});
|
||||
|
||||
describe("loadHermesSessionFromDb", () => {
|
||||
test("returns session data from SQLite", async () => {
|
||||
const tmpDir = await mkdtemp(join(tmpdir(), "hermes-test-"));
|
||||
const dbPath = join(tmpDir, "state.db");
|
||||
const db = createTestDb(dbPath);
|
||||
|
||||
const sessionId = "test-session-001";
|
||||
const startedAt = 1748099519;
|
||||
db.run("INSERT INTO sessions (id, model, started_at) VALUES (?, ?, ?)", [
|
||||
sessionId,
|
||||
"claude-opus-4.6",
|
||||
startedAt,
|
||||
]);
|
||||
db.run(
|
||||
"INSERT INTO messages (session_id, role, content, reasoning, tool_calls) VALUES (?, ?, ?, ?, ?)",
|
||||
[sessionId, "user", "hello", null, null],
|
||||
);
|
||||
db.run(
|
||||
"INSERT INTO messages (session_id, role, content, reasoning, tool_calls) VALUES (?, ?, ?, ?, ?)",
|
||||
[sessionId, "assistant", "hi there", "thinking...", null],
|
||||
);
|
||||
db.close();
|
||||
|
||||
const result = await loadHermesSessionFromDb(sessionId, dbPath);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result!.session_id).toBe(sessionId);
|
||||
expect(result!.model).toBe("claude-opus-4.6");
|
||||
expect(result!.messages).toHaveLength(2);
|
||||
expect(result!.messages[0]!.role).toBe("user");
|
||||
expect(result!.messages[0]!.content).toBe("hello");
|
||||
expect(result!.messages[1]!.role).toBe("assistant");
|
||||
expect(result!.messages[1]!.content).toBe("hi there");
|
||||
expect(result!.messages[1]!.reasoning).toBe("thinking...");
|
||||
|
||||
await rm(tmpDir, { recursive: true });
|
||||
});
|
||||
|
||||
test("returns null when no session exists in DB", async () => {
|
||||
const tmpDir = await mkdtemp(join(tmpdir(), "hermes-test-"));
|
||||
const dbPath = join(tmpDir, "state.db");
|
||||
const db = createTestDb(dbPath);
|
||||
db.close();
|
||||
|
||||
const result = await loadHermesSessionFromDb("nonexistent", dbPath);
|
||||
expect(result).toBeNull();
|
||||
|
||||
await rm(tmpDir, { recursive: true });
|
||||
});
|
||||
|
||||
test("returns null when DB file does not exist", async () => {
|
||||
const result = await loadHermesSessionFromDb("any-id", "/tmp/nonexistent-hermes-db.db");
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
test("correctly parses tool_calls from DB JSON string", async () => {
|
||||
const tmpDir = await mkdtemp(join(tmpdir(), "hermes-test-"));
|
||||
const dbPath = join(tmpDir, "state.db");
|
||||
const db = createTestDb(dbPath);
|
||||
|
||||
const sessionId = "test-tool-calls";
|
||||
db.run("INSERT INTO sessions (id, model, started_at) VALUES (?, ?, ?)", [
|
||||
sessionId,
|
||||
"gpt-4",
|
||||
1748099519,
|
||||
]);
|
||||
const toolCallsJson = JSON.stringify([
|
||||
{ function: { name: "read_file", arguments: '{"path":"x"}' } },
|
||||
]);
|
||||
db.run(
|
||||
"INSERT INTO messages (session_id, role, content, reasoning, tool_calls) VALUES (?, ?, ?, ?, ?)",
|
||||
[sessionId, "assistant", "", null, toolCallsJson],
|
||||
);
|
||||
db.close();
|
||||
|
||||
const result = await loadHermesSessionFromDb(sessionId, dbPath);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result!.messages[0]!.tool_calls).toEqual([
|
||||
{ function: { name: "read_file", arguments: '{"path":"x"}' } },
|
||||
]);
|
||||
|
||||
await rm(tmpDir, { recursive: true });
|
||||
});
|
||||
|
||||
test("handles null fields in DB messages gracefully", async () => {
|
||||
const tmpDir = await mkdtemp(join(tmpdir(), "hermes-test-"));
|
||||
const dbPath = join(tmpDir, "state.db");
|
||||
const db = createTestDb(dbPath);
|
||||
|
||||
const sessionId = "test-nulls";
|
||||
db.run("INSERT INTO sessions (id, model, started_at) VALUES (?, ?, ?)", [
|
||||
sessionId,
|
||||
"model",
|
||||
1748099519,
|
||||
]);
|
||||
db.run(
|
||||
"INSERT INTO messages (session_id, role, content, reasoning, tool_calls) VALUES (?, ?, ?, ?, ?)",
|
||||
[sessionId, "assistant", null, null, null],
|
||||
);
|
||||
db.close();
|
||||
|
||||
const result = await loadHermesSessionFromDb(sessionId, dbPath);
|
||||
expect(result).not.toBeNull();
|
||||
const msg = result!.messages[0]!;
|
||||
expect(msg.content).toBeNull();
|
||||
expect(msg.reasoning).toBeNull();
|
||||
expect(msg.tool_calls).toBeNull();
|
||||
|
||||
await rm(tmpDir, { recursive: true });
|
||||
});
|
||||
|
||||
test("messages ordered by insertion order", async () => {
|
||||
const tmpDir = await mkdtemp(join(tmpdir(), "hermes-test-"));
|
||||
const dbPath = join(tmpDir, "state.db");
|
||||
const db = createTestDb(dbPath);
|
||||
|
||||
const sessionId = "test-order";
|
||||
db.run("INSERT INTO sessions (id, model, started_at) VALUES (?, ?, ?)", [
|
||||
sessionId,
|
||||
"model",
|
||||
1748099519,
|
||||
]);
|
||||
db.run(
|
||||
"INSERT INTO messages (session_id, role, content, reasoning, tool_calls) VALUES (?, ?, ?, ?, ?)",
|
||||
[sessionId, "user", "first", null, null],
|
||||
);
|
||||
db.run(
|
||||
"INSERT INTO messages (session_id, role, content, reasoning, tool_calls) VALUES (?, ?, ?, ?, ?)",
|
||||
[sessionId, "assistant", "second", null, null],
|
||||
);
|
||||
db.run(
|
||||
"INSERT INTO messages (session_id, role, content, reasoning, tool_calls) VALUES (?, ?, ?, ?, ?)",
|
||||
[sessionId, "user", "third", null, null],
|
||||
);
|
||||
db.close();
|
||||
|
||||
const result = await loadHermesSessionFromDb(sessionId, dbPath);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result!.messages.map((m) => m.content)).toEqual(["first", "second", "third"]);
|
||||
|
||||
await rm(tmpDir, { recursive: true });
|
||||
});
|
||||
|
||||
test("converts unix timestamp to ISO string for session_start", async () => {
|
||||
const tmpDir = await mkdtemp(join(tmpdir(), "hermes-test-"));
|
||||
const dbPath = join(tmpDir, "state.db");
|
||||
const db = createTestDb(dbPath);
|
||||
|
||||
const sessionId = "test-timestamp";
|
||||
const startedAt = 1748099519;
|
||||
db.run("INSERT INTO sessions (id, model, started_at) VALUES (?, ?, ?)", [
|
||||
sessionId,
|
||||
"model",
|
||||
startedAt,
|
||||
]);
|
||||
db.close();
|
||||
|
||||
const result = await loadHermesSessionFromDb(sessionId, dbPath);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result!.session_start).toBe(new Date(startedAt * 1000).toISOString());
|
||||
|
||||
await rm(tmpDir, { recursive: true });
|
||||
});
|
||||
});
|
||||
|
||||
describe("loadHermesSession with SQLite fallback", () => {
|
||||
test("JSON file takes priority over DB", async () => {
|
||||
const tmpDir = await mkdtemp(join(tmpdir(), "hermes-test-"));
|
||||
const dbPath = join(tmpDir, "state.db");
|
||||
const jsonPath = join(tmpDir, "session.json");
|
||||
|
||||
// Create DB with one model value
|
||||
const db = createTestDb(dbPath);
|
||||
const sessionId = "test-priority";
|
||||
db.run("INSERT INTO sessions (id, model, started_at) VALUES (?, ?, ?)", [
|
||||
sessionId,
|
||||
"db-model",
|
||||
1748099519,
|
||||
]);
|
||||
db.run(
|
||||
"INSERT INTO messages (session_id, role, content, reasoning, tool_calls) VALUES (?, ?, ?, ?, ?)",
|
||||
[sessionId, "user", "from db", null, null],
|
||||
);
|
||||
db.close();
|
||||
|
||||
// Create JSON file with a different model value
|
||||
const jsonData: HermesSessionJson = {
|
||||
session_id: sessionId,
|
||||
model: "json-model",
|
||||
session_start: "2026-05-24T12:00:00.000Z",
|
||||
messages: [{ role: "user", content: "from json", reasoning: null, tool_calls: null }],
|
||||
};
|
||||
await writeFile(jsonPath, JSON.stringify(jsonData));
|
||||
|
||||
// loadHermesSession reads from JSON path, so we test the existing function directly
|
||||
// The JSON-first priority is inherent in the implementation
|
||||
const { readFile } = await import("node:fs/promises");
|
||||
const text = await readFile(jsonPath, "utf8");
|
||||
const parsed = JSON.parse(text);
|
||||
expect(parsed.model).toBe("json-model");
|
||||
|
||||
await rm(tmpDir, { recursive: true });
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@uncaged/workflow-agent-hermes",
|
||||
"version": "0.5.0",
|
||||
"version": "0.5.1",
|
||||
"files": [
|
||||
"src",
|
||||
"dist",
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { Database } from "bun:sqlite";
|
||||
import { readFile } from "node:fs/promises";
|
||||
import { homedir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
@@ -108,15 +109,99 @@ function parseSessionJson(raw: unknown): HermesSessionJson | null {
|
||||
return { session_id, model, session_start, messages };
|
||||
}
|
||||
|
||||
export function getHermesDbPath(): string {
|
||||
return join(homedir(), ".hermes", "state.db");
|
||||
}
|
||||
|
||||
type DbSessionRow = {
|
||||
id: string;
|
||||
model: string;
|
||||
started_at: number;
|
||||
};
|
||||
|
||||
type DbMessageRow = {
|
||||
role: string;
|
||||
content: string | null;
|
||||
reasoning: string | null;
|
||||
tool_calls: string | null;
|
||||
};
|
||||
|
||||
function parseDbToolCalls(raw: string | null): HermesSessionMessage["tool_calls"] {
|
||||
if (raw === null) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
const parsed = JSON.parse(raw) as unknown;
|
||||
return parseToolCalls(parsed);
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
function dbMessageToSessionMessage(row: DbMessageRow): HermesSessionMessage {
|
||||
return {
|
||||
role: row.role,
|
||||
content: row.content ?? null,
|
||||
reasoning: row.reasoning ?? null,
|
||||
tool_calls: parseDbToolCalls(row.tool_calls),
|
||||
};
|
||||
}
|
||||
|
||||
export function loadHermesSessionFromDb(
|
||||
sessionId: string,
|
||||
dbPath: string | null = null,
|
||||
): HermesSessionJson | null {
|
||||
const resolvedPath = dbPath ?? getHermesDbPath();
|
||||
let db: InstanceType<typeof Database> | null = null;
|
||||
try {
|
||||
db = new Database(resolvedPath, { readonly: true });
|
||||
const session = db
|
||||
.query("SELECT id, model, started_at FROM sessions WHERE id = ?")
|
||||
.get(sessionId) as DbSessionRow | null;
|
||||
if (session === null) {
|
||||
return null;
|
||||
}
|
||||
const rows = db
|
||||
.query(
|
||||
"SELECT role, content, reasoning, tool_calls FROM messages WHERE session_id = ? ORDER BY id",
|
||||
)
|
||||
.all(sessionId) as DbMessageRow[];
|
||||
|
||||
const messages: HermesSessionMessage[] = [];
|
||||
for (const row of rows) {
|
||||
const role = row.role;
|
||||
if (role !== "user" && role !== "assistant" && role !== "tool") {
|
||||
continue;
|
||||
}
|
||||
messages.push(dbMessageToSessionMessage(row));
|
||||
}
|
||||
|
||||
return {
|
||||
session_id: session.id,
|
||||
model: session.model,
|
||||
session_start: new Date(session.started_at * 1000).toISOString(),
|
||||
messages,
|
||||
};
|
||||
} catch {
|
||||
return null;
|
||||
} finally {
|
||||
db?.close();
|
||||
}
|
||||
}
|
||||
|
||||
export async function loadHermesSession(sessionId: string): Promise<HermesSessionJson | null> {
|
||||
const path = getHermesSessionPath(sessionId);
|
||||
try {
|
||||
const text = await readFile(path, "utf8");
|
||||
const raw = JSON.parse(text) as unknown;
|
||||
return parseSessionJson(raw);
|
||||
const result = parseSessionJson(raw);
|
||||
if (result !== null) {
|
||||
return result;
|
||||
}
|
||||
} catch {
|
||||
return null;
|
||||
// JSON file not available, fall through to DB
|
||||
}
|
||||
return loadHermesSessionFromDb(sessionId);
|
||||
}
|
||||
|
||||
export function computeDurationMs(sessionStart: string, nowMs: number = Date.now()): number {
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
# @uncaged/workflow-protocol
|
||||
|
||||
## 0.5.1
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@uncaged/workflow-protocol",
|
||||
"version": "0.5.0",
|
||||
"version": "0.5.1",
|
||||
"files": [
|
||||
"src",
|
||||
"dist",
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
# @uncaged/workflow-util-agent
|
||||
|
||||
## 0.5.1
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies
|
||||
- @uncaged/workflow-util@0.5.1
|
||||
- @uncaged/workflow-protocol@0.5.1
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@uncaged/workflow-util-agent",
|
||||
"version": "0.5.0",
|
||||
"version": "0.5.1",
|
||||
"files": [
|
||||
"src",
|
||||
"dist",
|
||||
|
||||
@@ -1,78 +1,7 @@
|
||||
# @uncaged/workflow-util
|
||||
|
||||
## 0.5.0-alpha.4
|
||||
## 0.5.1
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Replace optionalEnv/requireEnv with unified env(name, fallback) API
|
||||
- Updated dependencies [f74b482]
|
||||
- Updated dependencies [f74b482]
|
||||
- @uncaged/workflow-protocol@0.5.0-alpha.4
|
||||
|
||||
## 0.5.0-alpha.3
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies
|
||||
- @uncaged/workflow-protocol@0.5.0-alpha.3
|
||||
|
||||
## 0.5.0-alpha.2
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies
|
||||
- @uncaged/workflow-protocol@0.5.0-alpha.2
|
||||
|
||||
## 0.5.0-alpha.1
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- @uncaged/workflow-protocol@0.5.0-alpha.1
|
||||
|
||||
## 0.5.0-alpha.0
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies
|
||||
- @uncaged/workflow-protocol@0.5.0-alpha.0
|
||||
|
||||
## 0.4.5
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies
|
||||
- @uncaged/workflow-protocol@0.4.5
|
||||
|
||||
## 0.4.4
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies
|
||||
- @uncaged/workflow-protocol@0.4.4
|
||||
|
||||
## 0.4.3
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Include src/ in published packages so bun runtime can resolve the 'bun' exports condition.
|
||||
- Updated dependencies
|
||||
- @uncaged/workflow-protocol@0.4.3
|
||||
|
||||
## 0.4.2
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Fix workspace dependency resolution: use workspace:^ so published packages resolve to compatible versions instead of exact (non-existent) versions.
|
||||
- Updated dependencies
|
||||
- @uncaged/workflow-protocol@0.4.2
|
||||
|
||||
## 0.4.0
|
||||
|
||||
### Minor Changes
|
||||
|
||||
- Fix package exports for published packages and adopt changesets for version management.
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies
|
||||
- @uncaged/workflow-protocol@0.4.0
|
||||
- Add 5 persona-based skills (actor, user, author, developer, adapter) and fix skill CLI description truncation
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@uncaged/workflow-util",
|
||||
"version": "0.5.0",
|
||||
"version": "0.5.1",
|
||||
"files": [
|
||||
"src",
|
||||
"dist",
|
||||
|
||||
@@ -0,0 +1,68 @@
|
||||
export function generateActorReference(): string {
|
||||
return `# Actor Reference
|
||||
|
||||
You are executing a workflow role. Your system prompt defines your goal, procedure, and output requirements. This reference covers two things you need to know about the workflow engine.
|
||||
|
||||
## 1. Frontmatter Output Protocol
|
||||
|
||||
Your response **MUST** begin with a YAML frontmatter block at byte position 0 — no preamble text before it.
|
||||
|
||||
\`\`\`
|
||||
---
|
||||
status: done
|
||||
myField: some value
|
||||
---
|
||||
|
||||
... markdown body (your work, explanation, notes) ...
|
||||
\`\`\`
|
||||
|
||||
### Standard Field
|
||||
|
||||
| Field | Values | Default | Description |
|
||||
|-------|--------|---------|-------------|
|
||||
| \`status\` | \`done\`, \`needs_input\`, \`in_progress\`, \`failed\` | \`done\` | Completion signal — determines which graph edge the moderator follows next |
|
||||
|
||||
### Schema-Defined Fields
|
||||
|
||||
Your role's output schema (shown in the system prompt under "Deliverable Format") defines additional fields. Output **only** the fields listed there — do not invent extra fields.
|
||||
|
||||
### Body
|
||||
|
||||
Everything after the closing \`---\` fence is the markdown body. Use it for explanations, logs, or human-readable notes. The body is stored but not parsed by the engine.
|
||||
|
||||
### Retry
|
||||
|
||||
If the engine cannot parse your frontmatter, it will ask you to retry (up to 2 times). Just output the corrected frontmatter block — don't panic.
|
||||
|
||||
## 2. CAS (Content-Addressable Store)
|
||||
|
||||
Your frontmatter output is automatically stored in CAS. You can also **use CAS directly** to store intermediate artifacts, build merkle DAGs for large outputs, or reference data from previous steps.
|
||||
|
||||
### Commands
|
||||
|
||||
\`\`\`
|
||||
uwf cas put-text <text> # store plain text, print hash
|
||||
uwf cas put <type-hash> <json> # store typed JSON data, print hash
|
||||
uwf cas get <hash> # read a CAS node (type + payload)
|
||||
uwf cas has <hash> # check if a hash exists
|
||||
uwf cas refs <hash> # list direct references from a node
|
||||
uwf cas walk <hash> # recursive traversal from a node
|
||||
uwf cas schema list # list registered schemas
|
||||
uwf cas schema get <hash> # show a schema definition
|
||||
\`\`\`
|
||||
|
||||
### Merkle DAG Pattern
|
||||
|
||||
For large outputs, store parts individually and reference their hashes:
|
||||
|
||||
\`\`\`bash
|
||||
# Store individual sections
|
||||
HASH1=$(uwf cas put-text "section 1 content")
|
||||
HASH2=$(uwf cas put-text "section 2 content")
|
||||
|
||||
# Reference hashes in your frontmatter or in a parent node
|
||||
\`\`\`
|
||||
|
||||
This enables progressive loading — consumers can fetch the root and resolve children on demand.
|
||||
`;
|
||||
}
|
||||
@@ -0,0 +1,163 @@
|
||||
export function generateAdapterReference(): string {
|
||||
return `# Adapter Reference
|
||||
|
||||
Guide for building a new agent adapter (CLI binary) for the workflow engine.
|
||||
|
||||
## What Is an Adapter
|
||||
|
||||
An adapter is a CLI command (e.g. \`uwf-hermes\`, \`uwf-builtin\`) that the engine spawns to execute a role. It bridges the workflow engine and an LLM/agent backend. The engine calls it with:
|
||||
|
||||
\`\`\`
|
||||
uwf-<name> --thread <id> --role <role> --prompt <text>
|
||||
\`\`\`
|
||||
|
||||
The adapter must produce frontmatter markdown output. The engine handles argument parsing, context building, output extraction, and CAS persistence — you just implement the LLM interaction.
|
||||
|
||||
## Quick Start
|
||||
|
||||
\`\`\`typescript
|
||||
import { createAgent } from "@uncaged/workflow-util-agent";
|
||||
import type { AgentContext, AgentRunResult, AgentContinueFn, AgentRunFn } from "@uncaged/workflow-util-agent";
|
||||
|
||||
const run: AgentRunFn = async (ctx: AgentContext): Promise<AgentRunResult> => {
|
||||
// 1. Build your prompt from ctx
|
||||
// 2. Call your LLM backend
|
||||
// 3. Return the result
|
||||
return { output: rawMarkdown, detailHash, sessionId };
|
||||
};
|
||||
|
||||
const continue_: AgentContinueFn = async (sessionId, message, store) => {
|
||||
// Resume an existing session with a correction message
|
||||
return { output: correctedMarkdown, detailHash, sessionId };
|
||||
};
|
||||
|
||||
const main = createAgent({ name: "my-agent", run, continue: continue_ });
|
||||
main();
|
||||
\`\`\`
|
||||
|
||||
## The \`createAgent\` Factory
|
||||
|
||||
\`createAgent(options)\` returns an async \`main()\` function that handles the full lifecycle:
|
||||
|
||||
1. Parses CLI args (\`--thread\`, \`--role\`, \`--prompt\`)
|
||||
2. Loads \`.env\` from storage root
|
||||
3. Builds \`AgentContext\` (thread history, workflow definition, role prompt)
|
||||
4. Injects \`outputFormatInstruction\` from the role's frontmatter schema
|
||||
5. Calls your \`run(ctx)\` function
|
||||
6. Extracts frontmatter from your output via \`tryFrontmatterFastPath()\`
|
||||
7. If extraction fails, calls your \`continue(sessionId, correctionMessage, store)\` up to 2 times
|
||||
8. Persists the validated output as a CAS step node
|
||||
9. Prints the step hash to stdout
|
||||
|
||||
You only implement \`run\` and \`continue\`.
|
||||
|
||||
## AgentOptions
|
||||
|
||||
\`\`\`typescript
|
||||
type AgentOptions = {
|
||||
name: string; // Adapter name (used in step records as "uwf-<name>")
|
||||
run: AgentRunFn; // Execute a role from scratch
|
||||
continue: AgentContinueFn; // Resume a session for frontmatter correction
|
||||
};
|
||||
\`\`\`
|
||||
|
||||
## AgentContext
|
||||
|
||||
The \`ctx\` object passed to your \`run\` function:
|
||||
|
||||
| Field | Type | Description |
|
||||
|-------|------|-------------|
|
||||
| \`threadId\` | \`string\` | Thread ULID |
|
||||
| \`role\` | \`string\` | Role name being executed |
|
||||
| \`edgePrompt\` | \`string\` | Moderator's task instruction for this step |
|
||||
| \`workflow\` | \`WorkflowPayload\` | Full workflow definition (roles, graph) |
|
||||
| \`start\` | \`StartNodePayload\` | Thread start data (workflow hash, user prompt) |
|
||||
| \`steps\` | \`StepContext[]\` | Previous steps with expanded outputs |
|
||||
| \`store\` | \`Store\` | CAS store for reading/writing data |
|
||||
| \`outputFormatInstruction\` | \`string\` | Frontmatter format instruction (inject into system prompt) |
|
||||
| \`isFirstVisit\` | \`boolean\` | True if this role hasn't run before in this thread |
|
||||
|
||||
## AgentRunResult
|
||||
|
||||
Your \`run\` and \`continue\` functions must return:
|
||||
|
||||
\`\`\`typescript
|
||||
type AgentRunResult = {
|
||||
output: string; // Raw markdown with frontmatter (must start with ---)
|
||||
detailHash: string; // CAS hash of session detail (turn history, metadata)
|
||||
sessionId: string; // Session ID for potential continue() calls
|
||||
};
|
||||
\`\`\`
|
||||
|
||||
## Building the Prompt
|
||||
|
||||
Use helpers from \`@uncaged/workflow-util-agent\`:
|
||||
|
||||
| Helper | Purpose |
|
||||
|--------|---------|
|
||||
| \`buildRolePrompt(roleDef)\` | Assemble Goal/Capabilities/Prepare/Procedure/Output sections |
|
||||
| \`buildContinuationPrompt(steps, role, edgePrompt)\` | For re-entry: steps since last visit + edge prompt |
|
||||
| \`ctx.outputFormatInstruction\` | Pre-built frontmatter format block (inject into system prompt) |
|
||||
|
||||
Typical system prompt structure:
|
||||
\`\`\`
|
||||
[outputFormatInstruction]
|
||||
[rolePrompt from buildRolePrompt()]
|
||||
[workflow metadata]
|
||||
\`\`\`
|
||||
|
||||
## Storing Session Detail
|
||||
|
||||
Store your turn history as a CAS merkle DAG for debugging and replay:
|
||||
|
||||
\`\`\`typescript
|
||||
// Store each turn as a CAS text node
|
||||
const turnHash = await store.put(textSchema, { content: turnData });
|
||||
|
||||
// Build a detail node referencing all turns
|
||||
const detailHash = await store.put(detailSchema, { turns: turnHashes });
|
||||
\`\`\`
|
||||
|
||||
The \`detailHash\` is preserved from the first \`run()\` call — retry \`continue()\` calls don't overwrite it.
|
||||
|
||||
## Registration
|
||||
|
||||
Register your adapter in \`~/.uncaged/workflow/config.yaml\`:
|
||||
|
||||
\`\`\`yaml
|
||||
agents:
|
||||
my-agent:
|
||||
command: uwf-my-agent
|
||||
args: []
|
||||
\`\`\`
|
||||
|
||||
Use it:
|
||||
\`\`\`bash
|
||||
uwf thread exec <thread-id> --agent my-agent
|
||||
\`\`\`
|
||||
|
||||
Or set as default:
|
||||
\`\`\`yaml
|
||||
defaultAgent: my-agent
|
||||
\`\`\`
|
||||
|
||||
## Existing Adapters
|
||||
|
||||
| Adapter | Package | Backend |
|
||||
|---------|---------|---------|
|
||||
| \`uwf-hermes\` | \`@uncaged/workflow-agent-hermes\` | Hermes ACP (chat sessions) |
|
||||
| \`uwf-builtin\` | \`@uncaged/workflow-agent-builtin\` | Direct OpenAI API (tools + loop) |
|
||||
| \`uwf-claude-code\` | \`@uncaged/workflow-agent-claude-code\` | Claude Code CLI |
|
||||
|
||||
Study these for patterns on prompt building, session management, and detail storage.
|
||||
|
||||
## Checklist
|
||||
|
||||
1. Implement \`run(ctx)\` — build prompt, call LLM, return output + detailHash + sessionId
|
||||
2. Implement \`continue(sessionId, message, store)\` — resume session for frontmatter correction
|
||||
3. Store session detail as CAS nodes (for debugging)
|
||||
4. Ensure output starts with \`---\` frontmatter block
|
||||
5. Add a \`bin\` entry in \`package.json\` for the CLI command
|
||||
6. Register in config.yaml and test with \`uwf thread exec --agent <name>\`
|
||||
`;
|
||||
}
|
||||
@@ -0,0 +1,183 @@
|
||||
export function generateAuthorReference(): string {
|
||||
return `# Author Reference
|
||||
|
||||
Guide for designing and writing workflow YAML definitions.
|
||||
|
||||
## Workflow Structure
|
||||
|
||||
\`\`\`yaml
|
||||
name: solve-issue # verb-first kebab-case
|
||||
description: "..." # human-readable summary
|
||||
|
||||
roles: # named actors
|
||||
planner:
|
||||
description: "..." # short purpose
|
||||
goal: "..." # system-level goal for the agent
|
||||
capabilities: [...] # skill keywords the agent should load
|
||||
procedure: | # step-by-step instructions
|
||||
1. Do this
|
||||
2. Do that
|
||||
output: "..." # what the agent should produce
|
||||
frontmatter: # JSON Schema for structured output
|
||||
oneOf:
|
||||
- properties:
|
||||
$status: { const: "ready" }
|
||||
plan: { type: string }
|
||||
required: [$status, plan]
|
||||
- properties:
|
||||
$status: { const: "failed" }
|
||||
error: { type: string }
|
||||
required: [$status, error]
|
||||
|
||||
graph: # status-based routing
|
||||
$START:
|
||||
_: { role: planner, prompt: "Analyze the issue." }
|
||||
planner:
|
||||
ready: { role: developer, prompt: "Implement {{{plan}}}." }
|
||||
failed: { role: $END, prompt: "Failed: {{{error}}}" }
|
||||
\`\`\`
|
||||
|
||||
## Role Definition
|
||||
|
||||
| Field | Purpose |
|
||||
|-------|---------|
|
||||
| \`description\` | Short description for humans and moderator context |
|
||||
| \`goal\` | Injected as the agent's system-level objective |
|
||||
| \`capabilities\` | Keyword tags — agent loads matching skills before starting |
|
||||
| \`procedure\` | Step-by-step instructions the agent follows |
|
||||
| \`output\` | Describes what to produce and which \`$status\` values to use |
|
||||
| \`frontmatter\` | JSON Schema defining the structured output fields |
|
||||
|
||||
### Role Design Principles
|
||||
|
||||
- **Single responsibility** — each role does one thing well
|
||||
- **Minimal context** — don't overload a role with too many steps; split if needed
|
||||
- **Clear status values** — each status should map to a distinct graph edge
|
||||
- **Explicit output** — tell the agent exactly what \`$status\` values are valid
|
||||
|
||||
## Frontmatter Schema
|
||||
|
||||
The \`frontmatter\` field is a standard JSON Schema. It defines the structured fields the agent must output in YAML frontmatter.
|
||||
|
||||
### \`$status\` Field
|
||||
|
||||
\`$status\` is the only standard field. Its value determines which graph edge the moderator follows. Use \`const\` to constrain each variant:
|
||||
|
||||
\`\`\`yaml
|
||||
frontmatter:
|
||||
oneOf:
|
||||
- properties:
|
||||
$status: { const: "done" }
|
||||
result: { type: string }
|
||||
required: [$status, result]
|
||||
- properties:
|
||||
$status: { const: "failed" }
|
||||
error: { type: string }
|
||||
required: [$status, error]
|
||||
\`\`\`
|
||||
|
||||
### Custom Fields
|
||||
|
||||
Add any fields you need for data passing between roles. These are available in edge prompts via Mustache templates.
|
||||
|
||||
### Flat Schema (Single Status)
|
||||
|
||||
When a role has only one outcome:
|
||||
|
||||
\`\`\`yaml
|
||||
frontmatter:
|
||||
properties:
|
||||
$status: { const: "done" }
|
||||
summary: { type: string }
|
||||
required: [$status, summary]
|
||||
\`\`\`
|
||||
|
||||
## Graph Routing
|
||||
|
||||
The graph maps each role's \`$status\` values to the next role:
|
||||
|
||||
\`\`\`
|
||||
graph[role][$status] → { role: nextRole, prompt: edgePrompt }
|
||||
\`\`\`
|
||||
|
||||
### Special Nodes
|
||||
|
||||
| Node | Purpose |
|
||||
|------|---------|
|
||||
| \`$START\` | Entry point — status key is always \`_\` (unconditional) |
|
||||
| \`$END\` | Terminal — thread completes and is archived |
|
||||
|
||||
### Edge Prompts
|
||||
|
||||
Use triple-brace Mustache (\`{{{field}}}\`) to pass data from the previous step's output:
|
||||
|
||||
\`\`\`yaml
|
||||
graph:
|
||||
planner:
|
||||
ready: { role: developer, prompt: "Implement plan {{{plan}}} in {{{repoPath}}}." }
|
||||
\`\`\`
|
||||
|
||||
The fields referenced must exist in the source role's frontmatter schema.
|
||||
|
||||
### Loops and Branching
|
||||
|
||||
Roles can route back to previous roles (loops) or to different roles based on status (branching):
|
||||
|
||||
\`\`\`yaml
|
||||
graph:
|
||||
reviewer:
|
||||
approved: { role: tester, prompt: "Run tests." }
|
||||
rejected: { role: developer, prompt: "Fix: {{{comments}}}" } # loop back
|
||||
\`\`\`
|
||||
|
||||
### Fail Routing
|
||||
|
||||
Route failures to a cleanup role or \`$END\`:
|
||||
|
||||
\`\`\`yaml
|
||||
graph:
|
||||
developer:
|
||||
done: { role: reviewer, prompt: "Review changes." }
|
||||
failed: { role: cleanup, prompt: "Clean up: {{{error}}}" }
|
||||
\`\`\`
|
||||
|
||||
## Self-Testing
|
||||
|
||||
### Step-by-Step Verification
|
||||
|
||||
\`\`\`bash
|
||||
# Start a thread directly from YAML file (no registration needed)
|
||||
uwf thread start my-workflow.yaml -p "Test prompt"
|
||||
|
||||
# Or register first, then start by name
|
||||
uwf workflow add my-workflow.yaml
|
||||
uwf thread start my-workflow -p "Test prompt"
|
||||
|
||||
# Execute one step at a time to verify routing
|
||||
uwf thread exec <thread-id>
|
||||
|
||||
# Inspect step output
|
||||
uwf step list <thread-id>
|
||||
uwf step show <step-hash>
|
||||
|
||||
# Check the CAS data
|
||||
uwf cas get <output-hash>
|
||||
\`\`\`
|
||||
|
||||
### Validation Checklist
|
||||
|
||||
1. Every \`$status\` value in a role's frontmatter has a matching edge in the graph
|
||||
2. Every field referenced in edge prompts (\`{{{field}}}\`) exists in the source role's schema
|
||||
3. Every role referenced in the graph exists in \`roles\`
|
||||
4. \`$START\` has exactly one edge with key \`_\`
|
||||
5. At least one path leads to \`$END\`
|
||||
6. No orphan roles (defined but never routed to)
|
||||
|
||||
## Common Pitfalls
|
||||
|
||||
- **Missing graph edge** — if a role can produce \`$status: failed\` but the graph has no \`failed\` edge, the moderator will error
|
||||
- **Mustache field mismatch** — referencing \`{{{branch}}}\` in an edge prompt but the source schema has \`branchName\` instead
|
||||
- **Overly complex roles** — a role with 20 steps should be split; each role should be completable in one agent turn
|
||||
- **No fail path** — always handle failure; route to cleanup or \`$END\`
|
||||
`;
|
||||
}
|
||||
@@ -0,0 +1,140 @@
|
||||
export function generateDeveloperReference(): string {
|
||||
return `# Developer Reference
|
||||
|
||||
Guide for contributing to the workflow engine codebase.
|
||||
|
||||
## Monorepo Structure
|
||||
|
||||
\`\`\`
|
||||
packages/
|
||||
workflow-protocol/ # Shared types (WorkflowPayload, StepNodePayload, etc.)
|
||||
workflow-util/ # Base32, ULID, logger, frontmatter parsing, skill references
|
||||
workflow-util-agent/ # createAgent factory, context builder, extract pipeline
|
||||
workflow-agent-hermes/ # uwf-hermes CLI (spawns Hermes chat sessions)
|
||||
workflow-agent-builtin/ # uwf-builtin CLI (direct LLM calls via OpenAI API)
|
||||
cli-workflow/ # uwf CLI (moderator, thread/step/cas/config commands)
|
||||
\`\`\`
|
||||
|
||||
Dependency layers (each only imports from packages above it):
|
||||
\`\`\`
|
||||
protocol → util → util-agent → agent-hermes / agent-builtin / cli-workflow
|
||||
\`\`\`
|
||||
|
||||
External CAS: \`@uncaged/json-cas\` (store API, hashing, schema validation) + \`@uncaged/json-cas-fs\` (filesystem backend).
|
||||
|
||||
## Coding Conventions
|
||||
|
||||
### Functional-first
|
||||
|
||||
| Rule | Description |
|
||||
|------|-------------|
|
||||
| \`type\` over \`interface\` | All type definitions use \`type\` |
|
||||
| \`function\` over \`class\` | Pure functions + closures, no class |
|
||||
| No \`this\` | Functions must not depend on \`this\` context |
|
||||
| No inheritance | No \`extends\`, \`implements\`, \`abstract\` |
|
||||
| No optional properties | Use \`T \\| null\` instead of \`?:\` |
|
||||
| Immutability first | Use \`Readonly<T>\`, \`as const\`, avoid mutation |
|
||||
|
||||
Classes allowed only when required by third-party libraries or for Error subclasses.
|
||||
|
||||
### Error Handling
|
||||
|
||||
- \`Result<T, E>\` type for expected failures (\`ok\`/\`err\` constructors from \`@uncaged/workflow-util\`)
|
||||
- \`throw\` only for unrecoverable bugs
|
||||
- No try-catch for flow control
|
||||
|
||||
### Async
|
||||
|
||||
Always \`async/await\`, never \`.then()\` chains.
|
||||
|
||||
### Logging
|
||||
|
||||
\`console.*\` is banned (Biome \`noConsole\` rule). Use the structured logger:
|
||||
|
||||
\`\`\`typescript
|
||||
import { createLogger } from "@uncaged/workflow-util";
|
||||
const log = createLogger();
|
||||
log("4KNMR2PX", "Loading workflow..."); // 8-char Crockford Base32 tag
|
||||
\`\`\`
|
||||
|
||||
Each call site gets a unique hand-written tag. \`grep "4KNMR2PX"\` in logs → instant code location.
|
||||
|
||||
CLI package (\`@uncaged/cli-workflow\`) may use \`console.log\` for user-facing output with a biome-ignore comment.
|
||||
|
||||
### No Dynamic Import
|
||||
|
||||
No \`await import()\` in production code. Always static top-level \`import\`. Test files are exempt.
|
||||
|
||||
### Naming
|
||||
|
||||
- Workflow names: verb-first kebab-case (\`solve-issue\`, \`review-code\`)
|
||||
- IDs: Crockford Base32 — CAS hash (XXH64, 13-char), Thread ID (ULID, 26-char)
|
||||
|
||||
## Development Workflow
|
||||
|
||||
\`\`\`bash
|
||||
bun install # install all workspace deps
|
||||
bun run build # tsc --build (all packages)
|
||||
bun run check # tsc + biome check + lint-log-tags
|
||||
bun run format # biome format --write
|
||||
bun test # run all tests
|
||||
\`\`\`
|
||||
|
||||
Before committing: \`bun run check\` + \`bun test\` must both pass.
|
||||
|
||||
### Testing
|
||||
|
||||
- \`cli-workflow\`: vitest
|
||||
- Other packages: \`bun test\`
|
||||
- Test files live in \`__tests__/\` directories
|
||||
|
||||
### Publishing
|
||||
|
||||
Fixed-mode versioning — all \`@uncaged/*\` packages share the same version number.
|
||||
|
||||
\`\`\`bash
|
||||
bun changeset # describe the change
|
||||
bun version # bump versions + changelogs
|
||||
bun release # build + test + publish to npmjs
|
||||
\`\`\`
|
||||
|
||||
## Key Modules
|
||||
|
||||
### Moderator (\`cli-workflow/src/moderator/\`)
|
||||
|
||||
Status-based graph evaluator. Reads \`graph[lastRole][output.$status]\` to determine the next role. Zero LLM cost.
|
||||
|
||||
### Extract Pipeline (\`workflow-util-agent/src/\`)
|
||||
|
||||
1. Agent produces frontmatter markdown
|
||||
2. \`parseFrontmatterMarkdown()\` extracts YAML frontmatter
|
||||
3. \`tryFrontmatterFastPath()\` validates against role's output schema
|
||||
4. If fast path fails, retries up to 2 times via agent continue
|
||||
5. Validated output stored as CAS node
|
||||
|
||||
### createAgent Factory (\`workflow-util-agent/src/run.ts\`)
|
||||
|
||||
Shared entry point for all agent CLIs. Handles:
|
||||
- Argument parsing (\`--thread\`, \`--role\`, \`--prompt\`)
|
||||
- Context building (thread history, workflow definition)
|
||||
- Output extraction and CAS persistence
|
||||
- Frontmatter retry loop
|
||||
|
||||
### CAS Integration
|
||||
|
||||
All data is CAS-addressed via \`@uncaged/json-cas\`:
|
||||
- \`store.put(schemaHash, data)\` → content hash
|
||||
- \`store.get(hash)\` → node
|
||||
- \`validate(store, node)\` → schema check
|
||||
- Schemas registered at workflow add time
|
||||
|
||||
## Commit Convention
|
||||
|
||||
\`\`\`
|
||||
<type>(<scope>): <description>
|
||||
|
||||
type: feat | fix | refactor | docs | chore | test
|
||||
scope: workflow | cli | moderator | util-agent | hermes | util | protocol
|
||||
\`\`\`
|
||||
`;
|
||||
}
|
||||
@@ -1,6 +1,10 @@
|
||||
export { generateActorReference } from "./actor-reference.js";
|
||||
export { generateAdapterReference } from "./adapter-reference.js";
|
||||
export { generateArchitectureReference } from "./architecture-reference.js";
|
||||
export { generateAuthorReference } from "./author-reference.js";
|
||||
export { encodeUint64AsCrockford } from "./base32.js";
|
||||
export { generateCliReference } from "./cli-reference.js";
|
||||
export { generateDeveloperReference } from "./developer-reference.js";
|
||||
export { env } from "./env.js";
|
||||
export type {
|
||||
AgentFrontmatter,
|
||||
@@ -27,4 +31,5 @@ export { err, ok } from "./result.js";
|
||||
export { getDefaultWorkflowStorageRoot, getGlobalCasDir } from "./storage-root.js";
|
||||
export type { LogFn, Result } from "./types.js";
|
||||
export { extractUlidTimestamp, generateUlid } from "./ulid.js";
|
||||
export { generateUserReference } from "./user-reference.js";
|
||||
export { generateYamlReference } from "./yaml-reference.js";
|
||||
|
||||
@@ -0,0 +1,125 @@
|
||||
export function generateUserReference(): string {
|
||||
return `# User Reference
|
||||
|
||||
Guide for using the uwf CLI to manage workflows and threads.
|
||||
|
||||
## Quick Start
|
||||
|
||||
\`\`\`bash
|
||||
# 1. Configure provider and model
|
||||
uwf setup
|
||||
|
||||
# 2. Register a workflow
|
||||
uwf workflow add my-workflow.yaml
|
||||
|
||||
# 3. Start a thread (creates but does not execute)
|
||||
uwf thread start my-workflow -p "Build a login page"
|
||||
|
||||
# 4. Execute the thread (runs moderator → agent → extract cycles)
|
||||
uwf thread exec <thread-id> # one step
|
||||
uwf thread exec <thread-id> -c 10 # up to 10 steps
|
||||
uwf thread exec <thread-id> -c 10 --background # run in background
|
||||
\`\`\`
|
||||
|
||||
## Concepts
|
||||
|
||||
- **Workflow** — YAML definition with roles and a routing graph; stored as a CAS node
|
||||
- **Thread** — A running instance of a workflow; a chain of step nodes in CAS
|
||||
- **Step** — One moderator → agent → extract cycle; contains the role's structured output
|
||||
- **CAS** — Content-addressable store; every artifact is hashed (XXH64, Crockford Base32)
|
||||
|
||||
## Setup
|
||||
|
||||
\`\`\`
|
||||
uwf setup # interactive wizard
|
||||
uwf setup --provider <name> --base-url <url> \\
|
||||
--api-key <key> --model <name> # non-interactive
|
||||
[--agent <name>] # optional default agent
|
||||
\`\`\`
|
||||
|
||||
Config is stored at \`~/.uncaged/workflow/config.yaml\`. Override storage root with \`UNCAGED_WORKFLOW_STORAGE_ROOT\`.
|
||||
|
||||
## Workflow Commands
|
||||
|
||||
\`\`\`
|
||||
uwf workflow add <file> # register from YAML file
|
||||
uwf workflow show <id> # show by name or CAS hash
|
||||
uwf workflow list # list all registered workflows
|
||||
\`\`\`
|
||||
|
||||
You can also pass a file path directly to \`uwf thread start\` without registering first.
|
||||
|
||||
## Thread Lifecycle
|
||||
|
||||
\`\`\`
|
||||
uwf thread start <workflow> -p <prompt> # create thread
|
||||
uwf thread exec <thread-id> # execute one step
|
||||
[--agent <cmd>] # override agent
|
||||
[-c, --count <n>] # run n steps
|
||||
[--background] # run in background
|
||||
uwf thread show <thread-id> # show head pointer
|
||||
uwf thread list # list all threads
|
||||
[--status <filter>] # idle, running, completed, cancelled, active (comma-separated)
|
||||
[--after <thread-id>] # pagination: after this thread
|
||||
[--before <thread-id>] # pagination: before this thread
|
||||
[--skip <n>] # skip first n results
|
||||
[--take <n>] # limit results
|
||||
uwf thread read <thread-id> # render context as markdown
|
||||
[--quota <chars>] # max output chars (default 4000)
|
||||
[--before <step-hash>] # pagination
|
||||
[--start] # include start step
|
||||
uwf thread stop <thread-id> # stop background execution
|
||||
uwf thread cancel <thread-id> # cancel and archive thread
|
||||
\`\`\`
|
||||
|
||||
### Typical Lifecycle
|
||||
|
||||
\`\`\`
|
||||
start → exec (repeat) → thread reaches $END → auto-completed
|
||||
→ or: cancel to abort
|
||||
\`\`\`
|
||||
|
||||
## Step Commands
|
||||
|
||||
\`\`\`
|
||||
uwf step list <thread-id> # list all steps
|
||||
uwf step show <step-hash> # show step details
|
||||
uwf step fork <step-hash> # fork thread from a step (branch)
|
||||
\`\`\`
|
||||
|
||||
Forking creates a new thread that shares history up to the fork point — useful for retrying from a known-good state.
|
||||
|
||||
## CAS Commands
|
||||
|
||||
\`\`\`
|
||||
uwf cas get <hash> # read a node (type + payload)
|
||||
[--timestamp] # include timestamp
|
||||
uwf cas put <type-hash> <data> # store typed JSON, print hash
|
||||
uwf cas put-text <text> # store plain text, print hash
|
||||
uwf cas has <hash> # check existence
|
||||
uwf cas refs <hash> # list direct references
|
||||
uwf cas walk <hash> # recursive traversal
|
||||
uwf cas reindex # rebuild type index
|
||||
uwf cas schema list # list schemas
|
||||
uwf cas schema get <hash> # show schema definition
|
||||
\`\`\`
|
||||
|
||||
## Log Commands
|
||||
|
||||
\`\`\`
|
||||
uwf log list # list log files
|
||||
uwf log show # show log entries
|
||||
[--thread <id>] # filter by thread
|
||||
[--process <pid>] # filter by process
|
||||
[--date <YYYY-MM-DD>] # filter by date
|
||||
uwf log clean --before <date> # delete old logs
|
||||
\`\`\`
|
||||
|
||||
## Global Options
|
||||
|
||||
\`\`\`
|
||||
uwf --format <json|yaml> # output format (default: json)
|
||||
uwf -V, --version # print version
|
||||
\`\`\`
|
||||
`;
|
||||
}
|
||||
Executable
+120
@@ -0,0 +1,120 @@
|
||||
#!/usr/bin/env bash
|
||||
# Check development environment prerequisites for uncaged/workflow.
|
||||
# Non-interactive — prints actionable fix instructions on failure.
|
||||
# Exit 0 = all good, exit 1 = missing dependencies.
|
||||
set -euo pipefail
|
||||
|
||||
errors=0
|
||||
|
||||
check() {
|
||||
local name="$1" check_cmd="$2" fix_msg="$3"
|
||||
if eval "$check_cmd" >/dev/null 2>&1; then
|
||||
echo "✅ $name"
|
||||
else
|
||||
echo "❌ $name"
|
||||
echo " Fix: $fix_msg"
|
||||
errors=$((errors + 1))
|
||||
fi
|
||||
}
|
||||
|
||||
check_version() {
|
||||
local name="$1" cmd="$2" fix_msg="$3"
|
||||
local version
|
||||
if version=$(eval "$cmd" 2>/dev/null | head -1); then
|
||||
echo "✅ $name — $version"
|
||||
else
|
||||
echo "❌ $name"
|
||||
echo " Fix: $fix_msg"
|
||||
errors=$((errors + 1))
|
||||
fi
|
||||
}
|
||||
|
||||
echo "=== Runtime ==="
|
||||
check_version "bun" "bun --version" \
|
||||
"curl -fsSL https://bun.sh/install | bash"
|
||||
|
||||
check_version "node" "node --version" \
|
||||
"Install Node.js 20+: https://nodejs.org/"
|
||||
|
||||
check_version "python3" "python3 --version" \
|
||||
"Install Python 3.11+: https://www.python.org/ or use uv: curl -LsSf https://astral.sh/uv/install.sh | sh && uv python install 3.11"
|
||||
|
||||
echo ""
|
||||
echo "=== Tools ==="
|
||||
check_version "hermes" "hermes --version" \
|
||||
"See https://github.com/hermes-ai/hermes-agent for installation. Typical: pip install hermes-agent (or uv pip install -e . for dev)"
|
||||
|
||||
check_version "claude" "claude --version" \
|
||||
"npm install -g @anthropic-ai/claude-code"
|
||||
|
||||
echo ""
|
||||
echo "=== Workflow ==="
|
||||
|
||||
# Check repo location
|
||||
REPO_DIR="${WORKFLOW_REPO:-$(cd "$(dirname "$0")/.." && pwd)}"
|
||||
check "repo at ~/repos/workflow or WORKFLOW_REPO set" \
|
||||
"[ -f '$REPO_DIR/packages/cli-workflow/src/cli.ts' ]" \
|
||||
"Clone the repo: git clone https://git.shazhou.work/uncaged/workflow ~/repos/workflow"
|
||||
|
||||
# Check bun install
|
||||
check "node_modules installed" \
|
||||
"[ -d '$REPO_DIR/node_modules' ]" \
|
||||
"cd $REPO_DIR && bun install"
|
||||
|
||||
# Check build
|
||||
check "packages built (dist/)" \
|
||||
"[ -f '$REPO_DIR/packages/cli-workflow/dist/cli.js' ]" \
|
||||
"cd $REPO_DIR && bun run build"
|
||||
|
||||
# Check uwf is runnable
|
||||
check_version "uwf" "bun $REPO_DIR/packages/cli-workflow/src/cli.ts --version" \
|
||||
"cd $REPO_DIR && bun install && bun run build"
|
||||
|
||||
# Check uwf symlink
|
||||
check "uwf in PATH" \
|
||||
"command -v uwf" \
|
||||
"sudo ln -sf $REPO_DIR/packages/cli-workflow/dist/cli.js /usr/bin/uwf && sudo chmod +x /usr/bin/uwf"
|
||||
|
||||
# Check uwf-hermes
|
||||
check "uwf-hermes in PATH" \
|
||||
"command -v uwf-hermes" \
|
||||
"bun link in packages/workflow-agent-hermes, or: echo '#!/usr/bin/env bun' > ~/.local/bin/uwf-hermes && echo 'import \"$REPO_DIR/packages/workflow-agent-hermes/src/cli.ts\"' >> ~/.local/bin/uwf-hermes && chmod +x ~/.local/bin/uwf-hermes"
|
||||
|
||||
# Check uwf-claude-code
|
||||
check "uwf-claude-code in PATH" \
|
||||
"command -v uwf-claude-code" \
|
||||
"Create wrapper: echo '#!/bin/bash\nexec bun run $REPO_DIR/packages/workflow-agent-claude-code/src/cli.ts \"\$@\"' > ~/.local/bin/uwf-claude-code && chmod +x ~/.local/bin/uwf-claude-code"
|
||||
|
||||
echo ""
|
||||
echo "=== Config ==="
|
||||
|
||||
# Check workflow config exists
|
||||
CONFIG_DIR="${UNCAGED_WORKFLOW_STORAGE_ROOT:-$HOME/.uncaged/workflow}"
|
||||
check "config.yaml exists" \
|
||||
"[ -f '$CONFIG_DIR/config.yaml' ]" \
|
||||
"Run: uwf setup"
|
||||
|
||||
# Check config has apiKey (not apiKeyEnv)
|
||||
if [ -f "$CONFIG_DIR/config.yaml" ]; then
|
||||
check "config uses apiKey (not legacy apiKeyEnv)" \
|
||||
"grep -q 'apiKey:' '$CONFIG_DIR/config.yaml' && ! grep -q 'apiKeyEnv:' '$CONFIG_DIR/config.yaml'" \
|
||||
"Run: uwf setup (re-configure to write apiKey directly)"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "=== Docker (optional, for E2E tests) ==="
|
||||
check_version "docker" "docker --version" \
|
||||
"sudo apt install -y docker.io && sudo usermod -aG docker \$USER"
|
||||
|
||||
check "docker daemon running" \
|
||||
"docker info" \
|
||||
"sudo systemctl start docker"
|
||||
|
||||
echo ""
|
||||
if [ "$errors" -gt 0 ]; then
|
||||
echo "⚠️ $errors issue(s) found. Fix them and re-run this script."
|
||||
exit 1
|
||||
else
|
||||
echo "🎉 All checks passed!"
|
||||
exit 0
|
||||
fi
|
||||
Executable
+377
@@ -0,0 +1,377 @@
|
||||
#!/usr/bin/env bash
|
||||
# E2E walkthrough for uncaged/workflow.
|
||||
# Runs inside Docker with isolated UNCAGED_WORKFLOW_STORAGE_ROOT.
|
||||
# Exercises: setup → workflow add → thread start/exec → cancel/fork → read/inspect.
|
||||
#
|
||||
# Usage:
|
||||
# sudo -E scripts/e2e-walkthrough.sh [--agent <agent>] [--provider <provider>] [--model <model>] [--api-key <key>]
|
||||
#
|
||||
# Requires: Docker running, $HOME mount approach (see scripts/check-dev-env.sh).
|
||||
# Produces: JSON report on stdout, logs in $E2E_DIR.
|
||||
#
|
||||
# IMPORTANT: Must run with `sudo -E` to preserve $HOME (Docker needs root).
|
||||
#
|
||||
# Known Issues (WIP):
|
||||
# 1. `echo '$OUT' | jq` breaks when $OUT contains single quotes (e.g. workflow show
|
||||
# output with YAML). Fix: use heredoc or pipe variable directly.
|
||||
# 2. Config may still have old `apiKeyEnv` field — thread exec will fail with
|
||||
# "no API key". Fix: re-run `uwf setup` or manually set `apiKey` in config.
|
||||
# 3. Bootstrap installs jq via apt-get which adds ~30s startup time.
|
||||
# Consider baking a custom image or using node's JSON.parse instead.
|
||||
# 4. `bun install` in container may modify host's lockfile/node_modules.
|
||||
# Consider `--frozen-lockfile` or read-only mount for non-essential paths.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# --- Args ---
|
||||
AGENT="uwf-builtin"
|
||||
PROVIDER=""
|
||||
MODEL=""
|
||||
API_KEY=""
|
||||
KEEP_CONTAINER=false
|
||||
|
||||
while [[ $# -gt 0 ]]; do
|
||||
case "$1" in
|
||||
--agent) AGENT="$2"; shift 2 ;;
|
||||
--provider) PROVIDER="$2"; shift 2 ;;
|
||||
--model) MODEL="$2"; shift 2 ;;
|
||||
--api-key) API_KEY="$2"; shift 2 ;;
|
||||
--keep) KEEP_CONTAINER=true; shift ;;
|
||||
*) echo "Unknown arg: $1" >&2; exit 1 ;;
|
||||
esac
|
||||
done
|
||||
|
||||
# --- Resolve paths ---
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
E2E_DIR=$(mktemp -d /tmp/uwf-e2e-XXXXXX)
|
||||
CONTAINER_NAME="uwf-e2e-$(date +%s)"
|
||||
|
||||
echo "=== uwf E2E walkthrough ===" >&2
|
||||
echo "Agent: $AGENT" >&2
|
||||
echo "Provider: ${PROVIDER:-"(from config)"}" >&2
|
||||
echo "Model: ${MODEL:-"(from config)"}" >&2
|
||||
echo "E2E dir: $E2E_DIR" >&2
|
||||
echo "Container: $CONTAINER_NAME" >&2
|
||||
echo "" >&2
|
||||
|
||||
# --- Cleanup ---
|
||||
cleanup() {
|
||||
if [ "$KEEP_CONTAINER" = false ]; then
|
||||
docker rm -f "$CONTAINER_NAME" 2>/dev/null || true
|
||||
fi
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
# --- Build inner script ---
|
||||
# This runs INSIDE the container with an isolated storage root.
|
||||
cat > "$E2E_DIR/run.sh" << 'INNER_SCRIPT'
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# Isolated storage — never touches host's ~/.uncaged/workflow
|
||||
export UNCAGED_WORKFLOW_STORAGE_ROOT="/tmp/uwf-e2e-storage"
|
||||
mkdir -p "$UNCAGED_WORKFLOW_STORAGE_ROOT"
|
||||
|
||||
REPO_DIR="$1"
|
||||
AGENT="$2"
|
||||
PROVIDER="$3"
|
||||
MODEL="$4"
|
||||
API_KEY="$5"
|
||||
|
||||
# Ensure tools are in PATH (derive HOME from REPO_DIR to avoid container HOME issues)
|
||||
REAL_HOME="${6:-$HOME}"
|
||||
export HOME="$REAL_HOME"
|
||||
export PATH="$REAL_HOME/.bun/bin:$REAL_HOME/.hermes/hermes-agent/venv/bin:$REAL_HOME/.local/share/npm/bin:$PATH"
|
||||
|
||||
# Resolve uwf
|
||||
UWF="bun $REPO_DIR/packages/cli-workflow/src/cli.ts"
|
||||
|
||||
PASS=0
|
||||
FAIL=0
|
||||
RESULTS=()
|
||||
|
||||
run_test() {
|
||||
local name="$1"
|
||||
shift
|
||||
local output exit_code
|
||||
echo "--- TEST: $name ---" >&2
|
||||
output=$("$@" 2>&1) && exit_code=0 || exit_code=$?
|
||||
if [ $exit_code -eq 0 ]; then
|
||||
PASS=$((PASS + 1))
|
||||
RESULTS+=("{\"name\":\"$name\",\"status\":\"pass\"}")
|
||||
echo " ✅ PASS" >&2
|
||||
else
|
||||
FAIL=$((FAIL + 1))
|
||||
# Escape output for JSON
|
||||
local escaped
|
||||
escaped=$(echo "$output" | head -5 | tr '\n' ' ' | sed 's/"/\\"/g' | cut -c1-200)
|
||||
RESULTS+=("{\"name\":\"$name\",\"status\":\"fail\",\"error\":\"$escaped\"}")
|
||||
echo " ❌ FAIL: $output" >&2
|
||||
fi
|
||||
echo "$output"
|
||||
}
|
||||
|
||||
assert_contains() {
|
||||
local haystack="$1" needle="$2"
|
||||
if echo "$haystack" | grep -q "$needle"; then
|
||||
return 0
|
||||
else
|
||||
echo "Expected to contain: $needle" >&2
|
||||
echo "Got: $haystack" >&2
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
assert_json_field() {
|
||||
local json="$1" field="$2"
|
||||
if echo "$json" | jq -e ".$field" >/dev/null 2>&1; then
|
||||
return 0
|
||||
else
|
||||
echo "Missing JSON field: $field" >&2
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# ============================================================
|
||||
# Phase 1: Environment check
|
||||
# ============================================================
|
||||
echo "" >&2
|
||||
echo "=== Phase 1: Environment ===" >&2
|
||||
|
||||
run_test "uwf --version" bash -c "$UWF --version"
|
||||
|
||||
# ============================================================
|
||||
# Phase 2: Setup (non-interactive)
|
||||
# ============================================================
|
||||
echo "" >&2
|
||||
echo "=== Phase 2: Setup ===" >&2
|
||||
|
||||
if [ -n "$PROVIDER" ] && [ -n "$MODEL" ] && [ -n "$API_KEY" ]; then
|
||||
SETUP_CMD="$UWF setup --provider $PROVIDER --base-url https://api.openai.com/v1 --api-key $API_KEY --model $MODEL"
|
||||
if [ -n "$AGENT" ]; then
|
||||
SETUP_CMD="$SETUP_CMD --agent $AGENT"
|
||||
fi
|
||||
run_test "uwf setup (non-interactive)" bash -c "$SETUP_CMD"
|
||||
else
|
||||
# Copy host config if available
|
||||
if [ -f "$HOME/.uncaged/workflow/config.yaml" ]; then
|
||||
cp "$HOME/.uncaged/workflow/config.yaml" "$UNCAGED_WORKFLOW_STORAGE_ROOT/config.yaml"
|
||||
echo " Copied host config.yaml" >&2
|
||||
fi
|
||||
fi
|
||||
|
||||
# Test config commands
|
||||
OUT=$(run_test "uwf config list" bash -c "$UWF config list")
|
||||
run_test "config list is valid JSON" bash -c "echo '$OUT' | jq . >/dev/null"
|
||||
|
||||
# ============================================================
|
||||
# Phase 3: Workflow registration
|
||||
# ============================================================
|
||||
echo "" >&2
|
||||
echo "=== Phase 3: Workflow registration ===" >&2
|
||||
|
||||
# Use the example workflow
|
||||
EXAMPLE_WF="$REPO_DIR/examples/solve-issue.yaml"
|
||||
if [ ! -f "$EXAMPLE_WF" ]; then
|
||||
echo "No example workflow found, creating minimal test workflow" >&2
|
||||
EXAMPLE_WF="/tmp/test-workflow.yaml"
|
||||
cat > "$EXAMPLE_WF" << 'WF'
|
||||
name: test-e2e
|
||||
roles:
|
||||
worker:
|
||||
goal: "Respond to the prompt with a brief answer."
|
||||
outputSchema:
|
||||
type: object
|
||||
required: ["$status", "answer"]
|
||||
properties:
|
||||
$status:
|
||||
type: string
|
||||
enum: ["done"]
|
||||
answer:
|
||||
type: string
|
||||
graph:
|
||||
- from: $START
|
||||
to: worker
|
||||
- from: worker
|
||||
condition:
|
||||
$status: done
|
||||
to: $END
|
||||
WF
|
||||
fi
|
||||
|
||||
OUT=$(run_test "uwf workflow add" bash -c "$UWF workflow add $EXAMPLE_WF")
|
||||
run_test "workflow add returns hash" bash -c "echo '$OUT' | jq -e '.hash'"
|
||||
|
||||
OUT=$(run_test "uwf workflow list" bash -c "$UWF workflow list")
|
||||
run_test "workflow list is non-empty" bash -c "echo '$OUT' | jq -e 'length > 0'"
|
||||
|
||||
# Get workflow name
|
||||
WF_NAME=$(echo "$OUT" | jq -r '.[0].name // empty')
|
||||
run_test "workflow has a name" bash -c "[ -n '$WF_NAME' ]"
|
||||
|
||||
OUT=$(run_test "uwf workflow show" bash -c "$UWF workflow show $WF_NAME")
|
||||
run_test "workflow show returns roles" bash -c "echo '$OUT' | jq -e '.payload.roles'"
|
||||
|
||||
# ============================================================
|
||||
# Phase 4: Thread lifecycle
|
||||
# ============================================================
|
||||
echo "" >&2
|
||||
echo "=== Phase 4: Thread lifecycle ===" >&2
|
||||
|
||||
# Start a thread
|
||||
OUT=$(run_test "uwf thread start" bash -c "$UWF thread start $WF_NAME -p 'E2E test: what is 2+2?'")
|
||||
THREAD_ID=$(echo "$OUT" | jq -r '.thread // empty')
|
||||
run_test "thread start returns thread ID" bash -c "[ -n '$THREAD_ID' ]"
|
||||
|
||||
# List threads
|
||||
OUT=$(run_test "uwf thread list" bash -c "$UWF thread list")
|
||||
run_test "thread appears in list" bash -c "echo '$OUT' | jq -e '.[] | select(.thread==\"$THREAD_ID\")'"
|
||||
|
||||
# Show thread
|
||||
OUT=$(run_test "uwf thread show" bash -c "$UWF thread show $THREAD_ID")
|
||||
run_test "thread show returns head" bash -c "echo '$OUT' | jq -e '.head'"
|
||||
|
||||
# Execute one step
|
||||
EXEC_ARGS=""
|
||||
if [ -n "$AGENT" ]; then
|
||||
EXEC_ARGS="--agent $AGENT"
|
||||
fi
|
||||
OUT=$(run_test "uwf thread exec (1 step)" bash -c "$UWF thread exec $THREAD_ID $EXEC_ARGS")
|
||||
run_test "thread exec returns step info" bash -c "echo '$OUT' | jq -e '.head'"
|
||||
|
||||
# ============================================================
|
||||
# Phase 5: Read & Inspect
|
||||
# ============================================================
|
||||
echo "" >&2
|
||||
echo "=== Phase 5: Read & Inspect ===" >&2
|
||||
|
||||
# Step list
|
||||
OUT=$(run_test "uwf step list" bash -c "$UWF step list $THREAD_ID")
|
||||
STEP_COUNT=$(echo "$OUT" | jq '.steps | length')
|
||||
run_test "step list has steps" bash -c "[ $STEP_COUNT -gt 1 ]"
|
||||
|
||||
# Get last step hash
|
||||
LAST_STEP=$(echo "$OUT" | jq -r '.steps[-1].hash // empty')
|
||||
run_test "last step has hash" bash -c "[ -n '$LAST_STEP' ]"
|
||||
|
||||
# Step show
|
||||
if [ -n "$LAST_STEP" ]; then
|
||||
OUT=$(run_test "uwf step show" bash -c "$UWF step show $LAST_STEP")
|
||||
run_test "step show returns role" bash -c "echo '$OUT' | jq -e '.role'"
|
||||
fi
|
||||
|
||||
# Thread read
|
||||
OUT=$(run_test "uwf thread read" bash -c "$UWF thread read $THREAD_ID")
|
||||
run_test "thread read produces output" bash -c "[ -n '$OUT' ]"
|
||||
|
||||
# CAS operations
|
||||
if [ -n "$LAST_STEP" ]; then
|
||||
OUT=$(run_test "uwf cas get" bash -c "$UWF cas get $LAST_STEP")
|
||||
run_test "cas get returns type" bash -c "echo '$OUT' | jq -e '.type'"
|
||||
|
||||
OUT=$(run_test "uwf cas has" bash -c "$UWF cas has $LAST_STEP")
|
||||
|
||||
OUT=$(run_test "uwf cas refs" bash -c "$UWF cas refs $LAST_STEP")
|
||||
|
||||
OUT=$(run_test "uwf cas walk" bash -c "$UWF cas walk $LAST_STEP")
|
||||
run_test "cas walk returns nodes" bash -c "echo '$OUT' | jq -e 'length > 0'"
|
||||
fi
|
||||
|
||||
# ============================================================
|
||||
# Phase 6: Cancel & Fork
|
||||
# ============================================================
|
||||
echo "" >&2
|
||||
echo "=== Phase 6: Cancel & Fork ===" >&2
|
||||
|
||||
# Start a second thread for cancel test
|
||||
OUT=$(run_test "thread start (for cancel)" bash -c "$UWF thread start $WF_NAME -p 'E2E cancel test'")
|
||||
CANCEL_THREAD=$(echo "$OUT" | jq -r '.thread // empty')
|
||||
|
||||
if [ -n "$CANCEL_THREAD" ]; then
|
||||
OUT=$(run_test "uwf thread cancel" bash -c "$UWF thread cancel $CANCEL_THREAD")
|
||||
run_test "cancelled thread status" bash -c "$UWF thread list --status completed | jq -e '.[] | select(.thread==\"$CANCEL_THREAD\")'"
|
||||
fi
|
||||
|
||||
# Fork from the first thread's last step
|
||||
if [ -n "$LAST_STEP" ]; then
|
||||
OUT=$(run_test "uwf step fork" bash -c "$UWF step fork $LAST_STEP")
|
||||
FORK_THREAD=$(echo "$OUT" | jq -r '.thread // empty')
|
||||
run_test "fork creates new thread" bash -c "[ -n '$FORK_THREAD' ] && [ '$FORK_THREAD' != '$THREAD_ID' ]"
|
||||
fi
|
||||
|
||||
# ============================================================
|
||||
# Phase 7: Log inspection
|
||||
# ============================================================
|
||||
echo "" >&2
|
||||
echo "=== Phase 7: Logs ===" >&2
|
||||
|
||||
OUT=$(run_test "uwf log list" bash -c "$UWF log list")
|
||||
OUT=$(run_test "uwf log show" bash -c "$UWF log show --thread $THREAD_ID 2>&1 || true")
|
||||
|
||||
# ============================================================
|
||||
# Phase 8: Config operations
|
||||
# ============================================================
|
||||
echo "" >&2
|
||||
echo "=== Phase 8: Config get/set ===" >&2
|
||||
|
||||
OUT=$(run_test "uwf config get defaultAgent" bash -c "$UWF config get defaultAgent")
|
||||
OUT=$(run_test "uwf config set (test key)" bash -c "$UWF config set models.test.name test-model")
|
||||
OUT=$(run_test "uwf config get (verify set)" bash -c "$UWF config get models.test.name")
|
||||
run_test "config set value persisted" bash -c "echo '$OUT' | grep -q 'test-model'"
|
||||
|
||||
# ============================================================
|
||||
# Report
|
||||
# ============================================================
|
||||
echo "" >&2
|
||||
echo "=== Results ===" >&2
|
||||
echo "Pass: $PASS Fail: $FAIL" >&2
|
||||
|
||||
# JSON report
|
||||
echo "{"
|
||||
echo " \"pass\": $PASS,"
|
||||
echo " \"fail\": $FAIL,"
|
||||
echo " \"agent\": \"$AGENT\","
|
||||
echo " \"tests\": [$(IFS=,; echo "${RESULTS[*]}")]"
|
||||
echo "}"
|
||||
|
||||
[ $FAIL -eq 0 ]
|
||||
INNER_SCRIPT
|
||||
|
||||
chmod +x "$E2E_DIR/run.sh"
|
||||
|
||||
# --- Run in Docker ---
|
||||
echo "Starting Docker container..." >&2
|
||||
|
||||
# --- Build bootstrap script (runs first inside container) ---
|
||||
cat > "$E2E_DIR/bootstrap.sh" << BOOTSTRAP
|
||||
#!/usr/bin/env bash
|
||||
set -uo pipefail
|
||||
echo "Installing jq..." >&2
|
||||
apt-get update -qq >&2 && apt-get install -y -qq jq >&2
|
||||
echo "jq installed" >&2
|
||||
|
||||
# All tools come from host via mount
|
||||
export HOME='$HOME'
|
||||
export PATH="$HOME/.bun/bin:$HOME/.hermes/hermes-agent/venv/bin:$HOME/.local/share/npm/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
|
||||
|
||||
# Ensure bun modules are resolved for this environment
|
||||
cd '$REPO_DIR'
|
||||
echo "Running bun install..." >&2
|
||||
which bun >&2
|
||||
bun install 2>&1 | tail -3 >&2
|
||||
echo "bun install done" >&2
|
||||
|
||||
# Run E2E (pass HOME explicitly as 6th arg)
|
||||
bash /e2e/run.sh '$REPO_DIR' '$AGENT' '$PROVIDER' '$MODEL' '$API_KEY' '$HOME'
|
||||
BOOTSTRAP
|
||||
chmod +x "$E2E_DIR/bootstrap.sh"
|
||||
|
||||
docker run --rm \
|
||||
--name "$CONTAINER_NAME" \
|
||||
-v "$HOME:$HOME" \
|
||||
-v "$E2E_DIR:/e2e" \
|
||||
-e HOME="$HOME" \
|
||||
-w "$REPO_DIR" \
|
||||
node:22-bookworm \
|
||||
bash /e2e/bootstrap.sh
|
||||
@@ -21,6 +21,7 @@ const publishOrder = [
|
||||
"workflow-util-agent",
|
||||
"workflow-agent-hermes",
|
||||
"workflow-agent-builtin",
|
||||
"workflow-agent-claude-code",
|
||||
"cli-workflow",
|
||||
];
|
||||
|
||||
|
||||
Reference in New Issue
Block a user