refactor: split e2e-walkthrough into 6 roles with dedicated cleanup
- bootstrap: Docker + bun install + bun link + verify
- config-and-registry: config get/set/list + workflow add/show/list
- thread-ops: thread start/list/show/exec
- inspect: step list/show + thread read + CAS get/has/refs/walk
- cancel-and-fork: cancel + fork + logs
- cleanup: docker rm -f (all fail paths after bootstrap route here; a bootstrap failure goes straight to $END)
小橘 🍊
This commit is contained in:
+121
-62
@@ -8,8 +8,7 @@ roles:
|
|||||||
- docker
|
- docker
|
||||||
- shell
|
- shell
|
||||||
procedure: |
|
procedure: |
|
||||||
1. Create a temp dir for this E2E run: `E2E_DIR=$(mktemp -d /tmp/uwf-e2e-XXXXXX)`
|
1. Start a Docker container with isolated storage:
|
||||||
2. Start a Docker container with isolated storage:
|
|
||||||
```
|
```
|
||||||
docker run -d --name uwf-e2e-$$ \
|
docker run -d --name uwf-e2e-$$ \
|
||||||
-v $HOME:$HOME \
|
-v $HOME:$HOME \
|
||||||
@@ -19,7 +18,7 @@ roles:
|
|||||||
node:22-bookworm \
|
node:22-bookworm \
|
||||||
sleep infinity
|
sleep infinity
|
||||||
```
|
```
|
||||||
3. Inside the container, install bun, install deps, then `bun link` all packages
|
2. Inside the container, install bun, install deps, then `bun link` all packages
|
||||||
so that `uwf`, `uwf-hermes`, `uwf-builtin` are on PATH (from source):
|
so that `uwf`, `uwf-hermes`, `uwf-builtin` are on PATH (from source):
|
||||||
```
|
```
|
||||||
docker exec uwf-e2e-$$ bash -c '
|
docker exec uwf-e2e-$$ bash -c '
|
||||||
@@ -39,13 +38,13 @@ roles:
|
|||||||
cd packages/workflow-agent-builtin && bun link && cd ../..
|
cd packages/workflow-agent-builtin && bun link && cd ../..
|
||||||
'
|
'
|
||||||
```
|
```
|
||||||
4. Verify all three commands are available inside the container:
|
3. Verify all three commands are available inside the container:
|
||||||
```
|
```
|
||||||
docker exec uwf-e2e-$$ bash -c 'export PATH="$HOME/.bun/bin:$PATH" && uwf --version'
|
docker exec uwf-e2e-$$ bash -c 'export PATH="$HOME/.bun/bin:$PATH" && uwf --version'
|
||||||
docker exec uwf-e2e-$$ bash -c 'export PATH="$HOME/.bun/bin:$PATH" && uwf-hermes --help'
|
docker exec uwf-e2e-$$ bash -c 'export PATH="$HOME/.bun/bin:$PATH" && uwf-hermes --help'
|
||||||
docker exec uwf-e2e-$$ bash -c 'export PATH="$HOME/.bun/bin:$PATH" && uwf-builtin --help'
|
docker exec uwf-e2e-$$ bash -c 'export PATH="$HOME/.bun/bin:$PATH" && uwf-builtin --help'
|
||||||
```
|
```
|
||||||
5. Copy host config if it exists:
|
4. Copy host config if it exists:
|
||||||
```
|
```
|
||||||
docker exec uwf-e2e-$$ bash -c '
|
docker exec uwf-e2e-$$ bash -c '
|
||||||
if [ -f $HOME/.uncaged/workflow/config.yaml ]; then
|
if [ -f $HOME/.uncaged/workflow/config.yaml ]; then
|
||||||
@@ -68,8 +67,8 @@ roles:
|
|||||||
error: { type: string }
|
error: { type: string }
|
||||||
required: [$status, error]
|
required: [$status, error]
|
||||||
|
|
||||||
setup-and-registry:
|
config-and-registry:
|
||||||
description: "Validate uwf setup, config commands, and workflow registration"
|
description: "Validate uwf config commands and workflow registration"
|
||||||
goal: "You are an E2E test runner. Validate uwf config operations and workflow registration inside the Docker container."
|
goal: "You are an E2E test runner. Validate uwf config operations and workflow registration inside the Docker container."
|
||||||
capabilities:
|
capabilities:
|
||||||
- docker
|
- docker
|
||||||
@@ -82,12 +81,12 @@ roles:
|
|||||||
export PATH="$HOME/.bun/bin:$PATH"
|
export PATH="$HOME/.bun/bin:$PATH"
|
||||||
export UNCAGED_WORKFLOW_STORAGE_ROOT=/tmp/uwf-e2e-storage
|
export UNCAGED_WORKFLOW_STORAGE_ROOT=/tmp/uwf-e2e-storage
|
||||||
|
|
||||||
Phase 2 — Config:
|
Config tests:
|
||||||
1. `uwf config list` — verify it returns valid JSON
|
1. `uwf config list` — verify it returns valid JSON
|
||||||
2. `uwf config set models.test.name test-model` — set a test key
|
2. `uwf config set models.test.name test-model` — set a test key
|
||||||
3. `uwf config get models.test.name` — verify it returns "test-model"
|
3. `uwf config get models.test.name` — verify it returns "test-model"
|
||||||
|
|
||||||
Phase 3 — Workflow registration:
|
Workflow registration tests:
|
||||||
4. `uwf workflow add ~/repos/workflow/examples/solve-issue.yaml` — register workflow
|
4. `uwf workflow add ~/repos/workflow/examples/solve-issue.yaml` — register workflow
|
||||||
5. Verify the output contains a hash
|
5. Verify the output contains a hash
|
||||||
6. `uwf workflow list` — verify non-empty array
|
6. `uwf workflow list` — verify non-empty array
|
||||||
@@ -95,116 +94,176 @@ roles:
|
|||||||
8. `uwf workflow show <name>` — verify it returns roles
|
8. `uwf workflow show <name>` — verify it returns roles
|
||||||
|
|
||||||
Report all test results with pass/fail counts.
|
Report all test results with pass/fail counts.
|
||||||
output: "Report test results. Set $status to pass (with workflowName and containerName) or fail (with error and partial results)."
|
output: "Report test results. Set $status to pass (with workflowName and containerName) or fail."
|
||||||
frontmatter:
|
frontmatter:
|
||||||
oneOf:
|
oneOf:
|
||||||
- properties:
|
- properties:
|
||||||
$status: { const: "pass" }
|
$status: { const: "pass" }
|
||||||
workflowName: { type: string }
|
workflowName: { type: string }
|
||||||
containerName: { type: string }
|
containerName: { type: string }
|
||||||
testsPassed: { type: number }
|
|
||||||
required: [$status, workflowName, containerName]
|
required: [$status, workflowName, containerName]
|
||||||
- properties:
|
- properties:
|
||||||
$status: { const: "fail" }
|
$status: { const: "fail" }
|
||||||
error: { type: string }
|
error: { type: string }
|
||||||
required: [$status, error]
|
containerName: { type: string }
|
||||||
|
required: [$status, error, containerName]
|
||||||
|
|
||||||
thread-lifecycle:
|
thread-ops:
|
||||||
description: "Test thread start, exec, read, step list/show, and CAS operations"
|
description: "Test thread start, list, show, and exec"
|
||||||
goal: "You are an E2E test runner. Validate the full thread lifecycle and CAS operations."
|
goal: "You are an E2E test runner. Validate thread creation and execution inside the Docker container."
|
||||||
capabilities:
|
capabilities:
|
||||||
- docker
|
- docker
|
||||||
- shell
|
- shell
|
||||||
procedure: |
|
procedure: |
|
||||||
Use the container (containerName) and workflow (workflowName) from your prompt.
|
Use the container (containerName) and workflow (workflowName) from your prompt.
|
||||||
All commands via: `docker exec <containerName> bash -c '...'`
|
All commands via: `docker exec <containerName> bash -c '...'`
|
||||||
Set env: PATH, UNCAGED_WORKFLOW_STORAGE_ROOT=/tmp/uwf-e2e-storage
|
Set env: PATH="$HOME/.bun/bin:$PATH" UNCAGED_WORKFLOW_STORAGE_ROOT=/tmp/uwf-e2e-storage
|
||||||
|
|
||||||
Phase 4 — Thread lifecycle:
|
1. `uwf thread start <workflowName> -p 'E2E test: what is 2+2?'` — capture thread ID from JSON output
|
||||||
1. `uwf thread start <workflowName> -p 'E2E test: what is 2+2?'` — capture thread ID
|
2. `uwf thread list` — verify the thread appears in the list
|
||||||
2. `uwf thread list` — verify thread appears
|
|
||||||
3. `uwf thread show <threadId>` — verify head pointer exists
|
3. `uwf thread show <threadId>` — verify head pointer exists
|
||||||
4. `uwf thread exec <threadId> --agent uwf-builtin` — execute one step
|
4. `uwf thread exec <threadId> --agent uwf-builtin` — execute one step
|
||||||
5. Verify exec returns step info with head
|
5. Verify exec returns JSON with a head field
|
||||||
|
|
||||||
Phase 5 — Read & Inspect:
|
Report results. Pass threadId and containerName forward.
|
||||||
6. `uwf step list <threadId>` — verify steps exist (length > 1)
|
output: "Report test results. Set $status to pass (with threadId, workflowName, containerName) or fail."
|
||||||
7. Capture last step hash
|
frontmatter:
|
||||||
8. `uwf step show <lastStepHash>` — verify it returns role
|
oneOf:
|
||||||
9. `uwf thread read <threadId>` — verify non-empty output
|
- properties:
|
||||||
10. `uwf cas get <lastStepHash>` — verify returns type
|
$status: { const: "pass" }
|
||||||
11. `uwf cas has <lastStepHash>` — verify exists
|
threadId: { type: string }
|
||||||
12. `uwf cas refs <lastStepHash>` — list refs
|
workflowName: { type: string }
|
||||||
13. `uwf cas walk <lastStepHash>` — verify returns nodes
|
containerName: { type: string }
|
||||||
|
required: [$status, threadId, workflowName, containerName]
|
||||||
|
- properties:
|
||||||
|
$status: { const: "fail" }
|
||||||
|
error: { type: string }
|
||||||
|
containerName: { type: string }
|
||||||
|
required: [$status, error, containerName]
|
||||||
|
|
||||||
Report all results. Pass the threadId and lastStepHash forward.
|
inspect:
|
||||||
output: "Report test results. Set $status to pass (with threadId, lastStepHash, containerName) or fail."
|
description: "Test step list/show, thread read, and CAS operations"
|
||||||
|
goal: "You are an E2E test runner. Validate read and inspect operations inside the Docker container."
|
||||||
|
capabilities:
|
||||||
|
- docker
|
||||||
|
- shell
|
||||||
|
procedure: |
|
||||||
|
Use the container (containerName) and threadId from your prompt.
|
||||||
|
All commands via: `docker exec <containerName> bash -c '...'`
|
||||||
|
Set env: PATH="$HOME/.bun/bin:$PATH" UNCAGED_WORKFLOW_STORAGE_ROOT=/tmp/uwf-e2e-storage
|
||||||
|
|
||||||
|
Step inspection:
|
||||||
|
1. `uwf step list <threadId>` — verify steps array has length > 1
|
||||||
|
2. Capture the last step hash from the output
|
||||||
|
3. `uwf step show <lastStepHash>` — verify it returns a role field
|
||||||
|
|
||||||
|
Thread read:
|
||||||
|
4. `uwf thread read <threadId>` — verify non-empty output
|
||||||
|
|
||||||
|
CAS operations:
|
||||||
|
5. `uwf cas get <lastStepHash>` — verify returns a type field
|
||||||
|
6. `uwf cas has <lastStepHash>` — verify exits 0
|
||||||
|
7. `uwf cas refs <lastStepHash>` — list refs (may be empty)
|
||||||
|
8. `uwf cas walk <lastStepHash>` — verify returns non-empty array
|
||||||
|
|
||||||
|
Report results. Pass threadId, lastStepHash, workflowName, containerName forward.
|
||||||
|
output: "Report test results. Set $status to pass (with threadId, lastStepHash, workflowName, containerName) or fail."
|
||||||
frontmatter:
|
frontmatter:
|
||||||
oneOf:
|
oneOf:
|
||||||
- properties:
|
- properties:
|
||||||
$status: { const: "pass" }
|
$status: { const: "pass" }
|
||||||
threadId: { type: string }
|
threadId: { type: string }
|
||||||
lastStepHash: { type: string }
|
lastStepHash: { type: string }
|
||||||
|
workflowName: { type: string }
|
||||||
containerName: { type: string }
|
containerName: { type: string }
|
||||||
testsPassed: { type: number }
|
required: [$status, threadId, lastStepHash, workflowName, containerName]
|
||||||
required: [$status, threadId, lastStepHash, containerName]
|
|
||||||
- properties:
|
- properties:
|
||||||
$status: { const: "fail" }
|
$status: { const: "fail" }
|
||||||
error: { type: string }
|
error: { type: string }
|
||||||
required: [$status, error]
|
containerName: { type: string }
|
||||||
|
required: [$status, error, containerName]
|
||||||
|
|
||||||
cancel-fork-and-logs:
|
cancel-and-fork:
|
||||||
description: "Test thread cancel, step fork, and log inspection"
|
description: "Test thread cancel, step fork, and log inspection"
|
||||||
goal: "You are an E2E test runner. Validate cancel, fork, and log operations."
|
goal: "You are an E2E test runner. Validate cancel, fork, and log operations inside the Docker container."
|
||||||
capabilities:
|
capabilities:
|
||||||
- docker
|
- docker
|
||||||
- shell
|
- shell
|
||||||
procedure: |
|
procedure: |
|
||||||
Use containerName, threadId (first thread), lastStepHash, and workflowName from your prompt.
|
Use containerName, threadId, lastStepHash, and workflowName from your prompt.
|
||||||
All commands via: `docker exec <containerName> bash -c '...'`
|
All commands via: `docker exec <containerName> bash -c '...'`
|
||||||
Set env: PATH, UNCAGED_WORKFLOW_STORAGE_ROOT=/tmp/uwf-e2e-storage
|
Set env: PATH="$HOME/.bun/bin:$PATH" UNCAGED_WORKFLOW_STORAGE_ROOT=/tmp/uwf-e2e-storage
|
||||||
|
|
||||||
Phase 6 — Cancel & Fork:
|
Cancel:
|
||||||
1. Start a second thread: `uwf thread start <workflowName> -p 'E2E cancel test'`
|
1. Start a second thread: `uwf thread start <workflowName> -p 'E2E cancel test'`
|
||||||
2. Cancel it: `uwf thread cancel <secondThreadId>`
|
2. Cancel it: `uwf thread cancel <secondThreadId>`
|
||||||
3. Verify it appears in completed list: `uwf thread list --status completed`
|
3. Verify it appears in completed list: `uwf thread list --status completed`
|
||||||
|
|
||||||
|
Fork:
|
||||||
4. Fork from the first thread's last step: `uwf step fork <lastStepHash>`
|
4. Fork from the first thread's last step: `uwf step fork <lastStepHash>`
|
||||||
5. Verify fork creates a new thread with different ID
|
5. Verify fork creates a new thread with a different ID
|
||||||
|
|
||||||
Phase 7 — Logs:
|
Logs:
|
||||||
6. `uwf log list` — check log files exist
|
6. `uwf log list` — verify output (may be empty)
|
||||||
7. `uwf log show --thread <threadId>` — verify log output (may be empty, that's ok)
|
7. `uwf log show --thread <threadId>` — verify runs without error
|
||||||
|
|
||||||
Phase 8 — Cleanup:
|
Report results with summary.
|
||||||
8. Stop and remove the Docker container: `docker rm -f <containerName>`
|
output: "Report test results with summary. Set $status to pass or fail."
|
||||||
|
|
||||||
Report final results with full summary of all phases.
|
|
||||||
output: "Report final test results with pass/fail counts. Set $status to pass or fail."
|
|
||||||
frontmatter:
|
frontmatter:
|
||||||
oneOf:
|
oneOf:
|
||||||
- properties:
|
- properties:
|
||||||
$status: { const: "pass" }
|
$status: { const: "pass" }
|
||||||
totalPassed: { type: number }
|
containerName: { type: string }
|
||||||
summary: { type: string }
|
summary: { type: string }
|
||||||
required: [$status, totalPassed, summary]
|
required: [$status, containerName, summary]
|
||||||
|
- properties:
|
||||||
|
$status: { const: "fail" }
|
||||||
|
error: { type: string }
|
||||||
|
containerName: { type: string }
|
||||||
|
required: [$status, error, containerName]
|
||||||
|
|
||||||
|
cleanup:
|
||||||
|
description: "Remove Docker container"
|
||||||
|
goal: "You are an E2E test runner. Clean up the Docker container used for testing."
|
||||||
|
capabilities:
|
||||||
|
- docker
|
||||||
|
- shell
|
||||||
|
procedure: |
|
||||||
|
Remove the Docker container (containerName is in your prompt):
|
||||||
|
1. `docker rm -f <containerName>`
|
||||||
|
2. Verify the container is gone: `docker ps -a --filter name=<containerName> --format '{{.Names}}'` should return empty
|
||||||
|
|
||||||
|
Report cleanup result.
|
||||||
|
output: "Report cleanup result. Set $status to pass or fail."
|
||||||
|
frontmatter:
|
||||||
|
oneOf:
|
||||||
|
- properties:
|
||||||
|
$status: { const: "pass" }
|
||||||
|
summary: { type: string }
|
||||||
|
required: [$status, summary]
|
||||||
- properties:
|
- properties:
|
||||||
$status: { const: "fail" }
|
$status: { const: "fail" }
|
||||||
error: { type: string }
|
error: { type: string }
|
||||||
totalPassed: { type: number }
|
|
||||||
required: [$status, error]
|
required: [$status, error]
|
||||||
|
|
||||||
graph:
|
graph:
|
||||||
$START:
|
$START:
|
||||||
_: { role: "bootstrap", prompt: "Set up the Docker container and verify uwf is runnable." }
|
_: { role: "bootstrap", prompt: "Set up the Docker container and verify uwf is runnable." }
|
||||||
bootstrap:
|
bootstrap:
|
||||||
pass: { role: "setup-and-registry", prompt: "Container {{{containerName}}} is ready. Validate config and workflow registration." }
|
pass: { role: "config-and-registry", prompt: "Container {{{containerName}}} is ready. Validate config and workflow registration." }
|
||||||
fail: { role: "$END", prompt: "Bootstrap failed: {{{error}}}" }
|
fail: { role: "$END", prompt: "Bootstrap failed: {{{error}}}. No container was created." }
|
||||||
setup-and-registry:
|
config-and-registry:
|
||||||
pass: { role: "thread-lifecycle", prompt: "Config and registry OK. Workflow '{{{workflowName}}}' registered. Container: {{{containerName}}}. Now test thread lifecycle." }
|
pass: { role: "thread-ops", prompt: "Config and registry OK. Workflow '{{{workflowName}}}' registered. Container: {{{containerName}}}. Now test thread operations." }
|
||||||
fail: { role: "$END", prompt: "Setup/registry failed: {{{error}}}" }
|
fail: { role: "cleanup", prompt: "Config/registry failed: {{{error}}}. Clean up container {{{containerName}}}." }
|
||||||
thread-lifecycle:
|
thread-ops:
|
||||||
pass: { role: "cancel-fork-and-logs", prompt: "Thread lifecycle OK. threadId={{{threadId}}}, lastStepHash={{{lastStepHash}}}, containerName={{{containerName}}}. Now test cancel, fork, logs, and cleanup." }
|
pass: { role: "inspect", prompt: "Thread ops OK. threadId={{{threadId}}}, workflowName={{{workflowName}}}, containerName={{{containerName}}}. Now test inspect operations." }
|
||||||
fail: { role: "$END", prompt: "Thread lifecycle failed: {{{error}}}" }
|
fail: { role: "cleanup", prompt: "Thread ops failed: {{{error}}}. Clean up container {{{containerName}}}." }
|
||||||
cancel-fork-and-logs:
|
inspect:
|
||||||
pass: { role: "$END", prompt: "All E2E tests passed! {{{summary}}}" }
|
pass: { role: "cancel-and-fork", prompt: "Inspect OK. threadId={{{threadId}}}, lastStepHash={{{lastStepHash}}}, workflowName={{{workflowName}}}, containerName={{{containerName}}}. Now test cancel, fork, and logs." }
|
||||||
fail: { role: "$END", prompt: "Cancel/fork/logs phase failed: {{{error}}}. Passed: {{{totalPassed}}}" }
|
fail: { role: "cleanup", prompt: "Inspect failed: {{{error}}}. Clean up container {{{containerName}}}." }
|
||||||
|
cancel-and-fork:
|
||||||
|
pass: { role: "cleanup", prompt: "All tests passed! {{{summary}}}. Clean up container {{{containerName}}}." }
|
||||||
|
fail: { role: "cleanup", prompt: "Cancel/fork failed: {{{error}}}. Clean up container {{{containerName}}}." }
|
||||||
|
cleanup:
|
||||||
|
pass: { role: "$END", prompt: "E2E walkthrough complete. {{{summary}}}" }
|
||||||
|
fail: { role: "$END", prompt: "Cleanup failed: {{{error}}}. Manual cleanup may be needed." }
|
||||||
|
|||||||
Reference in New Issue
Block a user