diff --git a/.workflows/e2e-walkthrough.yaml b/.workflows/e2e-walkthrough.yaml
index 694824c..825ea63 100644
--- a/.workflows/e2e-walkthrough.yaml
+++ b/.workflows/e2e-walkthrough.yaml
@@ -8,8 +8,7 @@ roles:
       - docker
       - shell
     procedure: |
-      1. Create a temp dir for this E2E run: `E2E_DIR=$(mktemp -d /tmp/uwf-e2e-XXXXXX)`
-      2. Start a Docker container with isolated storage:
+      1. Start a Docker container with isolated storage:
          ```
          docker run -d --name uwf-e2e-$$ \
            -v $HOME:$HOME \
@@ -19,7 +18,7 @@ roles:
            node:22-bookworm \
            sleep infinity
          ```
-      3. Inside the container, install bun, install deps, then `bun link` all packages
+      2. Inside the container, install bun, install deps, then `bun link` all packages
          so that `uwf`, `uwf-hermes`, `uwf-builtin` are on PATH (from source):
          ```
          docker exec uwf-e2e-$$ bash -c '
@@ -39,13 +38,13 @@ roles:
            cd packages/workflow-agent-builtin && bun link && cd ../..
          '
          ```
-      4. Verify all three commands are available inside the container:
+      3. Verify all three commands are available inside the container:
          ```
          docker exec uwf-e2e-$$ bash -c 'export PATH="$HOME/.bun/bin:$PATH" && uwf --version'
          docker exec uwf-e2e-$$ bash -c 'export PATH="$HOME/.bun/bin:$PATH" && uwf-hermes --help'
          docker exec uwf-e2e-$$ bash -c 'export PATH="$HOME/.bun/bin:$PATH" && uwf-builtin --help'
          ```
-      5. Copy host config if it exists:
+      4. Copy host config if it exists:
          ```
          docker exec uwf-e2e-$$ bash -c '
            if [ -f $HOME/.uncaged/workflow/config.yaml ]; then
@@ -68,8 +67,8 @@ roles:
             error: { type: string }
           required: [$status, error]
 
-  setup-and-registry:
-    description: "Validate uwf setup, config commands, and workflow registration"
+  config-and-registry:
+    description: "Validate uwf config commands and workflow registration"
     goal: "You are an E2E test runner. Validate uwf config operations and workflow registration inside the Docker container."
     capabilities:
       - docker
@@ -82,12 +81,12 @@ roles:
       export PATH="$HOME/.bun/bin:$PATH"
       export UNCAGED_WORKFLOW_STORAGE_ROOT=/tmp/uwf-e2e-storage
 
-      Phase 2 — Config:
+      Config tests:
       1. `uwf config list` — verify it returns valid JSON
       2. `uwf config set models.test.name test-model` — set a test key
       3. `uwf config get models.test.name` — verify it returns "test-model"
 
-      Phase 3 — Workflow registration:
+      Workflow registration tests:
       4. `uwf workflow add ~/repos/workflow/examples/solve-issue.yaml` — register workflow
       5. Verify the output contains a hash
       6. `uwf workflow list` — verify non-empty array
@@ -95,116 +94,180 @@ roles:
       8. `uwf workflow show <workflowName>` — verify it returns roles
 
       Report all test results with pass/fail counts.
-    output: "Report test results. Set $status to pass (with workflowName and containerName) or fail (with error and partial results)."
+    output: "Report test results. Set $status to pass (with workflowName and containerName) or fail."
     frontmatter:
       oneOf:
         - properties:
             $status: { const: "pass" }
             workflowName: { type: string }
             containerName: { type: string }
-            testsPassed: { type: number }
           required: [$status, workflowName, containerName]
         - properties:
             $status: { const: "fail" }
             error: { type: string }
-          required: [$status, error]
+            containerName: { type: string }
+          required: [$status, error, containerName]
 
-  thread-lifecycle:
-    description: "Test thread start, exec, read, step list/show, and CAS operations"
-    goal: "You are an E2E test runner. Validate the full thread lifecycle and CAS operations."
+  thread-ops:
+    description: "Test thread start, list, show, and exec"
+    goal: "You are an E2E test runner. Validate thread creation and execution inside the Docker container."
     capabilities:
       - docker
       - shell
     procedure: |
       Use the container (containerName) and workflow (workflowName) from your prompt.
       All commands via: `docker exec <containerName> bash -c '...'`
-      Set env: PATH, UNCAGED_WORKFLOW_STORAGE_ROOT=/tmp/uwf-e2e-storage
+      Set env: PATH="$HOME/.bun/bin:$PATH" UNCAGED_WORKFLOW_STORAGE_ROOT=/tmp/uwf-e2e-storage
 
-      Phase 4 — Thread lifecycle:
-      1. `uwf thread start <workflowName> -p 'E2E test: what is 2+2?'` — capture thread ID
-      2. `uwf thread list` — verify thread appears
+      1. `uwf thread start <workflowName> -p 'E2E test: what is 2+2?'` — capture thread ID from JSON output
+      2. `uwf thread list` — verify the thread appears in the list
       3. `uwf thread show <threadId>` — verify head pointer exists
       4. `uwf thread exec <threadId> --agent uwf-builtin` — execute one step
-      5. Verify exec returns step info with head
+      5. Verify exec returns JSON with a head field
 
-      Phase 5 — Read & Inspect:
-      6. `uwf step list <threadId>` — verify steps exist (length > 1)
-      7. Capture last step hash
-      8. `uwf step show <lastStepHash>` — verify it returns role
-      9. `uwf thread read <threadId>` — verify non-empty output
-      10. `uwf cas get <lastStepHash>` — verify returns type
-      11. `uwf cas has <lastStepHash>` — verify exists
-      12. `uwf cas refs <lastStepHash>` — list refs
-      13. `uwf cas walk <lastStepHash>` — verify returns nodes
+      Report results. Pass threadId and containerName forward.
+    output: "Report test results. Set $status to pass (with threadId, workflowName, containerName) or fail."
+    frontmatter:
+      oneOf:
+        - properties:
+            $status: { const: "pass" }
+            threadId: { type: string }
+            workflowName: { type: string }
+            containerName: { type: string }
+          required: [$status, threadId, workflowName, containerName]
+        - properties:
+            $status: { const: "fail" }
+            error: { type: string }
+            containerName: { type: string }
+          required: [$status, error, containerName]
 
-      Report all results. Pass the threadId and lastStepHash forward.
-    output: "Report test results. Set $status to pass (with threadId, lastStepHash, containerName) or fail."
+  inspect:
+    description: "Test step list/show, thread read, and CAS operations"
+    goal: "You are an E2E test runner. Validate read and inspect operations inside the Docker container."
+    capabilities:
+      - docker
+      - shell
+    procedure: |
+      Use the container (containerName) and threadId from your prompt.
+      All commands via: `docker exec <containerName> bash -c '...'`
+      Set env: PATH="$HOME/.bun/bin:$PATH" UNCAGED_WORKFLOW_STORAGE_ROOT=/tmp/uwf-e2e-storage
+
+      Step inspection:
+      1. `uwf step list <threadId>` — verify steps array has length > 1
+      2. Capture the last step hash from the output
+      3. `uwf step show <lastStepHash>` — verify it returns a role field
+
+      Thread read:
+      4. `uwf thread read <threadId>` — verify non-empty output
+
+      CAS operations:
+      5. `uwf cas get <lastStepHash>` — verify returns a type field
+      6. `uwf cas has <lastStepHash>` — verify exits 0
+      7. `uwf cas refs <lastStepHash>` — list refs (may be empty)
+      8. `uwf cas walk <lastStepHash>` — verify returns non-empty array
+
+      Report results. Pass threadId, lastStepHash, workflowName, containerName forward.
+    output: "Report test results. Set $status to pass (with threadId, lastStepHash, workflowName, containerName) or fail."
     frontmatter:
       oneOf:
         - properties:
             $status: { const: "pass" }
             threadId: { type: string }
             lastStepHash: { type: string }
+            workflowName: { type: string }
             containerName: { type: string }
-            testsPassed: { type: number }
-          required: [$status, threadId, lastStepHash, containerName]
+          required: [$status, threadId, lastStepHash, workflowName, containerName]
         - properties:
             $status: { const: "fail" }
             error: { type: string }
-          required: [$status, error]
+            containerName: { type: string }
+          required: [$status, error, containerName]
 
-  cancel-fork-and-logs:
+  cancel-and-fork:
     description: "Test thread cancel, step fork, and log inspection"
-    goal: "You are an E2E test runner. Validate cancel, fork, and log operations."
+    goal: "You are an E2E test runner. Validate cancel, fork, and log operations inside the Docker container."
     capabilities:
       - docker
       - shell
     procedure: |
-      Use containerName, threadId (first thread), lastStepHash, and workflowName from your prompt.
+      Use containerName, threadId, lastStepHash, and workflowName from your prompt.
       All commands via: `docker exec <containerName> bash -c '...'`
-      Set env: PATH, UNCAGED_WORKFLOW_STORAGE_ROOT=/tmp/uwf-e2e-storage
+      Set env: PATH="$HOME/.bun/bin:$PATH" UNCAGED_WORKFLOW_STORAGE_ROOT=/tmp/uwf-e2e-storage
 
-      Phase 6 — Cancel & Fork:
+      Cancel:
       1. Start a second thread: `uwf thread start <workflowName> -p 'E2E cancel test'`
       2. Cancel it: `uwf thread cancel <secondThreadId>`
       3. Verify it appears in completed list: `uwf thread list --status completed`
+
+      Fork:
       4. Fork from the first thread's last step: `uwf step fork <lastStepHash>`
-      5. Verify fork creates a new thread with different ID
+      5. Verify fork creates a new thread with a different ID
 
-      Phase 7 — Logs:
-      6. `uwf log list` — check log files exist
-      7. `uwf log show --thread <threadId>` — verify log output (may be empty, that's ok)
+      Logs:
+      6. `uwf log list` — verify output (may be empty)
+      7. `uwf log show --thread <threadId>` — verify runs without error
 
-      Phase 8 — Cleanup:
-      8. Stop and remove the Docker container: `docker rm -f <containerName>`
-
-      Report final results with full summary of all phases.
-    output: "Report final test results with pass/fail counts. Set $status to pass or fail."
+      Report results with summary.
+    output: "Report test results with summary. Set $status to pass or fail."
     frontmatter:
       oneOf:
         - properties:
             $status: { const: "pass" }
-            totalPassed: { type: number }
+            containerName: { type: string }
             summary: { type: string }
-          required: [$status, totalPassed, summary]
+          required: [$status, containerName, summary]
+        - properties:
+            $status: { const: "fail" }
+            error: { type: string }
+            containerName: { type: string }
+          required: [$status, error, containerName]
+
+  # Terminal role. Every failure after bootstrap and the final passing phase route here,
+  # so the container is removed whether or not the tests passed.
+  cleanup:
+    description: "Remove Docker container"
+    goal: "You are an E2E test runner. Clean up the Docker container used for testing."
+    capabilities:
+      - docker
+      - shell
+    procedure: |
+      Remove the Docker container (containerName is in your prompt):
+      1. `docker rm -f <containerName>`
+      2. Verify the container is gone: `docker ps -a --filter name=<containerName> --format '{{.Names}}'` should return empty
+      3. In your summary, restate the test outcome given in your prompt (all tests passed, or which phase failed and its error) so a successful cleanup does not hide a failed run
+
+      Report cleanup result.
+    output: "Report cleanup result and restate the test outcome from your prompt. Set $status to pass if the container was removed, otherwise fail."
+    frontmatter:
+      oneOf:
+        - properties:
+            $status: { const: "pass" }
+            summary: { type: string }
+          required: [$status, summary]
         - properties:
             $status: { const: "fail" }
             error: { type: string }
-            totalPassed: { type: number }
           required: [$status, error]
 
 graph:
   $START:
     _: { role: "bootstrap", prompt: "Set up the Docker container and verify uwf is runnable." }
   bootstrap:
-    pass: { role: "setup-and-registry", prompt: "Container {{{containerName}}} is ready. Validate config and workflow registration." }
-    fail: { role: "$END", prompt: "Bootstrap failed: {{{error}}}" }
-  setup-and-registry:
-    pass: { role: "thread-lifecycle", prompt: "Config and registry OK. Workflow '{{{workflowName}}}' registered. Container: {{{containerName}}}. Now test thread lifecycle." }
-    fail: { role: "$END", prompt: "Setup/registry failed: {{{error}}}" }
-  thread-lifecycle:
-    pass: { role: "cancel-fork-and-logs", prompt: "Thread lifecycle OK. threadId={{{threadId}}}, lastStepHash={{{lastStepHash}}}, containerName={{{containerName}}}. Now test cancel, fork, logs, and cleanup." }
-    fail: { role: "$END", prompt: "Thread lifecycle failed: {{{error}}}" }
-  cancel-fork-and-logs:
-    pass: { role: "$END", prompt: "All E2E tests passed! {{{summary}}}" }
-    fail: { role: "$END", prompt: "Cancel/fork/logs phase failed: {{{error}}}. Passed: {{{totalPassed}}}" }
+    pass: { role: "config-and-registry", prompt: "Container {{{containerName}}} is ready. Validate config and workflow registration." }
+    fail: { role: "$END", prompt: "Bootstrap failed: {{{error}}}. A container may have been started before the failure; remove any leftover uwf-e2e-* container manually." }
+  # From here on, every fail edge goes to cleanup (not $END) so the container is always removed.
+  config-and-registry:
+    pass: { role: "thread-ops", prompt: "Config and registry OK. Workflow '{{{workflowName}}}' registered. Container: {{{containerName}}}. Now test thread operations." }
+    fail: { role: "cleanup", prompt: "Config/registry failed: {{{error}}}. Clean up container {{{containerName}}}." }
+  thread-ops:
+    pass: { role: "inspect", prompt: "Thread ops OK. threadId={{{threadId}}}, workflowName={{{workflowName}}}, containerName={{{containerName}}}. Now test inspect operations." }
+    fail: { role: "cleanup", prompt: "Thread ops failed: {{{error}}}. Clean up container {{{containerName}}}." }
+  inspect:
+    pass: { role: "cancel-and-fork", prompt: "Inspect OK. threadId={{{threadId}}}, lastStepHash={{{lastStepHash}}}, workflowName={{{workflowName}}}, containerName={{{containerName}}}. Now test cancel, fork, and logs." }
+    fail: { role: "cleanup", prompt: "Inspect failed: {{{error}}}. Clean up container {{{containerName}}}." }
+  cancel-and-fork:
+    pass: { role: "cleanup", prompt: "All tests passed! {{{summary}}}. Clean up container {{{containerName}}}." }
+    fail: { role: "cleanup", prompt: "Cancel/fork failed: {{{error}}}. Clean up container {{{containerName}}}." }
+  cleanup:
+    pass: { role: "$END", prompt: "E2E walkthrough complete. {{{summary}}}" }
+    fail: { role: "$END", prompt: "Cleanup failed: {{{error}}}. Manual cleanup may be needed." }