# united-workforce/.workflows/e2e-walkthrough.yaml

# Workflow name and a human-readable summary of what it exercises.
name: 'e2e-walkthrough'
description: >-
  End-to-end walkthrough of uwf CLI. Dogfooding: uwf tests uwf.
  Each role validates a phase of the CLI surface inside an
  isolated Docker container.
# One entry per role. Each role declares description, goal, capabilities,
# procedure, output, and a frontmatter schema with a pass and a fail variant.
roles:
  # bootstrap: creates the Docker container used by the other roles and checks
  # that uwf, uwf-hermes and uwf-builtin can be invoked inside it.
  bootstrap:
    description: "Start Docker container with isolated storage, verify uwf is runnable"
    goal: "You are an E2E test runner. Set up an isolated Docker environment and verify basic uwf functionality."
    capabilities:
      - docker
      - shell
    # The container name is generated once and then used as a literal
    # <containerName>, matching the placeholder the other roles use. Writing
    # `$$` in each command is unsafe because it expands to the PID of whichever
    # shell runs that command.
    procedure: |
      1. Pick a unique container name ONCE and reuse that exact literal value as
         <containerName> in every command below. Do not write `$$` in later
         commands: it is the PID of the current shell and changes between
         shell invocations.
         ```
         echo "uwf-e2e-$(date +%s)-$$"
         ```
      2. Start a Docker container with isolated storage:
         ```
         docker run -d --name <containerName> \
           -v "$HOME:$HOME" \
           -e HOME="$HOME" \
           -e UNCAGED_WORKFLOW_STORAGE_ROOT=/tmp/uwf-e2e-storage \
           -w "$HOME/repos/workflow" \
           node:22-bookworm \
           sleep infinity
         ```
      3. Inside the container, install bun, install deps, then `bun link` all packages
         so that `uwf`, `uwf-hermes`, `uwf-builtin` are on PATH (from source).
         The script stops at the first failing command:
         ```
         docker exec <containerName> bash -c '
           set -euo pipefail

           # Install bun
           curl -fsSL https://bun.sh/install | bash
           export PATH="$HOME/.bun/bin:$PATH"

           # Isolated storage
           mkdir -p "$UNCAGED_WORKFLOW_STORAGE_ROOT"

           # Install workspace deps
           cd "$HOME/repos/workflow"
           bun install --frozen-lockfile

           # bun link each package that has a bin entry
           (cd packages/cli-workflow && bun link)
           (cd packages/workflow-agent-hermes && bun link)
           (cd packages/workflow-agent-builtin && bun link)
         '
         ```
      4. Verify all three commands are available inside the container:
         ```
         docker exec <containerName> bash -c 'export PATH="$HOME/.bun/bin:$PATH" && uwf --version'
         docker exec <containerName> bash -c 'export PATH="$HOME/.bun/bin:$PATH" && uwf-hermes --help'
         docker exec <containerName> bash -c 'export PATH="$HOME/.bun/bin:$PATH" && uwf-builtin --help'
         ```
      5. Copy host config if it exists:
         ```
         docker exec <containerName> bash -c '
           if [ -f "$HOME/.uncaged/workflow/config.yaml" ]; then
             cp "$HOME/.uncaged/workflow/config.yaml" "$UNCAGED_WORKFLOW_STORAGE_ROOT/config.yaml"
           fi
         '
         ```

      If any step fails after the container was started, remove it with
      `docker rm -f <containerName>` before reporting fail: a failed bootstrap
      does not report containerName, so no later role can remove it.

      Report the container name and confirm uwf + agents are working.
      Set containerName to the Docker container name for subsequent roles.
    output: "Report uwf version and container readiness. Set $status to pass with containerName, or fail with error."
    # Result schema: pass carries containerName; fail carries only error.
    frontmatter:
      oneOf:
        - properties:
            $status: { const: "pass" }
            containerName: { type: string }
          required: [$status, containerName]
        - properties:
            $status: { const: "fail" }
            error: { type: string }
          required: [$status, error]

  # config-and-registry: exercises `uwf config` and `uwf workflow` commands
  # inside the container named in the prompt.
  config-and-registry:
    description: "Validate uwf config commands and workflow registration"
    goal: "You are an E2E test runner. Validate uwf config operations and workflow registration inside the Docker container."
    capabilities:
      - docker
      - shell
    procedure: |
      Use the container from the previous step (containerName is in your prompt).
      All commands run via: `docker exec <containerName> bash -c '...'`
      All commands use `uwf` (installed via `bun link` inside the container).
      Remember to set env vars in each exec:
        export PATH="$HOME/.bun/bin:$PATH"
        export UNCAGED_WORKFLOW_STORAGE_ROOT=/tmp/uwf-e2e-storage

      Config tests:
      1. `uwf config list` — verify it returns valid JSON
      2. `uwf config set models.test.name test-model` — set a test key
      3. `uwf config get models.test.name` — verify it returns "test-model"

      Workflow registration tests:
      4. `uwf workflow add ~/repos/workflow/examples/solve-issue.yaml` — register workflow
      5. Verify the output contains a hash
      6. `uwf workflow list` — verify non-empty array
      7. Capture the workflow name from the list
      8. `uwf workflow show <name>` — verify it returns roles

      Report all test results with pass/fail counts.
      Always include containerName in the result, on failure too, so the
      container can still be cleaned up.
    # The output text names every field the matching schema variant requires.
    output: "Report test results. Set $status to pass (with workflowName and containerName) or fail (with error and containerName)."
    # Result schema: both variants require containerName.
    frontmatter:
      oneOf:
        - properties:
            $status: { const: "pass" }
            workflowName: { type: string }
            containerName: { type: string }
          required: [$status, workflowName, containerName]
        - properties:
            $status: { const: "fail" }
            error: { type: string }
            containerName: { type: string }
          required: [$status, error, containerName]

  # thread-ops: starts a thread on the registered workflow, lists and shows it,
  # then executes one step with the uwf-builtin agent.
  thread-ops:
    description: "Test thread start, list, show, and exec"
    goal: "You are an E2E test runner. Validate thread creation and execution inside the Docker container."
    capabilities:
      - docker
      - shell
    procedure: |
      Use the container (containerName) and workflow (workflowName) from your prompt.
      All commands via: `docker exec <containerName> bash -c '...'`
      Set env: PATH="$HOME/.bun/bin:$PATH" UNCAGED_WORKFLOW_STORAGE_ROOT=/tmp/uwf-e2e-storage

      1. `uwf thread start <workflowName> -p 'E2E test: what is 2+2?'` — capture thread ID from JSON output
      2. `uwf thread list` — verify the thread appears in the list
      3. `uwf thread show <threadId>` — verify head pointer exists
      4. `uwf thread exec <threadId> --agent uwf-builtin` — execute one step
      5. Verify exec returns JSON with a head field

      Report results. Pass threadId, workflowName, and containerName forward.
      On failure, still report containerName together with error so the
      container can be cleaned up.
    # The output text names every field the matching schema variant requires.
    output: "Report test results. Set $status to pass (with threadId, workflowName, containerName) or fail (with error and containerName)."
    # Result schema: both variants require containerName.
    frontmatter:
      oneOf:
        - properties:
            $status: { const: "pass" }
            threadId: { type: string }
            workflowName: { type: string }
            containerName: { type: string }
          required: [$status, threadId, workflowName, containerName]
        - properties:
            $status: { const: "fail" }
            error: { type: string }
            containerName: { type: string }
          required: [$status, error, containerName]

  # inspect: read-only checks against the thread named in the prompt, covering
  # `uwf step`, `uwf thread read` and `uwf cas` commands.
  inspect:
    description: "Test step list/show, thread read, and CAS operations"
    goal: "You are an E2E test runner. Validate read and inspect operations inside the Docker container."
    capabilities:
      - docker
      - shell
    procedure: |
      Use the container (containerName) and threadId from your prompt.
      All commands via: `docker exec <containerName> bash -c '...'`
      Set env: PATH="$HOME/.bun/bin:$PATH" UNCAGED_WORKFLOW_STORAGE_ROOT=/tmp/uwf-e2e-storage

      Step inspection:
      1. `uwf step list <threadId>` — verify steps array has length > 1
      2. Capture the last step hash from the output
      3. `uwf step show <lastStepHash>` — verify it returns a role field

      Thread read:
      4. `uwf thread read <threadId>` — verify non-empty output

      CAS operations:
      5. `uwf cas get <lastStepHash>` — verify returns a type field
      6. `uwf cas has <lastStepHash>` — verify exits 0
      7. `uwf cas refs <lastStepHash>` — list refs (may be empty)
      8. `uwf cas walk <lastStepHash>` — verify returns non-empty array

      Report results. Pass threadId, lastStepHash, workflowName, containerName forward.
      On failure, still report containerName together with error so the
      container can be cleaned up.
    # The output text names every field the matching schema variant requires.
    output: "Report test results. Set $status to pass (with threadId, lastStepHash, workflowName, containerName) or fail (with error and containerName)."
    # Result schema: both variants require containerName.
    frontmatter:
      oneOf:
        - properties:
            $status: { const: "pass" }
            threadId: { type: string }
            lastStepHash: { type: string }
            workflowName: { type: string }
            containerName: { type: string }
          required: [$status, threadId, lastStepHash, workflowName, containerName]
        - properties:
            $status: { const: "fail" }
            error: { type: string }
            containerName: { type: string }
          required: [$status, error, containerName]

  # cancel-and-fork: cancels a freshly started second thread, forks from the
  # first thread's last step, and runs the `uwf log` commands.
  cancel-and-fork:
    description: "Test thread cancel, step fork, and log inspection"
    goal: "You are an E2E test runner. Validate cancel, fork, and log operations inside the Docker container."
    capabilities:
      - docker
      - shell
    procedure: |
      Use containerName, threadId, lastStepHash, and workflowName from your prompt.
      All commands via: `docker exec <containerName> bash -c '...'`
      Set env: PATH="$HOME/.bun/bin:$PATH" UNCAGED_WORKFLOW_STORAGE_ROOT=/tmp/uwf-e2e-storage

      Cancel:
      1. Start a second thread: `uwf thread start <workflowName> -p 'E2E cancel test'`
      2. Cancel it: `uwf thread cancel <secondThreadId>`
      3. Verify it appears in completed list: `uwf thread list --status completed`

      Fork:
      4. Fork from the first thread's last step: `uwf step fork <lastStepHash>`
      5. Verify fork creates a new thread with a different ID

      Logs:
      6. `uwf log list` — verify output (may be empty)
      7. `uwf log show --thread <threadId>` — verify runs without error

      Report results with summary.
      Always include containerName in the result, on failure too, so the
      container can still be cleaned up.
    # The output text names every field the matching schema variant requires.
    output: "Report test results with summary. Set $status to pass (with summary and containerName) or fail (with error and containerName)."
    # Result schema: both variants require containerName.
    frontmatter:
      oneOf:
        - properties:
            $status: { const: "pass" }
            containerName: { type: string }
            summary: { type: string }
          required: [$status, containerName, summary]
        - properties:
            $status: { const: "fail" }
            error: { type: string }
            containerName: { type: string }
          required: [$status, error, containerName]

  # cleanup: removes the test container. The graph routes both the all-passed
  # path and the test-failure paths here, so the summary has to carry the test
  # outcome forward.
  cleanup:
    description: "Remove Docker container"
    goal: "You are an E2E test runner. Clean up the Docker container used for testing."
    capabilities:
      - docker
      - shell
    procedure: |
      Remove the Docker container (containerName is in your prompt):
      1. `docker rm -f <containerName>`
      2. Verify the container is gone:
         `docker ps -a --filter "name=^/<containerName>$" --format '{{.Names}}'` should return empty.
         The pattern is anchored because the name filter otherwise matches any
         container whose name merely contains <containerName>.

      Report cleanup result. In summary, restate the test outcome given in your
      prompt (all tests passed, or which phase failed and why) so that a
      successful cleanup does not hide a test failure.
    # The output text names every field the matching schema variant requires.
    output: "Report cleanup result. Set $status to pass (with summary) or fail (with error)."
    # Result schema: pass carries summary; fail carries error.
    frontmatter:
      oneOf:
        - properties:
            $status: { const: "pass" }
            summary: { type: string }
          required: [$status, summary]
        - properties:
            $status: { const: "fail" }
            error: { type: string }
          required: [$status, error]

# Transition table: for each role, the $status it reported selects the next role
# and the prompt handed to it. {{{name}}} placeholders are filled from the
# reporting role's frontmatter fields.
graph:
  $START:
    _:
      role: "bootstrap"
      prompt: "Set up the Docker container and verify uwf is runnable."
  bootstrap:
    pass:
      role: "config-and-registry"
      prompt: "Container {{{containerName}}} is ready. Validate config and workflow registration."
    # A failed bootstrap reports no containerName, so it cannot be routed to
    # cleanup. The prompt must not claim that no container exists, because the
    # failure can happen after `docker run` succeeded.
    fail:
      role: "$END"
      prompt: "Bootstrap failed: {{{error}}}. A container may have been started before the failure; check `docker ps -a --filter name=uwf-e2e-` and remove any leftover."
  config-and-registry:
    pass:
      role: "thread-ops"
      prompt: "Config and registry OK. Workflow '{{{workflowName}}}' registered. Container: {{{containerName}}}. Now test thread operations."
    fail:
      role: "cleanup"
      prompt: "Config/registry failed: {{{error}}}. Clean up container {{{containerName}}}."
  thread-ops:
    pass:
      role: "inspect"
      prompt: "Thread ops OK. threadId={{{threadId}}}, workflowName={{{workflowName}}}, containerName={{{containerName}}}. Now test inspect operations."
    fail:
      role: "cleanup"
      prompt: "Thread ops failed: {{{error}}}. Clean up container {{{containerName}}}."
  inspect:
    pass:
      role: "cancel-and-fork"
      prompt: "Inspect OK. threadId={{{threadId}}}, lastStepHash={{{lastStepHash}}}, workflowName={{{workflowName}}}, containerName={{{containerName}}}. Now test cancel, fork, and logs."
    fail:
      role: "cleanup"
      prompt: "Inspect failed: {{{error}}}. Clean up container {{{containerName}}}."
  # Both outcomes of the last test phase lead to cleanup.
  cancel-and-fork:
    pass:
      role: "cleanup"
      prompt: "All tests passed! {{{summary}}}. Clean up container {{{containerName}}}."
    fail:
      role: "cleanup"
      prompt: "Cancel/fork failed: {{{error}}}. Clean up container {{{containerName}}}."
  cleanup:
    pass:
      role: "$END"
      prompt: "E2E walkthrough complete. {{{summary}}}"
    fail:
      role: "$END"
      prompt: "Cleanup failed: {{{error}}}. Manual cleanup may be needed."