# json-cas/.workflows/e2e-check.yaml

# Workflow identity: the short name plus a one-line summary of what the
# preparer, tester and reporter roles do.
name: 'e2e-check'
description: >-
  Docker-isolated E2E testing of json-cas CLI.
  Preparer builds from scratch, tester runs scenarios, reporter files bugs.

roles:
  # preparer: builds the project from scratch inside a throwaway Docker
  # container, initializes a CAS store, and reports either `ready` or
  # `setup_failed` through its frontmatter.
  preparer:
    description: "Spins up Docker container, copies repo, installs, builds, runs unit tests"
    goal: "You set up a clean Docker environment for E2E testing. Your job is to start a container, install deps, build, lint, run unit tests, and initialize a CAS store. Report any setup failures as bugs."
    capabilities:
      - docker
    procedure: |
      Before starting, remove any stale container left over from an earlier
      run (a name clash would make step 1 fail for reasons unrelated to the
      project):
      ```bash
      docker rm -f json-cas-e2e 2>/dev/null || true
      ```

      1. Start a detached container:
         ```bash
         docker run -d --name json-cas-e2e \
           -v "<repoPath>:/src:ro" \
           -w /workspace \
           oven/bun:latest \
           sleep 3600
         ```

      2. Copy repo and install:
         ```bash
         docker exec json-cas-e2e bash -c 'cp -r /src/. /workspace/ && cd /workspace && bun install'
         ```
         ✅ exit code 0, no missing peer deps

      3. Build:
         ```bash
         docker exec json-cas-e2e bash -c 'cd /workspace && bun run build'
         ```
         ✅ exit code 0, no type errors

      4. Lint:
         ```bash
         docker exec json-cas-e2e bash -c 'cd /workspace && bun run check'
         ```
         ✅ exit code 0

      5. Unit tests:
         ```bash
         docker exec json-cas-e2e bash -c 'cd /workspace && bun test'
         ```
         ✅ all pass (ignore dist/ false positives)

      6. Init CAS store:
         ```bash
         docker exec json-cas-e2e bash -c 'mkdir -p /tmp/cas-test && cd /workspace && bun packages/cli-json-cas/src/index.ts --store /tmp/cas-test init && bun packages/cli-json-cas/src/index.ts --store /tmp/cas-test bootstrap'
         ```

      **Any failure here is a high-severity bug** — it means a clean environment can't build/run the project.

      Set $status=ready if all steps pass. Set $status=setup_failed with failures list if anything breaks.

      Frontmatter values:
      - `containerName`: `json-cas-e2e` (always, for both outcomes, so the container can be cleaned up later)
      - `repoPath`: the repo path you were given (always)
      - `storePath`: `/tmp/cas-test` (only when $status=ready)
      - `failures`: one object per failed step with all six fields: title, command, expected, actual, severity, phase (only when $status=setup_failed)

    output: "Setup result summary."
    frontmatter:
      oneOf:
        # Success: everything downstream roles need to reach the environment.
        - type: object
          properties:
            $status: { const: "ready" }
            containerName: { type: string }
            storePath: { type: string }
            repoPath: { type: string }
          required: [$status, containerName, storePath, repoPath]
        # Failure: same item shape as the tester's `bugs`, so the reporter
        # can file them the same way.
        - type: object
          properties:
            $status: { const: "setup_failed" }
            failures:
              type: array
              items:
                type: object
                properties:
                  title: { type: string }
                  command: { type: string }
                  expected: { type: string }
                  actual: { type: string }
                  severity: { type: string }
                  phase: { type: string }
                required: [title, command, expected, actual, severity, phase]
            # The failure path also ends in a cleanup prompt that interpolates
            # containerName, so it must be emitted here too.
            containerName: { type: string }
            repoPath: { type: string }
          required: [$status, failures, containerName, repoPath]

  # tester: runs the CLI scenario catalogue inside the prepared container
  # and reports either `all_passed` or `bugs_found` (with structured bug
  # objects) through its frontmatter.
  tester:
    description: "Runs CLI scenarios against the prepared Docker environment"
    goal: "You are an exploratory QA agent. The Docker container is already running with the project built and a CAS store initialized. Run CLI test scenarios and report bugs."
    capabilities:
      - testing
      - cli
    # NOTE(review): the Liquid sample in Phase 4 contains double-brace
    # expressions while this procedure also uses triple-brace placeholders;
    # confirm the workflow engine leaves the double-brace text untouched.
    procedure: |
      The container `{{{containerName}}}` is already running with the project built.
      Store path: `{{{storePath}}}`.

      Run all commands via:
      ```bash
      docker exec {{{containerName}}} bash -c 'cd /workspace && bun packages/cli-json-cas/src/index.ts --store {{{storePath}}} <subcommand>'
      ```

      Define a shorthand in your notes:
      `CMD="cd /workspace && bun packages/cli-json-cas/src/index.ts --store {{{storePath}}}"`

      Throughout this procedure `$CMD <args>` is notation for running that
      command line inside the container with `docker exec ... bash -c` as
      shown above. It is not meant to be expanded as a literal shell variable.

      ## Phase 1: CAS Core Operations

      1. **bootstrap** — `$CMD bootstrap`
         Expected: prints meta-schema hash (13-char Base32)

      2. **schema put** — Create `/tmp/test-schema.json` in container, then `$CMD schema put /tmp/test-schema.json`
         Schema: `{"type":"object","properties":{"name":{"type":"string"},"age":{"type":"number"}},"required":["name"],"additionalProperties":false}`
         Expected: prints type hash

      3. **schema get** — `$CMD schema get <type-hash>`
         Expected: returns the schema JSON

      4. **schema list** — `$CMD schema list`
         Expected: lists registered schemas

      5. **put** — Create data file, `$CMD put <type-hash> /tmp/test-node.json`
         Data: `{"name":"Alice","age":30}`
         Expected: prints node hash

      6. **get** — `$CMD get <node-hash>`
         Expected: returns node JSON

      7. **has (exists)** — `$CMD has <node-hash>`
         Expected: true

      8. **has (not exists)** — `$CMD has AAAAAAAAAAAAA`
         Expected: false

      9. **verify** — `$CMD verify <node-hash>`
         Expected: ok

      10. **refs** — `$CMD refs <node-hash>`
          Expected: lists refs (may be empty)

      11. **walk** — `$CMD walk <node-hash>`
          Expected: shows traversal tree

      12. **hash (dry run)** — `$CMD hash <type-hash> /tmp/test-node.json`
          Expected: same hash as put

      13. **cat** — `$CMD cat <node-hash>`
          Expected: full node output

      14. **cat --payload** — `$CMD cat <node-hash> --payload`
          Expected: payload only (no type wrapper)

      ## Phase 2: Schema Validation

      1. **Invalid node** — `$CMD put <type-hash> /tmp/bad-node.json` where bad-node = `{"name":123}`
         Expected: validation error, non-zero exit

      2. **schema validate** — `$CMD schema validate <node-hash>`
         Expected: valid for good node

      3. **Non-existent schema** — `$CMD put AAAAAAAAAAAAA /tmp/test-node.json`
         Expected: error about missing schema

      ## Phase 3: Variable System

      1. **var set** — `$CMD var set myapp/config <node-hash>`
         Expected: creates variable

      2. **var get** — `$CMD var get myapp/config --schema <type-hash>`
         Expected: returns variable

      3. **var list** — `$CMD var list`
         Expected: shows all variables

      4. **var list prefix** — `$CMD var list myapp/`
         Expected: filtered results

      5. **var set (update)** — Put a second node, `$CMD var set myapp/config <new-hash>`
         Expected: upsert succeeds

      6. **var tag** — `$CMD var tag myapp/config --schema <type-hash> env:prod important`
         Expected: adds tag and label

      7. **var list --tag** — `$CMD var list --tag env:prod`
         Expected: finds tagged variable

      8. **var list --tag (label)** — `$CMD var list --tag important`
         Expected: finds labeled variable

      9. **var tag remove** — `$CMD var tag myapp/config --schema <type-hash> :important`
         Expected: removes label

      10. **var delete** — `$CMD var delete myapp/config`
          Expected: deletes variable

      11. **var get (deleted)** — `$CMD var get myapp/config --schema <type-hash>`
          Expected: not found error

      ## Phase 4: Template System

      1. **template set** — Create template file, `$CMD template set <type-hash> /tmp/test.liquid`
         Template: `Name: {{ payload.name }}, Age: {{ payload.age }}`
         Expected: success

      2. **template get** — `$CMD template get <type-hash>`
         Expected: returns template text

      3. **template list** — `$CMD template list`
         Expected: lists templates

      4. **template delete** — `$CMD template delete <type-hash>`
         Expected: success

      5. **template get (deleted)** — `$CMD template get <type-hash>`
         Expected: not found error

      ## Phase 5: Render

      1. Re-register template, then `$CMD render <node-hash>`
         Expected: rendered output with payload values filled in

      2. **render --resolution** — `$CMD render <node-hash> --resolution 0.5`
         Expected: different resolution output

      3. **render (bad hash)** — `$CMD render AAAAAAAAAAAAA`
         Expected: graceful error, non-zero exit

      ## Phase 6: GC

      Phase 3 deleted `myapp/config`, so at this point no variable references
      `<node-hash>`. Re-reference it before running gc; otherwise the
      "preserves referenced" check below would report a false failure.

      1. **gc setup** — `$CMD var set gc/keep <node-hash>`
         Expected: creates variable

      2. **gc basic** — `$CMD gc`
         Expected: runs without error

      3. **gc preserves referenced** — Verify `$CMD has <node-hash>` still true

      4. **gc collects orphans** — Put an orphan node (not in any variable) with a payload not used before, e.g. `{"name":"Orphan"}`, confirm `has` is true, run gc, check it's gone

      ## Phase 7: Edge Cases & Error Handling

      1. `$CMD get AAAAAAAAAAAAA` — non-existent
      2. `$CMD put <type-hash> /nonexistent/file.json` — missing file
      3. `$CMD var set "" <hash>` — empty name
      4. `$CMD var set "bad name!" <hash>` — invalid name chars
      5. `$CMD schema put /tmp/bad-schema.json` — `{"type":"invalid"}`
      6. `$CMD` with no subcommand — should show help
      7. `cd /workspace && bun packages/cli-json-cas/src/index.ts --store /nonexistent/path get <hash>` — bad store path (run this one without the `$CMD` shorthand, which already passes `--store`)

      ## Recording Results

      For each scenario:
      - ✅ Pass: works as expected
      - ❌ Fail: unexpected behavior, crash, wrong output
      - ⚠️ Questionable: works but confusing UX

      Collect all ❌ and ⚠️. For each, record:
      - Title (concise description)
      - Command (exact command run)
      - Expected behavior
      - Actual behavior (include actual output)
      - Severity: critical / high / medium / low
      - Phase: which test phase

      ## CRITICAL: Frontmatter Output Format

      Your response MUST start with YAML frontmatter. The `bugs` field MUST be an array of objects, NOT strings.

      Example of CORRECT frontmatter:
      ```yaml
      ---
      $status: bugs_found
      containerName: {{{containerName}}}
      repoPath: /path/to/repo
      bugs:
        - title: "put does not validate data against schema"
          command: "json-cas put <hash> bad-data.json"
          expected: "Validation error, non-zero exit"
          actual: "Accepted invalid data, exit 0"
          severity: "high"
          phase: "Schema Validation"
      ---
      ```

      Do NOT write bugs as plain strings like `- some bug description`. Each bug MUST be an object with all 6 fields.
      `repoPath` is optional; include it only if it was given to you.

      If all tests pass:
      ```yaml
      ---
      $status: all_passed
      containerName: {{{containerName}}}
      ---
      ```

    output: "Summary of all phases with pass/fail counts. Set $status."
    frontmatter:
      oneOf:
        # At least one ❌/⚠️ finding: structured bug objects plus the
        # container name for the steps that follow.
        - type: object
          properties:
            $status: { const: "bugs_found" }
            bugs:
              type: array
              items:
                type: object
                properties:
                  title: { type: string }
                  command: { type: string }
                  expected: { type: string }
                  actual: { type: string }
                  severity: { type: string }
                  phase: { type: string }
                required: [title, command, expected, actual, severity, phase]
            containerName: { type: string }
            repoPath: { type: string }
          required: [$status, bugs, containerName]
        # Clean run: only the container name is passed on.
        - type: object
          properties:
            $status: { const: "all_passed" }
            containerName: { type: string }
          required: [$status, containerName]

  # reporter: turns each structured bug object into a Gitea issue and
  # reports `reported` (all filed) or `partial` (some failed).
  reporter:
    description: "Opens Gitea issues for each bug found by the tester"
    goal: "You are a bug reporter. You create well-formatted Gitea issues for each bug found during E2E testing."
    capabilities:
      - issue-management
    procedure: |
      1. Parse the bug list from your prompt. It is either the tester's
         `bugs` array or the preparer's `failures` array; both use the same
         fields (title, command, expected, actual, severity, phase).
      2. Group bugs by severity (critical first)
      3. For each bug, write the issue body to a file, then create the issue.
         The quoted heredoc delimiter (`<<'EOF'`) stops the shell from
         expanding `$`, backticks or quotes that appear in captured output:
         ````bash
         cat > /tmp/issue-body.md <<'EOF'
         ## Bug Report (E2E Check)

         **Phase:** <phase>
         **Severity:** <severity>

         **Command:**
         ```
         <exact command>
         ```

         **Expected:** <expected>

         **Actual:** <actual>

         ---
         _Reported by e2e-check workflow (Docker isolated)_
         EOF

         tea issues create -r uncaged/json-cas \
           -t "[E2E] <title>" \
           -d "$(cat /tmp/issue-body.md)"
         ````
         If the title contains `$`, backticks or double quotes, single-quote
         the `-t` argument instead.

         ⚠️ If `tea issues create` fails with long body, use Gitea REST API.
         Write the request to `/tmp/issue.json` as valid JSON first (title
         and body properly JSON-escaped), shaped like
         `{"title":"[E2E] ...","body":"..."}`, then send the file:
         ```bash
         eval "$(cfg env)" && GITEA_TOKEN=$(cfg get GITEA_TOKEN)
         curl -sS --fail -X POST "https://git.shazhou.work/api/v1/repos/uncaged/json-cas/issues" \
           -H "Authorization: token $GITEA_TOKEN" \
           -H "Content-Type: application/json" \
           --data @/tmp/issue.json
         ```
         A non-zero curl exit means the issue was not created.

      4. Collect created issue numbers
      5. Set $status=reported with `issues` (the created issue numbers, as
         strings) when every bug was filed. Set $status=partial with numeric
         `created` and `failed` counts when at least one could not be filed.

    output: "List created issues. Set $status."
    frontmatter:
      oneOf:
        # Every bug was filed; `issues` lists what was created.
        - type: object
          properties:
            $status: { const: "reported" }
            issues:
              type: array
              items: { type: string }
          required: [$status, issues]
        # Some filings failed; only the counts are reported.
        - type: object
          properties:
            $status: { const: "partial" }
            created: { type: number }
            failed: { type: number }
          required: [$status, created, failed]

  # cleanup: tears down the test container and reports `cleaned` or
  # `cleanup_failed` (with the error text).
  cleanup:
    description: "Stops and removes the Docker container"
    goal: "You clean up the Docker environment after testing is complete."
    capabilities:
      - docker
    procedure: |
      Stop and remove the container:
      ```bash
      docker stop {{{containerName}}} && docker rm {{{containerName}}}
      ```

      Verify it's gone (`-q` prints only container IDs, so no Go-template
      braces are needed in this templated procedure):
      ```bash
      docker ps -aq --filter name={{{containerName}}}
      ```
      Expected: empty output.

      Set $status=cleaned when the verification output is empty. Otherwise
      set $status=cleanup_failed and put the docker error output in `error`.

    output: "Cleanup result."
    frontmatter:
      oneOf:
        # Container confirmed gone.
        - type: object
          properties:
            $status: { const: "cleaned" }
          required: [$status]
        # Container could not be stopped/removed or is still listed.
        - type: object
          properties:
            $status: { const: "cleanup_failed" }
            error: { type: string }
          required: [$status, error]

# State graph: for each role, maps the `$status` it emitted to the next role
# and the prompt that role receives. `$START` is the entry point and `$END`
# terminates the run.
graph:
  $START:
    _:
      role: "preparer"
      prompt: "Set up Docker environment for E2E testing. Repo at {{{repoPath}}}."

  preparer:
    # Environment built: hand over to the scenario runner.
    ready:
      role: "tester"
      prompt: "Environment ready. Container: {{{containerName}}}, store: {{{storePath}}}. Run all test scenarios."
    # Build/setup broke: the failures themselves are filed as bugs.
    setup_failed:
      role: "reporter"
      prompt: "Setup failures found. File these as bugs: {{{failures}}}"

  tester:
    all_passed:
      role: "cleanup"
      prompt: "All tests passed. Clean up container {{{containerName}}}."
    bugs_found:
      role: "reporter"
      prompt: "File these bugs as Gitea issues: {{{bugs}}}"

  # NOTE(review): both prompts below interpolate containerName, which is not
  # among the fields the reporter emits; confirm the engine keeps values from
  # earlier roles in scope here.
  reporter:
    reported:
      role: "cleanup"
      prompt: "Bugs filed: {{{issues}}}. Clean up container {{{containerName}}}."
    partial:
      role: "cleanup"
      prompt: "Filed {{{created}}} issues, {{{failed}}} failed. Clean up container {{{containerName}}}."

  cleanup:
    cleaned:
      role: "$END"
      prompt: "E2E check complete. Environment cleaned up."
    cleanup_failed:
      role: "$END"
      prompt: "E2E check complete but cleanup failed: {{{error}}}"