feat: add E2E walkthrough script (Docker-based, WIP)

Runs full uwf CLI walkthrough inside a Docker container with isolated storage root. Tests: setup, workflow add/list/show, thread start/exec/ cancel/fork, step list/show, CAS operations, config get/set, logs. Approach: mount host $HOME into node:22-bookworm container, override UNCAGED_WORKFLOW_STORAGE_ROOT with tmpdir. No mock LLM — real agents. Known issues documented in header comments (jq quoting, apt-get startup time, lockfile conflicts). 小橘 🍊（NEKO Team）
2026-05-26 12:40:47 +00:00
parent aa14fd08e0
commit 0ef2d8fec2
1 changed files with 377 additions and 0 deletions
@@ -0,0 +1,377 @@
+#!/usr/bin/env bash
+# E2E walkthrough for uncaged/workflow.
+# Runs inside Docker with isolated UNCAGED_WORKFLOW_STORAGE_ROOT.
+# Exercises: setup → workflow add → thread start/exec → cancel/fork → read/inspect.
+#
+# Usage:
+#   sudo -E scripts/e2e-walkthrough.sh [--agent <agent>] [--provider <provider>] [--model <model>] [--api-key <key>]
+#
+# Requires: Docker running, $HOME mount approach (see scripts/check-dev-env.sh).
+# Produces: JSON report on stdout, logs in $E2E_DIR.
+#
+# IMPORTANT: Must run with `sudo -E` to preserve $HOME (Docker needs root).
+#
+# Known Issues (WIP):
+#   1. `echo '$OUT' | jq` breaks when $OUT contains single quotes (e.g. workflow show
+#      output with YAML). Fix: use heredoc or pipe variable directly.
+#   2. Config may still have old `apiKeyEnv` field — thread exec will fail with
+#      "no API key". Fix: re-run `uwf setup` or manually set `apiKey` in config.
+#   3. Bootstrap installs jq via apt-get which adds ~30s startup time.
+#      Consider baking a custom image or using node's JSON.parse instead.
+#   4. `bun install` in container may modify host's lockfile/node_modules.
+#      Consider `--frozen-lockfile` or read-only mount for non-essential paths.
+
+set -euo pipefail
+
+# --- Args ---
+AGENT="uwf-builtin"
+PROVIDER=""
+MODEL=""
+API_KEY=""
+KEEP_CONTAINER=false
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --agent)     AGENT="$2";    shift 2 ;;
+    --provider)  PROVIDER="$2"; shift 2 ;;
+    --model)     MODEL="$2";    shift 2 ;;
+    --api-key)   API_KEY="$2";  shift 2 ;;
+    --keep)      KEEP_CONTAINER=true; shift ;;
+    *) echo "Unknown arg: $1" >&2; exit 1 ;;
+  esac
+done
+
+# --- Resolve paths ---
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
+E2E_DIR=$(mktemp -d /tmp/uwf-e2e-XXXXXX)
+CONTAINER_NAME="uwf-e2e-$(date +%s)"
+
+echo "=== uwf E2E walkthrough ===" >&2
+echo "Agent:     $AGENT" >&2
+echo "Provider:  ${PROVIDER:-"(from config)"}" >&2
+echo "Model:     ${MODEL:-"(from config)"}" >&2
+echo "E2E dir:   $E2E_DIR" >&2
+echo "Container: $CONTAINER_NAME" >&2
+echo "" >&2
+
+# --- Cleanup ---
+cleanup() {
+  if [ "$KEEP_CONTAINER" = false ]; then
+    docker rm -f "$CONTAINER_NAME" 2>/dev/null || true
+  fi
+}
+trap cleanup EXIT
+
+# --- Build inner script ---
+# This runs INSIDE the container with an isolated storage root.
+cat > "$E2E_DIR/run.sh" << 'INNER_SCRIPT'
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Isolated storage — never touches host's ~/.uncaged/workflow
+export UNCAGED_WORKFLOW_STORAGE_ROOT="/tmp/uwf-e2e-storage"
+mkdir -p "$UNCAGED_WORKFLOW_STORAGE_ROOT"
+
+REPO_DIR="$1"
+AGENT="$2"
+PROVIDER="$3"
+MODEL="$4"
+API_KEY="$5"
+
+# Ensure tools are in PATH (derive HOME from REPO_DIR to avoid container HOME issues)
+REAL_HOME="${6:-$HOME}"
+export HOME="$REAL_HOME"
+export PATH="$REAL_HOME/.bun/bin:$REAL_HOME/.hermes/hermes-agent/venv/bin:$REAL_HOME/.local/share/npm/bin:$PATH"
+
+# Resolve uwf
+UWF="bun $REPO_DIR/packages/cli-workflow/src/cli.ts"
+
+PASS=0
+FAIL=0
+RESULTS=()
+
+run_test() {
+  local name="$1"
+  shift
+  local output exit_code
+  echo "--- TEST: $name ---" >&2
+  output=$("$@" 2>&1) && exit_code=0 || exit_code=$?
+  if [ $exit_code -eq 0 ]; then
+    PASS=$((PASS + 1))
+    RESULTS+=("{\"name\":\"$name\",\"status\":\"pass\"}")
+    echo "  ✅ PASS" >&2
+  else
+    FAIL=$((FAIL + 1))
+    # Escape output for JSON
+    local escaped
+    escaped=$(echo "$output" | head -5 | tr '\n' ' ' | sed 's/"/\\"/g' | cut -c1-200)
+    RESULTS+=("{\"name\":\"$name\",\"status\":\"fail\",\"error\":\"$escaped\"}")
+    echo "  ❌ FAIL: $output" >&2
+  fi
+  echo "$output"
+}
+
+assert_contains() {
+  local haystack="$1" needle="$2"
+  if echo "$haystack" | grep -q "$needle"; then
+    return 0
+  else
+    echo "Expected to contain: $needle" >&2
+    echo "Got: $haystack" >&2
+    return 1
+  fi
+}
+
+assert_json_field() {
+  local json="$1" field="$2"
+  if echo "$json" | jq -e ".$field" >/dev/null 2>&1; then
+    return 0
+  else
+    echo "Missing JSON field: $field" >&2
+    return 1
+  fi
+}
+
+# ============================================================
+# Phase 1: Environment check
+# ============================================================
+echo "" >&2
+echo "=== Phase 1: Environment ===" >&2
+
+run_test "uwf --version" bash -c "$UWF --version"
+
+# ============================================================
+# Phase 2: Setup (non-interactive)
+# ============================================================
+echo "" >&2
+echo "=== Phase 2: Setup ===" >&2
+
+if [ -n "$PROVIDER" ] && [ -n "$MODEL" ] && [ -n "$API_KEY" ]; then
+  SETUP_CMD="$UWF setup --provider $PROVIDER --base-url https://api.openai.com/v1 --api-key $API_KEY --model $MODEL"
+  if [ -n "$AGENT" ]; then
+    SETUP_CMD="$SETUP_CMD --agent $AGENT"
+  fi
+  run_test "uwf setup (non-interactive)" bash -c "$SETUP_CMD"
+else
+  # Copy host config if available
+  if [ -f "$HOME/.uncaged/workflow/config.yaml" ]; then
+    cp "$HOME/.uncaged/workflow/config.yaml" "$UNCAGED_WORKFLOW_STORAGE_ROOT/config.yaml"
+    echo "  Copied host config.yaml" >&2
+  fi
+fi
+
+# Test config commands
+OUT=$(run_test "uwf config list" bash -c "$UWF config list")
+run_test "config list is valid JSON" bash -c "echo '$OUT' | jq . >/dev/null"
+
+# ============================================================
+# Phase 3: Workflow registration
+# ============================================================
+echo "" >&2
+echo "=== Phase 3: Workflow registration ===" >&2
+
+# Use the example workflow
+EXAMPLE_WF="$REPO_DIR/examples/solve-issue.yaml"
+if [ ! -f "$EXAMPLE_WF" ]; then
+  echo "No example workflow found, creating minimal test workflow" >&2
+  EXAMPLE_WF="/tmp/test-workflow.yaml"
+  cat > "$EXAMPLE_WF" << 'WF'
+name: test-e2e
+roles:
+  worker:
+    goal: "Respond to the prompt with a brief answer."
+    outputSchema:
+      type: object
+      required: ["$status", "answer"]
+      properties:
+        $status:
+          type: string
+          enum: ["done"]
+        answer:
+          type: string
+graph:
+  - from: $START
+    to: worker
+  - from: worker
+    condition:
+      $status: done
+    to: $END
+WF
+fi
+
+OUT=$(run_test "uwf workflow add" bash -c "$UWF workflow add $EXAMPLE_WF")
+run_test "workflow add returns hash" bash -c "echo '$OUT' | jq -e '.hash'"
+
+OUT=$(run_test "uwf workflow list" bash -c "$UWF workflow list")
+run_test "workflow list is non-empty" bash -c "echo '$OUT' | jq -e 'length > 0'"
+
+# Get workflow name
+WF_NAME=$(echo "$OUT" | jq -r '.[0].name // empty')
+run_test "workflow has a name" bash -c "[ -n '$WF_NAME' ]"
+
+OUT=$(run_test "uwf workflow show" bash -c "$UWF workflow show $WF_NAME")
+run_test "workflow show returns roles" bash -c "echo '$OUT' | jq -e '.payload.roles'"
+
+# ============================================================
+# Phase 4: Thread lifecycle
+# ============================================================
+echo "" >&2
+echo "=== Phase 4: Thread lifecycle ===" >&2
+
+# Start a thread
+OUT=$(run_test "uwf thread start" bash -c "$UWF thread start $WF_NAME -p 'E2E test: what is 2+2?'")
+THREAD_ID=$(echo "$OUT" | jq -r '.thread // empty')
+run_test "thread start returns thread ID" bash -c "[ -n '$THREAD_ID' ]"
+
+# List threads
+OUT=$(run_test "uwf thread list" bash -c "$UWF thread list")
+run_test "thread appears in list" bash -c "echo '$OUT' | jq -e '.[] | select(.thread==\"$THREAD_ID\")'"
+
+# Show thread
+OUT=$(run_test "uwf thread show" bash -c "$UWF thread show $THREAD_ID")
+run_test "thread show returns head" bash -c "echo '$OUT' | jq -e '.head'"
+
+# Execute one step
+EXEC_ARGS=""
+if [ -n "$AGENT" ]; then
+  EXEC_ARGS="--agent $AGENT"
+fi
+OUT=$(run_test "uwf thread exec (1 step)" bash -c "$UWF thread exec $THREAD_ID $EXEC_ARGS")
+run_test "thread exec returns step info" bash -c "echo '$OUT' | jq -e '.head'"
+
+# ============================================================
+# Phase 5: Read & Inspect
+# ============================================================
+echo "" >&2
+echo "=== Phase 5: Read & Inspect ===" >&2
+
+# Step list
+OUT=$(run_test "uwf step list" bash -c "$UWF step list $THREAD_ID")
+STEP_COUNT=$(echo "$OUT" | jq '.steps | length')
+run_test "step list has steps" bash -c "[ $STEP_COUNT -gt 1 ]"
+
+# Get last step hash
+LAST_STEP=$(echo "$OUT" | jq -r '.steps[-1].hash // empty')
+run_test "last step has hash" bash -c "[ -n '$LAST_STEP' ]"
+
+# Step show
+if [ -n "$LAST_STEP" ]; then
+  OUT=$(run_test "uwf step show" bash -c "$UWF step show $LAST_STEP")
+  run_test "step show returns role" bash -c "echo '$OUT' | jq -e '.role'"
+fi
+
+# Thread read
+OUT=$(run_test "uwf thread read" bash -c "$UWF thread read $THREAD_ID")
+run_test "thread read produces output" bash -c "[ -n '$OUT' ]"
+
+# CAS operations
+if [ -n "$LAST_STEP" ]; then
+  OUT=$(run_test "uwf cas get" bash -c "$UWF cas get $LAST_STEP")
+  run_test "cas get returns type" bash -c "echo '$OUT' | jq -e '.type'"
+
+  OUT=$(run_test "uwf cas has" bash -c "$UWF cas has $LAST_STEP")
+
+  OUT=$(run_test "uwf cas refs" bash -c "$UWF cas refs $LAST_STEP")
+
+  OUT=$(run_test "uwf cas walk" bash -c "$UWF cas walk $LAST_STEP")
+  run_test "cas walk returns nodes" bash -c "echo '$OUT' | jq -e 'length > 0'"
+fi
+
+# ============================================================
+# Phase 6: Cancel & Fork
+# ============================================================
+echo "" >&2
+echo "=== Phase 6: Cancel & Fork ===" >&2
+
+# Start a second thread for cancel test
+OUT=$(run_test "thread start (for cancel)" bash -c "$UWF thread start $WF_NAME -p 'E2E cancel test'")
+CANCEL_THREAD=$(echo "$OUT" | jq -r '.thread // empty')
+
+if [ -n "$CANCEL_THREAD" ]; then
+  OUT=$(run_test "uwf thread cancel" bash -c "$UWF thread cancel $CANCEL_THREAD")
+  run_test "cancelled thread status" bash -c "$UWF thread list --status completed | jq -e '.[] | select(.thread==\"$CANCEL_THREAD\")'"
+fi
+
+# Fork from the first thread's last step
+if [ -n "$LAST_STEP" ]; then
+  OUT=$(run_test "uwf step fork" bash -c "$UWF step fork $LAST_STEP")
+  FORK_THREAD=$(echo "$OUT" | jq -r '.thread // empty')
+  run_test "fork creates new thread" bash -c "[ -n '$FORK_THREAD' ] && [ '$FORK_THREAD' != '$THREAD_ID' ]"
+fi
+
+# ============================================================
+# Phase 7: Log inspection
+# ============================================================
+echo "" >&2
+echo "=== Phase 7: Logs ===" >&2
+
+OUT=$(run_test "uwf log list" bash -c "$UWF log list")
+OUT=$(run_test "uwf log show" bash -c "$UWF log show --thread $THREAD_ID 2>&1 || true")
+
+# ============================================================
+# Phase 8: Config operations
+# ============================================================
+echo "" >&2
+echo "=== Phase 8: Config get/set ===" >&2
+
+OUT=$(run_test "uwf config get defaultAgent" bash -c "$UWF config get defaultAgent")
+OUT=$(run_test "uwf config set (test key)" bash -c "$UWF config set models.test.name test-model")
+OUT=$(run_test "uwf config get (verify set)" bash -c "$UWF config get models.test.name")
+run_test "config set value persisted" bash -c "echo '$OUT' | grep -q 'test-model'"
+
+# ============================================================
+# Report
+# ============================================================
+echo "" >&2
+echo "=== Results ===" >&2
+echo "Pass: $PASS  Fail: $FAIL" >&2
+
+# JSON report
+echo "{"
+echo "  \"pass\": $PASS,"
+echo "  \"fail\": $FAIL,"
+echo "  \"agent\": \"$AGENT\","
+echo "  \"tests\": [$(IFS=,; echo "${RESULTS[*]}")]"
+echo "}"
+
+[ $FAIL -eq 0 ]
+INNER_SCRIPT
+
+chmod +x "$E2E_DIR/run.sh"
+
+# --- Run in Docker ---
+echo "Starting Docker container..." >&2
+
+# --- Build bootstrap script (runs first inside container) ---
+cat > "$E2E_DIR/bootstrap.sh" << BOOTSTRAP
+#!/usr/bin/env bash
+set -uo pipefail
+echo "Installing jq..." >&2
+apt-get update -qq >&2 && apt-get install -y -qq jq >&2
+echo "jq installed" >&2
+
+# All tools come from host via mount
+export HOME='$HOME'
+export PATH="$HOME/.bun/bin:$HOME/.hermes/hermes-agent/venv/bin:$HOME/.local/share/npm/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
+
+# Ensure bun modules are resolved for this environment
+cd '$REPO_DIR'
+echo "Running bun install..." >&2
+which bun >&2
+bun install 2>&1 | tail -3 >&2
+echo "bun install done" >&2
+
+# Run E2E (pass HOME explicitly as 6th arg)
+bash /e2e/run.sh '$REPO_DIR' '$AGENT' '$PROVIDER' '$MODEL' '$API_KEY' '$HOME'
+BOOTSTRAP
+chmod +x "$E2E_DIR/bootstrap.sh"
+
+docker run --rm \
+  --name "$CONTAINER_NAME" \
+  -v "$HOME:$HOME" \
+  -v "$E2E_DIR:/e2e" \
+  -e HOME="$HOME" \
+  -w "$REPO_DIR" \
+  node:22-bookworm \
+  bash /e2e/bootstrap.sh