test: add E2E tests for suspend/resume, --count, mustache, and completed resume (#33)

Adds 4 new E2E scenarios, numbered 4–7 to continue the existing suite: (4) $SUSPEND → resume lifecycle (suspendedRole/suspendMessage metadata); (5) --count 3 runs the entire pipeline in one invocation; (6) mustache template variables are rendered into edgePrompt; (7) a completed thread can be resumed (衔尾蛇, i.e. ouroboros: end → start, with the CAS chain preserved). Total: 7 E2E scenarios, all passing.
2026-06-04 09:03:01 +00:00
parent cd7e4e77ff
commit 974c2b8f1b
8 changed files with 354 additions and 2 deletions
@@ -106,9 +106,13 @@ async function addWorkflow(workflowFixture: string, workflowName: string): Promi
 /** Captured outcome of one CLI invocation: its stdout text, its stderr text, and the process exit code. */
 type ExecResult = { stdout: string; stderr: string; exitCode: number };
-function runExec(threadId: string): ExecResult {
+function runExec(threadId: string, count: number | null = null): ExecResult {
  const args = [CLI_PATH, "thread", "exec", threadId];
  if (count !== null) {
    args.push("--count", String(count));
  }
  try {
-    const stdout = execFileSync(process.execPath, [CLI_PATH, "thread", "exec", threadId], {
+    const stdout = execFileSync(process.execPath, args, {
      encoding: "utf8",
      stdio: ["ignore", "pipe", "pipe"],
      env: { ...process.env, UWF_HOME: uwfHome, OCAS_HOME: casDir },
@@ -126,11 +130,38 @@ function runExec(threadId: string): ExecResult {
  }
 }
 /**
  * Run `uwf thread resume <threadId> -p <prompt>` against the built CLI in a child Node process.
  *
  * The child gets `UWF_HOME`/`OCAS_HOME` pointed at the test directories, runs with
  * `tmpDir` as its working directory, and is given a 30000 ms timeout.
  *
  * @param threadId - Thread to resume.
  * @param prompt - Text passed to the CLI via `-p`.
  * @returns Exit code 0 with the captured stdout (stderr is reported as `""`) when the
  *   child succeeds; otherwise whatever stdout/stderr/status the thrown error carries,
  *   with the exit code falling back to 1 when no status is present.
  */
 function runResume(threadId: string, prompt: string): ExecResult {
  const argv = [CLI_PATH, "thread", "resume", threadId, "-p", prompt];
  let captured: string;
  try {
    captured = execFileSync(process.execPath, argv, {
      encoding: "utf8",
      stdio: ["ignore", "pipe", "pipe"],
      env: { ...process.env, UWF_HOME: uwfHome, OCAS_HOME: casDir },
      cwd: tmpDir,
      timeout: 30000,
    });
  } catch (failure: unknown) {
    // execFileSync throws on a non-zero exit; the error object carries the child's output.
    const details = failure as NodeJS.ErrnoException & {
      stdout?: string;
      stderr?: string;
      status?: number;
    };
    const exitCode = details.status ?? 1;
    return { stdout: details.stdout ?? "", stderr: details.stderr ?? "", exitCode };
  }
  return { stdout: captured, stderr: "", exitCode: 0 };
 }
 /**
  * Shape the tests in this file expect when they `JSON.parse` the CLI's stdout
  * for an executed step (a `--count` run is parsed as an array of these).
  */
 type StepOutputJson = {
  /** Presumably the thread id — not asserted by the tests visible here; confirm against the CLI. */
  thread: string;
  /** Reference to the step node produced by this step; tests pass it to `getStepNode` and compare it with the thread index entry's `head`. */
  head: string;
  /** Thread status after the step; the tests here assert "idle", "suspended" and "completed" (other values may exist). */
  status: string;
  /** Role reported for an idle thread (e.g. "developer" after the first step of the count pipeline); asserted null for suspended and completed results. */
  currentRole: string | null;
  /** Role that triggered the suspension (asserted as "planner" in the suspend scenario); asserted null once the thread completes. */
  suspendedRole: string | null;
  /** Message reported with a suspension (asserted as "Need more info: missing requirements" in the suspend scenario). */
  suspendMessage: string | null;
  /** Asserted true alongside status "completed" and false alongside "suspended". */
  done: boolean;
 };
@@ -293,4 +324,159 @@ describe("E2E mock-agent: full uwf pipeline", () => {
    expect(entry!.status).not.toBe("completed");
    expect(entry!.head).toBe(step1.head);
  });
  // Scenario 4: a planner step that emits `insufficient_info` suspends the thread;
  // `uwf thread resume` then re-runs the planner, which reports `ready` and ends the thread.
  test("4. planner $SUSPEND then resume re-runs planner and reaches $END", async () => {
    await writeMockConfig("e2e-suspend.mock.yaml");
    const workflowHash = await addWorkflow("e2e-suspend.workflow.yaml", "test-suspend");
    const start = await cmdThreadStart(uwfHome, workflowHash, "Analyze the task", uwfHome, tmpDir);
    const threadId = start.thread;
    // Step 1 → planner emits insufficient_info → thread suspends.
    const step1 = execStep(threadId);
    expect(step1.status).toBe("suspended");
    expect(step1.done).toBe(false);
    expect(step1.currentRole).toBeNull();
    expect(step1.suspendedRole).toBe("planner");
    expect(step1.suspendMessage).toBe("Need more info: missing requirements");
    // Thread index entry reflects the suspension with rendered metadata.
    const suspendedEntry = getThread((await createUwfStore(uwfHome)).varStore, threadId);
    expect(suspendedEntry).not.toBeNull();
    expect(suspendedEntry!.status).toBe("suspended");
    expect(suspendedEntry!.suspendedRole).toBe("planner");
    expect(suspendedEntry!.suspendMessage).toBe("Need more info: missing requirements");
    // Resume re-runs the planner role; the second scripted step is `ready` → $END.
    const resume = runResume(threadId, "Here are the requirements");
    // Attach the CLI's stderr to the assertion so a non-zero exit is diagnosable
    // (same pattern as the --count scenario's exit-code check).
    expect(resume.exitCode, `stderr: ${resume.stderr}`).toBe(0);
    const resumeOut = JSON.parse(resume.stdout.trim()) as StepOutputJson;
    expect(resumeOut.status).toBe("completed");
    expect(resumeOut.done).toBe(true);
    expect(resumeOut.currentRole).toBeNull();
    expect(resumeOut.suspendedRole).toBeNull();
    // CAS chain: suspended planner step → resumed planner step.
    const store = await openStore(casDir);
    const s1 = getStepNode(store, step1.head);
    const s2 = getStepNode(store, resumeOut.head);
    expect(s1.role).toBe("planner");
    expect(s2.role).toBe("planner");
    expect(s2.prev).toBe(step1.head);
    expect(getStatus(store, s1.output)).toBe("insufficient_info");
    expect(getStatus(store, s2.output)).toBe("ready");
    // The thread index must end up completed and pointing at the resumed step.
    const finalEntry = getThread((await createUwfStore(uwfHome)).varStore, threadId);
    expect(finalEntry).not.toBeNull();
    expect(finalEntry!.status).toBe("completed");
    expect(finalEntry!.head).toBe(resumeOut.head);
  });
  // Scenario 5: one `thread exec --count 3` call is expected to execute all three
  // roles of the linear pipeline and leave the thread completed.
  test("5. --count 3 runs the whole linear pipeline in one invocation", async () => {
    await writeMockConfig("e2e-count.mock.yaml");
    const hash = await addWorkflow("e2e-count.workflow.yaml", "test-count");
    const { thread: threadId } = await cmdThreadStart(uwfHome, hash, "Ship the feature", uwfHome, tmpDir);
    // One CLI call; the exit-code assertion carries stderr for diagnosis.
    const run = runExec(threadId, 3);
    expect(run.exitCode, `stderr: ${run.stderr}`).toBe(0);
    // With --count the CLI output is parsed as an array holding one entry per executed step.
    const steps = JSON.parse(run.stdout.trim()) as StepOutputJson[];
    expect(Array.isArray(steps)).toBe(true);
    expect(steps).toHaveLength(3);
    const [first, second, third] = steps;
    expect(first.status).toBe("idle");
    expect(first.currentRole).toBe("developer");
    expect(second.status).toBe("idle");
    expect(second.currentRole).toBe("reviewer");
    expect(third.status).toBe("completed");
    expect(third.done).toBe(true);
    // The three step nodes must be linked in execution order and share one start.
    const store = await openStore(casDir);
    const firstNode = getStepNode(store, first.head);
    const secondNode = getStepNode(store, second.head);
    const thirdNode = getStepNode(store, third.head);
    expect([firstNode.role, secondNode.role, thirdNode.role]).toEqual(["analyst", "developer", "reviewer"]);
    expect(firstNode.prev).toBeNull();
    expect(secondNode.prev).toBe(first.head);
    expect(thirdNode.prev).toBe(second.head);
    const distinctStarts = new Set([firstNode.start, secondNode.start, thirdNode.start]);
    expect(distinctStarts.size).toBe(1);
    // The thread index entry ends up completed and pointing at the last step.
    const { varStore } = await createUwfStore(uwfHome);
    const indexEntry = getThread(varStore, threadId);
    expect(indexEntry).not.toBeNull();
    expect(indexEntry!.status).toBe("completed");
    expect(indexEntry!.head).toBe(third.head);
  });
  // Scenario 6: values emitted in the planner's frontmatter (branch, repoPath) are
  // expected to be substituted into the templated edge prompt stored on the worker step.
  test("6. mustache edge prompt renders planner variables into the worker step", async () => {
    await writeMockConfig("e2e-mustache.mock.yaml");
    const hash = await addWorkflow("e2e-mustache.workflow.yaml", "test-mustache");
    const { thread: threadId } = await cmdThreadStart(uwfHome, hash, "Plan the task", uwfHome, tmpDir);
    // First exec runs the planner, which leaves the thread idle with the worker up next.
    const plannerRun = execStep(threadId);
    expect(plannerRun.status).toBe("idle");
    expect(plannerRun.currentRole).toBe("worker");
    // Second exec runs the worker and finishes the thread.
    const workerRun = execStep(threadId);
    expect(workerRun.done).toBe(true);
    expect(workerRun.status).toBe("completed");
    const store = await openStore(casDir);
    const plannerNode = getStepNode(store, plannerRun.head);
    expect(getStatus(store, plannerNode.output)).toBe("ready");
    // The worker node must carry the fully rendered prompt, not the raw template.
    const workerNode = getStepNode(store, workerRun.head);
    expect(workerNode.role).toBe("worker");
    const renderedPrompt = workerNode.edgePrompt;
    expect(renderedPrompt).toContain("fix/42-auth");
    expect(renderedPrompt).toContain("/tmp/my-repo");
    expect(renderedPrompt).toBe("Work on branch fix/42-auth in /tmp/my-repo");
  });
  // Scenario 7: resuming a thread that already reached $END runs the planner again
  // and appends the new step to the existing CAS chain instead of starting a new one.
  test("7. completed thread can be resumed (衔尾蛇: end → start)", async () => {
    // Reuse the suspend workflow (planner with ready → $END), but mock data
    // goes straight to ready on first run, then ready again after resume.
    await writeMockConfig("e2e-completed-resume.mock.yaml");
    const workflowHash = await addWorkflow("e2e-suspend.workflow.yaml", "test-suspend");
    const start = await cmdThreadStart(uwfHome, workflowHash, "Do the work", uwfHome, tmpDir);
    const threadId = start.thread;
    // Step 1: planner outputs ready → $END → thread completed.
    const step1 = execStep(threadId);
    expect(step1.done).toBe(true);
    expect(step1.status).toBe("completed");
    const uwf1 = await createUwfStore(uwfHome);
    const entry1 = getThread(uwf1.varStore, threadId);
    expect(entry1).not.toBeNull();
    expect(entry1!.status).toBe("completed");
    // Resume the completed thread — should re-evaluate $START → planner.
    const resumeResult = runResume(threadId, "Additional context for round 2");
    // Attach the CLI's stderr to the assertion so a non-zero exit is diagnosable
    // (same pattern as the --count scenario's exit-code check).
    expect(resumeResult.exitCode, `stderr: ${resumeResult.stderr}`).toBe(0);
    // After resume step, planner ran again (step index 1 in mock) → ready → $END.
    const uwf2 = await createUwfStore(uwfHome);
    const entry2 = getThread(uwf2.varStore, threadId);
    expect(entry2).not.toBeNull();
    expect(entry2!.status).toBe("completed");
    // Head should have advanced (not the same as step1).
    expect(entry2!.head).not.toBe(step1.head);
    // CAS chain: step2.prev === step1 head (chain is preserved across resume).
    const store = await openStore(casDir);
    // Only `head` is read below, so parse into that minimal shape rather than
    // leaving the JSON.parse result as an unchecked `any`.
    const resumeOutput = JSON.parse(resumeResult.stdout.trim()) as { head: string };
    const step2Node = getStepNode(store, resumeOutput.head);
    expect(step2Node.role).toBe("planner");
    expect(step2Node.prev).toBe(step1.head);
  });
 });
@@ -0,0 +1,15 @@
 # Scripted mock-agent outputs for the E2E scenario in which an already-completed
 # thread is resumed. Both planner outputs report `$status: ready`, so the thread
 # is expected to reach $END on the first run and again after the resume.
 steps:
  # Step 0: planner → ready → $END (thread completes)
  - role: planner
    output: |
      ---
      $status: ready
      ---
      Initial plan complete.
  # Step 1: after resume, planner runs again from $START → ready → $END again
  - role: planner
    output: |
      ---
      $status: ready
      ---
      Revised plan after resume.
@@ -0,0 +1,19 @@
 # Scripted mock-agent outputs for a three-role run: analyst, developer, reviewer.
 # Each output is a frontmatter block carrying `$status`, followed by one body line.
 steps:
  # Step 0: analyst reports `analyzed`.
  - role: analyst
    output: |
      ---
      $status: analyzed
      ---
      Analysis complete.
  # Step 1: developer reports `implemented`.
  - role: developer
    output: |
      ---
      $status: implemented
      ---
      Implementation complete.
  # Step 2: reviewer reports `approved`.
  - role: reviewer
    output: |
      ---
      $status: approved
      ---
      Approved.
@@ -0,0 +1,45 @@
 # Workflow fixture: a linear three-role pipeline with no branching.
 # Each role's frontmatter schema accepts exactly one `$status` constant.
 name: test-count
 description: 3-step linear pipeline (analyst -> developer -> reviewer -> $END)
 roles:
  analyst:
    description: Analyzes the task
    goal: Analyze the task
    capabilities: []
    procedure: Analyze it
    output: Output the analysis and set $status to analyzed
    frontmatter:
      oneOf:
        - properties:
            $status: { const: analyzed }
          required: [$status]
  developer:
    description: Implements the change
    goal: Implement the change
    capabilities: []
    procedure: Write code
    output: Output the implementation and set $status to implemented
    frontmatter:
      oneOf:
        - properties:
            $status: { const: implemented }
          required: [$status]
  reviewer:
    description: Reviews the change
    goal: Review the change
    capabilities: []
    procedure: Review code
    output: Approve and set $status to approved
    frontmatter:
      oneOf:
        - properties:
            $status: { const: approved }
          required: [$status]
 # Edges are grouped by source role; the edge keys below match the `$status`
 # constants declared in the role schemas above (presumably that is how an edge
 # is selected — confirm against the moderator). `_` is the only edge out of $START.
 graph:
  $START:
    _: { role: analyst, prompt: 'Analyze the task' }
  analyst:
    analyzed: { role: developer, prompt: 'Implement the change' }
  developer:
    implemented: { role: reviewer, prompt: 'Review the change' }
  reviewer:
    approved: { role: '$END', prompt: 'Done' }
@@ -0,0 +1,15 @@
 # Scripted mock-agent outputs for the templated-edge-prompt scenario: the planner's
 # frontmatter carries `branch` and `repoPath` in addition to `$status`.
 steps:
  # Step 0: planner reports `ready` and supplies the two template values.
  - role: planner
    output: |
      ---
      $status: ready
      branch: fix/42-auth
      repoPath: /tmp/my-repo
      ---
      Planned the work.
  # Step 1: worker reports `done`.
  - role: worker
    output: |
      ---
      $status: done
      ---
      Work complete.
@@ -0,0 +1,34 @@
 # Workflow fixture: planner → worker → $END, where the planner → worker edge
 # prompt is a template referencing fields of the planner's frontmatter.
 name: test-mustache
 description: Planner emits template variables consumed by the worker edge prompt
 roles:
  planner:
    description: Plans work and emits branch + repo path
    goal: Plan the task
    capabilities: []
    procedure: Decide the branch and repo path
    output: Set $status to ready and emit branch and repoPath
    frontmatter:
      oneOf:
        - properties:
            $status: { const: ready }
            branch: { type: string }
            repoPath: { type: string }
          required: [$status, branch, repoPath]
  worker:
    description: Works on the planned branch
    goal: Do the work
    capabilities: []
    procedure: Do it
    output: Output the result and set $status to done
    frontmatter:
      oneOf:
        - properties:
            $status: { const: done }
          required: [$status]
 graph:
  $START:
    _: { role: planner, prompt: 'Plan the task' }
  planner:
    # `{{{branch}}}` / `{{{repoPath}}}` name the planner frontmatter fields required above.
    # Triple braces are Mustache's unescaped form, so characters such as `/` should pass
    # through verbatim — assuming a standard Mustache renderer; confirm against the moderator.
    ready: { role: worker, prompt: 'Work on branch {{{branch}}} in {{{repoPath}}}' }
  worker:
    done: { role: '$END', prompt: 'Complete' }
@@ -0,0 +1,14 @@
 # Scripted mock-agent outputs for the suspend/resume scenario: the planner first
 # asks for more information, then reports `ready` on its second run.
 steps:
  # Step 0: planner reports `insufficient_info` and supplies a `reason`.
  - role: planner
    output: |
      ---
      $status: insufficient_info
      reason: missing requirements
      ---
      I need more information before I can plan this.
  # Step 1: planner reports `ready`.
  - role: planner
    output: |
      ---
      $status: ready
      ---
      I now have what I need. Ready to proceed.
@@ -0,0 +1,24 @@
 # Workflow fixture: a single planner role whose output either routes to $SUSPEND
 # (`insufficient_info`, which must come with a `reason`) or to $END (`ready`).
 name: test-suspend
 description: Planner can suspend for more info or finish when ready
 roles:
  planner:
    description: Plans work and may request more info
    goal: Analyze the task
    capabilities: []
    procedure: Analyze the task and decide if more info is needed
    output: Set $status to insufficient_info (with reason) or ready
    frontmatter:
      oneOf:
        - properties:
            $status: { const: insufficient_info }
            reason: { type: string }
          required: [$status, reason]
        - properties:
            $status: { const: ready }
          required: [$status]
 graph:
  $START:
    _: { role: planner, prompt: 'Analyze the task' }
  planner:
    # The $SUSPEND edge prompt is a template over the planner's `reason` field; the
    # suspend E2E test expects the rendered text to surface as the thread's suspendMessage.
    insufficient_info: { role: '$SUSPEND', prompt: 'Need more info: {{{reason}}}' }
    ready: { role: '$END', prompt: 'Done' }