diff --git a/packages/cli/src/__tests__/e2e-mock-agent.test.ts b/packages/cli/src/__tests__/e2e-mock-agent.test.ts index e71e488..aac5687 100644 --- a/packages/cli/src/__tests__/e2e-mock-agent.test.ts +++ b/packages/cli/src/__tests__/e2e-mock-agent.test.ts @@ -106,9 +106,13 @@ async function addWorkflow(workflowFixture: string, workflowName: string): Promi type ExecResult = { stdout: string; stderr: string; exitCode: number }; -function runExec(threadId: string): ExecResult { +function runExec(threadId: string, count: number | null = null): ExecResult { /* when count is non-null it is forwarded to the CLI as `--count <n>`; otherwise no --count flag is passed */ + const args = [CLI_PATH, "thread", "exec", threadId]; + if (count !== null) { + args.push("--count", String(count)); + } try { - const stdout = execFileSync(process.execPath, [CLI_PATH, "thread", "exec", threadId], { + const stdout = execFileSync(process.execPath, args, { encoding: "utf8", stdio: ["ignore", "pipe", "pipe"], env: { ...process.env, UWF_HOME: uwfHome, OCAS_HOME: casDir }, @@ -126,11 +130,38 @@ function runExec(threadId: string): ExecResult { } } +/** Invoke `uwf thread resume <threadId> -p <prompt>` through the built CLI. Returns the captured stdout with exit code 0 on success; on failure returns the thrown error's stdout/stderr (empty strings when absent) and its status, defaulting to 1 when the error carries no status. */ +function runResume(threadId: string, prompt: string): ExecResult { + try { + const stdout = execFileSync( + process.execPath, + [CLI_PATH, "thread", "resume", threadId, "-p", prompt], + { + encoding: "utf8", + stdio: ["ignore", "pipe", "pipe"], + env: { ...process.env, UWF_HOME: uwfHome, OCAS_HOME: casDir }, + cwd: tmpDir, + timeout: 30000, + }, + ); + return { stdout, stderr: "", exitCode: 0 }; + } catch (e: unknown) { + const err = e as NodeJS.ErrnoException & { + stdout?: string; + stderr?: string; + status?: number; + }; + return { stdout: err.stdout ?? "", stderr: err.stderr ?? "", exitCode: err.status ?? 
1 }; + } +} + type StepOutputJson = { thread: string; head: string; status: string; currentRole: string | null; + suspendedRole: string | null; + suspendMessage: string | null; /* in test 4 this is the $SUSPEND edge prompt with {{{reason}}} rendered */ done: boolean; }; @@ -293,4 +324,159 @@ describe("E2E mock-agent: full uwf pipeline", () => { expect(entry!.status).not.toBe("completed"); expect(entry!.head).toBe(step1.head); }); + + test("4. planner $SUSPEND then resume re-runs planner and reaches $END", async () => { + await writeMockConfig("e2e-suspend.mock.yaml"); + const workflowHash = await addWorkflow("e2e-suspend.workflow.yaml", "test-suspend"); + + const start = await cmdThreadStart(uwfHome, workflowHash, "Analyze the task", uwfHome, tmpDir); + const threadId = start.thread; + + // Step 1 → planner emits insufficient_info → thread suspends. + const step1 = execStep(threadId); + expect(step1.status).toBe("suspended"); + expect(step1.done).toBe(false); + expect(step1.currentRole).toBeNull(); + expect(step1.suspendedRole).toBe("planner"); + expect(step1.suspendMessage).toBe("Need more info: missing requirements"); + + // Thread index entry reflects the suspension with rendered metadata. + const suspendedEntry = getThread((await createUwfStore(uwfHome)).varStore, threadId); + expect(suspendedEntry).not.toBeNull(); + expect(suspendedEntry!.status).toBe("suspended"); + expect(suspendedEntry!.suspendedRole).toBe("planner"); + expect(suspendedEntry!.suspendMessage).toBe("Need more info: missing requirements"); + + // Resume re-runs the planner role; the second scripted step is `ready` → $END. + const resume = runResume(threadId, "Here are the requirements"); + expect(resume.exitCode).toBe(0); + const resumeOut = JSON.parse(resume.stdout.trim()) as StepOutputJson; + expect(resumeOut.status).toBe("completed"); + expect(resumeOut.done).toBe(true); + expect(resumeOut.currentRole).toBeNull(); + expect(resumeOut.suspendedRole).toBeNull(); + + // CAS chain: suspended planner step → resumed planner step. 
+ const store = await openStore(casDir); + const s1 = getStepNode(store, step1.head); + const s2 = getStepNode(store, resumeOut.head); + expect(s1.role).toBe("planner"); + expect(s2.role).toBe("planner"); + expect(s2.prev).toBe(step1.head); + expect(getStatus(store, s1.output)).toBe("insufficient_info"); + expect(getStatus(store, s2.output)).toBe("ready"); + + const finalEntry = getThread((await createUwfStore(uwfHome)).varStore, threadId); + expect(finalEntry).not.toBeNull(); + expect(finalEntry!.status).toBe("completed"); + expect(finalEntry!.head).toBe(resumeOut.head); + }); + + test("5. --count 3 runs the whole linear pipeline in one invocation", async () => { + await writeMockConfig("e2e-count.mock.yaml"); + const workflowHash = await addWorkflow("e2e-count.workflow.yaml", "test-count"); + + const start = await cmdThreadStart(uwfHome, workflowHash, "Ship the feature", uwfHome, tmpDir); + const threadId = start.thread; + + // Single invocation with --count 3 → moderator drives analyst → developer → reviewer → $END. + const { stdout, stderr, exitCode } = runExec(threadId, 3); + expect(exitCode, `stderr: ${stderr}`).toBe(0); + + // Multi-step exec emits a JSON array (one entry per executed step). + const results = JSON.parse(stdout.trim()) as StepOutputJson[]; + expect(Array.isArray(results)).toBe(true); + expect(results).toHaveLength(3); + + expect(results[0].status).toBe("idle"); + expect(results[0].currentRole).toBe("developer"); + expect(results[1].status).toBe("idle"); + expect(results[1].currentRole).toBe("reviewer"); + expect(results[2].status).toBe("completed"); + expect(results[2].done).toBe(true); + + // Verify the CAS chain holds 3 step nodes in the correct order. 
+ const store = await openStore(casDir); + const n1 = getStepNode(store, results[0].head); + const n2 = getStepNode(store, results[1].head); + const n3 = getStepNode(store, results[2].head); + expect([n1.role, n2.role, n3.role]).toEqual(["analyst", "developer", "reviewer"]); + expect(n1.prev).toBeNull(); + expect(n2.prev).toBe(results[0].head); + expect(n3.prev).toBe(results[1].head); + expect(new Set([n1.start, n2.start, n3.start]).size).toBe(1); + + const finalEntry = getThread((await createUwfStore(uwfHome)).varStore, threadId); + expect(finalEntry).not.toBeNull(); + expect(finalEntry!.status).toBe("completed"); + expect(finalEntry!.head).toBe(results[2].head); + }); + + test("6. mustache edge prompt renders planner variables into the worker step", async () => { + await writeMockConfig("e2e-mustache.mock.yaml"); + const workflowHash = await addWorkflow("e2e-mustache.workflow.yaml", "test-mustache"); + + const start = await cmdThreadStart(uwfHome, workflowHash, "Plan the task", uwfHome, tmpDir); + const threadId = start.thread; + + // Step 1 → planner emits branch + repoPath. + const step1 = execStep(threadId); + expect(step1.status).toBe("idle"); + expect(step1.currentRole).toBe("worker"); + + // Step 2 → worker; the moderator renders the templated edge prompt before spawning it. + const step2 = execStep(threadId); + expect(step2.done).toBe(true); + expect(step2.status).toBe("completed"); + + const store = await openStore(casDir); + const plannerStep = getStepNode(store, step1.head); + expect(getStatus(store, plannerStep.output)).toBe("ready"); + + // The worker step's edgePrompt is the mustache-rendered template. + const workerStep = getStepNode(store, step2.head); + expect(workerStep.role).toBe("worker"); + expect(workerStep.edgePrompt).toContain("fix/42-auth"); + expect(workerStep.edgePrompt).toContain("/tmp/my-repo"); + expect(workerStep.edgePrompt).toBe("Work on branch fix/42-auth in /tmp/my-repo"); + }); + + test("7. 
completed thread can be resumed (衔尾蛇: end → start)", async () => { + // Reuse the suspend workflow (planner with ready → $END), but mock data + // goes straight to ready on first run, then ready again after resume. + await writeMockConfig("e2e-completed-resume.mock.yaml"); + const workflowHash = await addWorkflow("e2e-suspend.workflow.yaml", "test-suspend"); + + const start = await cmdThreadStart(uwfHome, workflowHash, "Do the work", uwfHome, tmpDir); + const threadId = start.thread; + + // Step 1: planner outputs ready → $END → thread completed. + const step1 = execStep(threadId); + expect(step1.done).toBe(true); + expect(step1.status).toBe("completed"); + + const uwf1 = await createUwfStore(uwfHome); + const entry1 = getThread(uwf1.varStore, threadId); + expect(entry1).not.toBeNull(); + expect(entry1!.status).toBe("completed"); + + // Resume the completed thread — should re-evaluate $START → planner. + const resumeResult = runResume(threadId, "Additional context for round 2"); + expect(resumeResult.exitCode).toBe(0); + + // After resume step, planner ran again (step index 1 in mock) → ready → $END. + const uwf2 = await createUwfStore(uwfHome); + const entry2 = getThread(uwf2.varStore, threadId); + expect(entry2).not.toBeNull(); + expect(entry2!.status).toBe("completed"); + // Head should have advanced (not the same as step1). + expect(entry2!.head).not.toBe(step1.head); + + // CAS chain: step2.prev === step1 head (chain is preserved across resume). 
+ const store = await openStore(casDir); + const resumeOutput = JSON.parse(resumeResult.stdout.trim()); + const step2Node = getStepNode(store, resumeOutput.head); + expect(step2Node.role).toBe("planner"); + expect(step2Node.prev).toBe(step1.head); + }); }); diff --git a/packages/cli/src/__tests__/fixtures/e2e-completed-resume.mock.yaml b/packages/cli/src/__tests__/fixtures/e2e-completed-resume.mock.yaml new file mode 100644 index 0000000..2d3eedf --- /dev/null +++ b/packages/cli/src/__tests__/fixtures/e2e-completed-resume.mock.yaml @@ -0,0 +1,15 @@ +steps: + # Step 0: planner → ready → $END (thread completes) + - role: planner + output: | + --- + $status: ready + --- + Initial plan complete. + # Step 1: after resume, planner runs again from $START → ready → $END again + - role: planner + output: | + --- + $status: ready + --- + Revised plan after resume. diff --git a/packages/cli/src/__tests__/fixtures/e2e-count.mock.yaml b/packages/cli/src/__tests__/fixtures/e2e-count.mock.yaml new file mode 100644 index 0000000..4f897b0 --- /dev/null +++ b/packages/cli/src/__tests__/fixtures/e2e-count.mock.yaml @@ -0,0 +1,19 @@ +steps: + - role: analyst + output: | + --- + $status: analyzed + --- + Analysis complete. + - role: developer + output: | + --- + $status: implemented + --- + Implementation complete. + - role: reviewer + output: | + --- + $status: approved + --- + Approved. 
diff --git a/packages/cli/src/__tests__/fixtures/e2e-count.workflow.yaml b/packages/cli/src/__tests__/fixtures/e2e-count.workflow.yaml new file mode 100644 index 0000000..41848e1 --- /dev/null +++ b/packages/cli/src/__tests__/fixtures/e2e-count.workflow.yaml @@ -0,0 +1,45 @@ +name: test-count +description: 3-step linear pipeline (analyst -> developer -> reviewer -> $END) +roles: + analyst: + description: Analyzes the task + goal: Analyze the task + capabilities: [] + procedure: Analyze it + output: Output the analysis and set $status to analyzed + frontmatter: + oneOf: + - properties: + $status: { const: analyzed } + required: [$status] + developer: + description: Implements the change + goal: Implement the change + capabilities: [] + procedure: Write code + output: Output the implementation and set $status to implemented + frontmatter: + oneOf: + - properties: + $status: { const: implemented } + required: [$status] + reviewer: + description: Reviews the change + goal: Review the change + capabilities: [] + procedure: Review code + output: Approve and set $status to approved + frontmatter: + oneOf: + - properties: + $status: { const: approved } + required: [$status] +graph: + $START: + _: { role: analyst, prompt: 'Analyze the task' } + analyst: + analyzed: { role: developer, prompt: 'Implement the change' } + developer: + implemented: { role: reviewer, prompt: 'Review the change' } + reviewer: + approved: { role: '$END', prompt: 'Done' } diff --git a/packages/cli/src/__tests__/fixtures/e2e-mustache.mock.yaml b/packages/cli/src/__tests__/fixtures/e2e-mustache.mock.yaml new file mode 100644 index 0000000..8f7b41b --- /dev/null +++ b/packages/cli/src/__tests__/fixtures/e2e-mustache.mock.yaml @@ -0,0 +1,15 @@ +steps: + - role: planner + output: | + --- + $status: ready + branch: fix/42-auth + repoPath: /tmp/my-repo + --- + Planned the work. + - role: worker + output: | + --- + $status: done + --- + Work complete. 
diff --git a/packages/cli/src/__tests__/fixtures/e2e-mustache.workflow.yaml b/packages/cli/src/__tests__/fixtures/e2e-mustache.workflow.yaml new file mode 100644 index 0000000..0d64e9b --- /dev/null +++ b/packages/cli/src/__tests__/fixtures/e2e-mustache.workflow.yaml @@ -0,0 +1,34 @@ +name: test-mustache +description: Planner emits template variables consumed by the worker edge prompt +roles: + planner: + description: Plans work and emits branch + repo path + goal: Plan the task + capabilities: [] + procedure: Decide the branch and repo path + output: Set $status to ready and emit branch and repoPath + frontmatter: + oneOf: + - properties: + $status: { const: ready } + branch: { type: string } + repoPath: { type: string } + required: [$status, branch, repoPath] + worker: + description: Works on the planned branch + goal: Do the work + capabilities: [] + procedure: Do it + output: Output the result and set $status to done + frontmatter: + oneOf: + - properties: + $status: { const: done } + required: [$status] +graph: + $START: + _: { role: planner, prompt: 'Plan the task' } + planner: + ready: { role: worker, prompt: 'Work on branch {{{branch}}} in {{{repoPath}}}' } + worker: + done: { role: '$END', prompt: 'Complete' } diff --git a/packages/cli/src/__tests__/fixtures/e2e-suspend.mock.yaml b/packages/cli/src/__tests__/fixtures/e2e-suspend.mock.yaml new file mode 100644 index 0000000..bd1fb8e --- /dev/null +++ b/packages/cli/src/__tests__/fixtures/e2e-suspend.mock.yaml @@ -0,0 +1,14 @@ +steps: + - role: planner + output: | + --- + $status: insufficient_info + reason: missing requirements + --- + I need more information before I can plan this. + - role: planner + output: | + --- + $status: ready + --- + I now have what I need. Ready to proceed. 
diff --git a/packages/cli/src/__tests__/fixtures/e2e-suspend.workflow.yaml b/packages/cli/src/__tests__/fixtures/e2e-suspend.workflow.yaml new file mode 100644 index 0000000..42d59b6 --- /dev/null +++ b/packages/cli/src/__tests__/fixtures/e2e-suspend.workflow.yaml @@ -0,0 +1,24 @@ +name: test-suspend +description: Planner can suspend for more info or finish when ready +roles: + planner: + description: Plans work and may request more info + goal: Analyze the task + capabilities: [] + procedure: Analyze the task and decide if more info is needed + output: Set $status to insufficient_info (with reason) or ready + frontmatter: + oneOf: + - properties: + $status: { const: insufficient_info } + reason: { type: string } + required: [$status, reason] + - properties: + $status: { const: ready } + required: [$status] +graph: + $START: + _: { role: planner, prompt: 'Analyze the task' } + planner: + insufficient_info: { role: '$SUSPEND', prompt: 'Need more info: {{{reason}}}' } + ready: { role: '$END', prompt: 'Done' }