From 96ab097198b77e79008579b9e84f8fe6f6012159 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?=
Date: Thu, 28 May 2026 00:09:33 +0000
Subject: [PATCH] test(#566): add A4 retry loop and C1 integration round-trip tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- A4: verify frontmatter retry loop produces correct AdapterOutput JSON
- C1: full round-trip test with mock agent → CLI JSON parsing → CAS verification

小橘
---
Review note: the blob ids on the "index" lines below were not regenerated after
the review edits; apply with plain `git am` / `git apply` (not --3way).

 .../__tests__/adapter-json-roundtrip.test.ts  | 179 ++++++++++++++++++
 .../__tests__/adapter-retry.test.ts           |  74 ++++++++
 2 files changed, 253 insertions(+)
 create mode 100644 packages/cli-workflow/src/__tests__/adapter-json-roundtrip.test.ts
 create mode 100644 packages/workflow-util-agent/__tests__/adapter-retry.test.ts

diff --git a/packages/cli-workflow/src/__tests__/adapter-json-roundtrip.test.ts b/packages/cli-workflow/src/__tests__/adapter-json-roundtrip.test.ts
new file mode 100644
index 0000000..250241d
--- /dev/null
+++ b/packages/cli-workflow/src/__tests__/adapter-json-roundtrip.test.ts
@@ -0,0 +1,179 @@
+import { execFileSync } from "node:child_process";
+import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { putSchema } from "@uncaged/json-cas";
+import { createFsStore } from "@uncaged/json-cas-fs";
+import type { CasRef, StepNodePayload, ThreadId } from "@uncaged/workflow-protocol";
+import { afterEach, beforeEach, describe, expect, test } from "vitest";
+import { registerUwfSchemas } from "../schemas.js";
+import { saveThreadsIndex } from "../store.js";
+
+// ── schemas ──────────────────────────────────────────────────────────────────
+
+// Frontmatter schema of the "worker" role; stored in CAS with putSchema inside the test.
+const OUTPUT_SCHEMA = {
+  type: "object" as const,
+  properties: {
+    $status: { type: "string" as const, enum: ["done", "failed"] },
+    result: { type: "string" as const },
+  },
+  required: ["$status"],
+  additionalProperties: false,
+};
+
+// ── fixture ──────────────────────────────────────────────────────────────────
+
+// Scratch directory recreated for every test; holds the CAS dir, mock agent and config.yaml.
+let tmpDir: string;
+
+beforeEach(async () => {
+  tmpDir = await mkdtemp(join(tmpdir(), "cli-uwf-roundtrip-test-"));
+});
+
+afterEach(async () => {
+  await rm(tmpDir, { recursive: true, force: true });
+});
+
+describe("C1: adapter JSON round-trip integration", () => {
+  test("mock agent outputs JSON, CLI parses it and updates thread head in CAS", async () => {
+    // 1. Set up CAS store with workflow, start node, and output schema
+    const casDir = join(tmpDir, "cas");
+    await mkdir(casDir, { recursive: true });
+    const store = createFsStore(casDir);
+    const schemas = await registerUwfSchemas(store);
+
+    const outputSchemaHash = await putSchema(store, OUTPUT_SCHEMA);
+
+    const workflowHash = await store.put(schemas.workflow, {
+      name: "test-roundtrip",
+      description: "roundtrip integration test",
+      roles: {
+        worker: {
+          description: "Worker role",
+          goal: "Do work",
+          capabilities: [],
+          procedure: "work",
+          output: "result",
+          frontmatter: outputSchemaHash,
+        },
+      },
+      graph: {
+        $START: { _: { role: "worker", prompt: "Do the work", location: null } },
+        worker: { done: { role: "$END", prompt: "completed", location: null } },
+      },
+    });
+
+    const startHash = await store.put(schemas.startNode, {
+      workflow: workflowHash,
+      prompt: "Test round-trip task",
+    });
+
+    const threadId = "01ROUNDTRIPTEST0000000000" as ThreadId;
+    await saveThreadsIndex(tmpDir, { [threadId]: startHash });
+
+    // 2. Pre-create CAS nodes that the mock agent would produce
+    const outputHash = await store.put(outputSchemaHash, {
+      $status: "done",
+      result: "test-ok",
+    });
+
+    // Use text schema for detail (simple placeholder)
+    const detailHash = await store.put(schemas.text, "mock detail");
+
+    const startedAtMs = 1716600000000;
+    const completedAtMs = 1716600001500;
+
+    const stepHash = await store.put(schemas.stepNode, {
+      start: startHash,
+      prev: null,
+      role: "worker",
+      output: outputHash,
+      detail: detailHash,
+      agent: "uwf-mock",
+      edgePrompt: "Do the work",
+      startedAtMs,
+      completedAtMs,
+      cwd: tmpDir,
+    });
+
+    // 3. Create a minimal mock agent shell script that just outputs JSON
+    // The step node is already in CAS — the agent just needs to print the JSON line
+    const mockAgentPath = join(tmpDir, "mock-agent.sh");
+    const adapterJson = JSON.stringify({
+      stepHash,
+      detailHash,
+      role: "worker",
+      frontmatter: { $status: "done", result: "test-ok" },
+      body: "",
+      startedAtMs,
+      completedAtMs,
+    });
+    // Single-quote the JSON for sh (escaping any embedded quote) and print it with printf:
+    // unlike echo, printf '%s' never interprets backslash sequences in its argument.
+    const quotedJson = `'${adapterJson.replace(/'/g, "'\\''")}'`;
+    await writeFile(mockAgentPath, `#!/bin/sh\nprintf '%s\\n' ${quotedJson}\n`, { mode: 0o755 });
+
+    // 4. Write config.yaml
+    const configPath = join(tmpDir, "config.yaml");
+    await writeFile(
+      configPath,
+      `defaultAgent: uwf-hermes\ndefaultModel: test-model\nagentOverrides: null\nagents: {}\nproviders: {}\nmodels: {}\n`,
+    );
+
+    // 5. Run CLI with agent override pointing to our mock
+    const cliPath = join(import.meta.dirname, "..", "cli.js");
+    let stdout: string;
+    let stderr: string;
+    let exitCode: number;
+
+    try {
+      stdout = execFileSync(
+        "bun",
+        ["run", cliPath, "thread", "exec", threadId, "--agent", mockAgentPath],
+        {
+          encoding: "utf8",
+          stdio: ["ignore", "pipe", "pipe"],
+          env: { ...process.env, WORKFLOW_STORAGE_ROOT: tmpDir },
+          cwd: tmpDir,
+          timeout: 30000,
+        },
+      );
+      stderr = "";
+      exitCode = 0;
+    } catch (e: unknown) {
+      const err = e as NodeJS.ErrnoException & {
+        stdout?: string;
+        stderr?: string;
+        status?: number;
+      };
+      stdout = err.stdout ?? "";
+      stderr = err.stderr ?? "";
+      exitCode = err.status ?? 1;
+    }
+
+    // 6. Verify
+    if (exitCode !== 0) {
+      throw new Error(`CLI exited with code ${exitCode}\nstdout: ${stdout}\nstderr: ${stderr}`);
+    }
+
+    // Parse CLI output
+    const cliOutput = JSON.parse(stdout.trim());
+    expect(cliOutput).toHaveProperty("thread", threadId);
+    expect(cliOutput).toHaveProperty("head", stepHash);
+    expect(cliOutput.head).toMatch(/^[0-9A-HJ-NP-TV-Z]{13}$/);
+
+    // Verify the CAS step node exists and has correct metadata
+    const storeAfter = createFsStore(casDir);
+    const stepNode = storeAfter.get(cliOutput.head as CasRef);
+    expect(stepNode).not.toBeNull();
+
+    const payload = stepNode!.payload as StepNodePayload;
+    expect(payload.role).toBe("worker");
+    expect(payload.agent).toBe("uwf-mock");
+    expect(payload.startedAtMs).toBe(1716600000000);
+    expect(payload.completedAtMs).toBe(1716600001500);
+    expect(payload.output).toBe(outputHash);
+    expect(payload.detail).toBe(detailHash);
+  });
+});
diff --git a/packages/workflow-util-agent/__tests__/adapter-retry.test.ts b/packages/workflow-util-agent/__tests__/adapter-retry.test.ts
new file mode 100644
index 0000000..f9a35cf
--- /dev/null
+++ b/packages/workflow-util-agent/__tests__/adapter-retry.test.ts
@@ -0,0 +1,74 @@
+import { createMemoryStore, putSchema } from "@uncaged/json-cas";
+import { describe, expect, test } from "vitest";
+
+import { tryFrontmatterFastPath } from "../src/frontmatter.js";
+
+// ── Helpers ───────────────────────────────────────────────────────────────────
+
+// Frontmatter schema shared by both A4 cases; each test stores it with putSchema.
+const PLANNER_SCHEMA = {
+  type: "object",
+  properties: {
+    $status: { type: "string", enum: ["ready", "failed"] },
+    plan: { type: "string" },
+  },
+  required: ["$status"],
+  additionalProperties: false,
+};
+
+describe("adapter-stdout: A4 retry loop survives JSON output", () => {
+  test("A4. first extraction fails, second succeeds — final result has correct data", async () => {
+    const store = createMemoryStore();
+    const schemaHash = await putSchema(store, PLANNER_SCHEMA);
+
+    // Simulate the retry loop from createAgent (run.ts lines 163-173):
+    // First attempt: agent outputs garbage (no frontmatter)
+    const badOutput = "Here is my response without frontmatter.\nJust plain text.";
+    const firstAttempt = await tryFrontmatterFastPath(badOutput, schemaHash, store);
+    expect(firstAttempt).toBeNull();
+
+    // Second attempt (after correction message): agent outputs valid frontmatter
+    const goodOutput = `---\n$status: ready\nplan: corrected-hash\n---\nCorrected body with valid frontmatter.`;
+    const secondAttempt = await tryFrontmatterFastPath(goodOutput, schemaHash, store);
+
+    expect(secondAttempt).not.toBeNull();
+    // Same hash pattern the C1 round-trip test asserts (Crockford base32: no I, L, O, U).
+    expect(secondAttempt!.outputHash).toMatch(/^[0-9A-HJ-NP-TV-Z]{13}$/);
+    expect(secondAttempt!.frontmatter).toEqual({ $status: "ready", plan: "corrected-hash" });
+    expect(secondAttempt!.body).toBe("Corrected body with valid frontmatter.");
+
+    // Verify the final AdapterOutput shape would be correct
+    const adapterOutput = {
+      stepHash: "MOCK_STEP_HASH",
+      detailHash: "MOCK_DETAIL_HA",
+      role: "planner",
+      frontmatter: secondAttempt!.frontmatter,
+      body: secondAttempt!.body,
+      startedAtMs: 1000,
+      completedAtMs: 2000,
+    };
+
+    const json = JSON.stringify(adapterOutput);
+    const parsed = JSON.parse(json);
+    expect(parsed.frontmatter).toEqual({ $status: "ready", plan: "corrected-hash" });
+    expect(parsed.body).toBe("Corrected body with valid frontmatter.");
+    expect(parsed.completedAtMs).toBeGreaterThanOrEqual(parsed.startedAtMs);
+  });
+
+  test("A4. all retries fail — extraction returns null on every attempt", async () => {
+    const store = createMemoryStore();
+    const schemaHash = await putSchema(store, PLANNER_SCHEMA);
+
+    const MAX_RETRIES = 2;
+    const badOutput = "No frontmatter here";
+
+    // Simulate MAX_FRONTMATTER_RETRIES iterations all failing
+    let extracted = await tryFrontmatterFastPath(badOutput, schemaHash, store);
+    for (let retry = 0; retry < MAX_RETRIES && extracted === null; retry++) {
+      // Each retry also gets bad output
+      extracted = await tryFrontmatterFastPath(badOutput, schemaHash, store);
+    }
+
+    expect(extracted).toBeNull();
+  });
+});