feat: agent-mock package for deterministic E2E testing (#33) #44
@@ -0,0 +1,18 @@
|
|||||||
|
steps:
|
||||||
|
- role: planner
|
||||||
|
output: |
|
||||||
|
---
|
||||||
|
$status: ready
|
||||||
|
plan: test-plan-hash
|
||||||
|
repoPath: /tmp/test-repo
|
||||||
|
---
|
||||||
|
Plan: implement the feature.
|
||||||
|
|
||||||
|
- role: developer
|
||||||
|
output: |
|
||||||
|
---
|
||||||
|
$status: done
|
||||||
|
branch: fix/1-test
|
||||||
|
worktree: /tmp/worktree
|
||||||
|
---
|
||||||
|
Implemented the feature.
|
||||||
@@ -0,0 +1,48 @@
|
|||||||
|
import { readFile } from "node:fs/promises";
import { dirname, join } from "node:path";
import { fileURLToPath } from "node:url";
import { describe, expect, test } from "vitest";

import { parseScenario, selectMockStep } from "../src/mock-agent.js";
|
||||||
|
|
||||||
|
// Absolute path of the 2-step scenario fixture, resolved relative to this test
// module. `import.meta.url` is used rather than `__dirname`: this package is
// `"type": "module"`, and native ES modules do not define `__dirname`, so the
// old form only worked where the test runner injects it.
const FIXTURE = join(dirname(fileURLToPath(import.meta.url)), "fixtures", "simple-scenario.yaml");
|
||||||
|
|
||||||
|
// Unit tests for the pure YAML parser: the happy path on the bundled fixture
// plus both validation failures.
describe("parseScenario", () => {
  test("parses the 2-step fixture in order", async () => {
    const yamlText = await readFile(FIXTURE, "utf8");
    const { steps } = parseScenario(yamlText);
    expect(steps).toHaveLength(2);

    const [plannerStep, developerStep] = steps;
    expect(plannerStep.role).toBe("planner");
    expect(developerStep.role).toBe("developer");
    expect(plannerStep.output).toContain("$status: ready");
    expect(developerStep.output).toContain("branch: fix/1-test");
  });

  test("rejects documents without a steps array", () => {
    const parseWithoutSteps = () => parseScenario("foo: bar");
    expect(parseWithoutSteps).toThrow(/steps/);
  });

  test("rejects steps missing role or output", () => {
    // The single step has a role but no output, so validation must fail.
    const parseIncompleteStep = () => parseScenario("steps:\n - role: planner");
    expect(parseIncompleteStep).toThrow(/role.*output/);
  });
});
|
||||||
|
|
||||||
|
// Unit tests for step selection: index lookup, role verification and the
// out-of-range error.
describe("selectMockStep", () => {
  const plannerStep = { role: "planner", output: "plan-output" };
  const developerStep = { role: "developer", output: "dev-output" };
  const scenario = { steps: [plannerStep, developerStep] };

  test("step index counts existing steps to pick the current step", () => {
    const first = selectMockStep(scenario, 0, "planner");
    expect(first.output).toBe("plan-output");

    const second = selectMockStep(scenario, 1, "developer");
    expect(second.output).toBe("dev-output");
  });

  test("throws when the moderator routes to an unexpected role", () => {
    const selectWrongRole = () => selectMockStep(scenario, 0, "developer");
    expect(selectWrongRole).toThrow(/expected role "planner"/);
  });

  test("throws when the step index runs past the scripted steps", () => {
    const selectPastEnd = () => selectMockStep(scenario, 2, "planner");
    expect(selectPastEnd).toThrow(/no step at index 2/);
  });
});
|
||||||
@@ -0,0 +1,47 @@
|
|||||||
|
{
|
||||||
|
"name": "@united-workforce/agent-mock",
|
||||||
|
"version": "0.5.0",
|
||||||
|
"files": [
|
||||||
|
"src",
|
||||||
|
"dist",
|
||||||
|
"package.json"
|
||||||
|
],
|
||||||
|
"type": "module",
|
||||||
|
"bin": {
|
||||||
|
"uwf-mock": "./src/cli.ts"
|
||||||
|
},
|
||||||
|
"exports": {
|
||||||
|
".": {
|
||||||
|
"types": "./dist/index.d.ts",
|
||||||
|
"import": "./dist/index.js"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"scripts": {
|
||||||
|
"prepublishOnly": "echo 'Use pnpm run release from repo root' && exit 1",
|
||||||
|
"test": "vitest run __tests__/",
|
||||||
|
"test:ci": "vitest run __tests__/"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"@ocas/core": "^0.3.0",
|
||||||
|
"@united-workforce/protocol": "workspace:^",
|
||||||
|
"@united-workforce/util": "workspace:^",
|
||||||
|
"@united-workforce/util-agent": "workspace:^",
|
||||||
|
"yaml": "^2.9.0"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"typescript": "^5.8.3"
|
||||||
|
},
|
||||||
|
"publishConfig": {
|
||||||
|
"access": "public"
|
||||||
|
},
|
||||||
|
"repository": {
|
||||||
|
"type": "git",
|
||||||
|
"url": "https://git.shazhou.work/shazhou/united-workforce.git",
|
||||||
|
"directory": "packages/agent-mock"
|
||||||
|
},
|
||||||
|
"homepage": "https://git.shazhou.work/shazhou/united-workforce#readme",
|
||||||
|
"bugs": {
|
||||||
|
"url": "https://git.shazhou.work/shazhou/united-workforce/issues"
|
||||||
|
},
|
||||||
|
"license": "MIT"
|
||||||
|
}
|
||||||
@@ -0,0 +1,18 @@
|
|||||||
|
#!/usr/bin/env node
|
||||||
|
|
||||||
|
import { createMockAgent } from "./mock-agent.js";
|
||||||
|
|
||||||
|
/** One-line usage summary appended to argument errors. */
const USAGE = "usage: uwf-mock --mock-data <path> --thread <id> --role <role> --prompt <text>";

/**
 * Extract the value of the required `--mock-data <path>` option.
 *
 * The value is the token immediately following the first `--mock-data` in
 * `argv`. When the option is absent, is the last token, has an empty value, or
 * is directly followed by another `--flag` (meaning the path itself was
 * omitted), an error plus the usage line is written to stderr and the process
 * exits with code 1.
 *
 * @param argv Raw argument vector, typically `process.argv`.
 * @returns The path given after `--mock-data`.
 */
function getMockDataPath(argv: string[]): string {
  const idx = argv.indexOf("--mock-data");
  const value = idx === -1 ? undefined : argv[idx + 1];
  // A value that looks like another option (e.g. `--mock-data --thread t1`)
  // would otherwise be used as the file path and fail later with a confusing
  // file-system error, so treat it as a missing value here.
  if (value === undefined || value === "" || value.startsWith("--")) {
    process.stderr.write(`--mock-data is required. ${USAGE}\n`);
    process.exit(1);
  }
  return value;
}
|
||||||
|
|
||||||
|
// Entry point: resolve the scenario file from the command line, build the mock
// agent CLI around it and start it. The returned promise is deliberately not
// awaited here.
const mockDataPath = getMockDataPath(process.argv);
void createMockAgent(mockDataPath)();
|
||||||
@@ -0,0 +1,2 @@
|
|||||||
|
export { createMockAgent, parseScenario, selectMockStep } from "./mock-agent.js";
|
||||||
|
export type { MockScenario, MockStep } from "./types.js";
|
||||||
@@ -0,0 +1,128 @@
|
|||||||
|
import { readFile } from "node:fs/promises";
|
||||||
|
|
||||||
|
import { bootstrap, type JSONSchema, putSchema, type Store } from "@ocas/core";
|
||||||
|
import { createLogger } from "@united-workforce/util";
|
||||||
|
import { type AgentContext, type AgentRunResult, createAgent } from "@united-workforce/util-agent";
|
||||||
|
import { parse } from "yaml";
|
||||||
|
|
||||||
|
import type { MockScenario, MockStep } from "./types.js";
|
||||||
|
|
||||||
|
const log = createLogger({ sink: { kind: "stderr" } });
|
||||||
|
|
||||||
|
/**
 * JSON schema of the detail node the mock agent stores for each step: exactly
 * the session id, the role and the step index, with no other properties
 * allowed.
 */
const MOCK_DETAIL_SCHEMA: JSONSchema = {
  title: "mock-detail",
  type: "object",
  required: ["sessionId", "role", "stepIndex"],
  properties: { sessionId: { type: "string" }, role: { type: "string" }, stepIndex: { type: "integer" } },
  additionalProperties: false,
};
|
||||||
|
|
||||||
|
/** Type guard: true for non-null, non-array objects (i.e. plain mappings). */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== "object") {
    return false;
  }
  return !Array.isArray(value);
}
|
||||||
|
|
||||||
|
/**
 * Parse a YAML mock data document into a {@link MockScenario}. Pure — no I/O.
 *
 * @param text YAML source of the mock data document.
 * @returns The scenario, containing only the `role` and `output` of each step.
 * @throws Error when the document is not a mapping with a `steps` array, or
 *   when any step lacks a string `role` or a string `output`.
 */
export function parseScenario(text: string): MockScenario {
  const doc = parse(text) as unknown;
  const rawSteps = isRecord(doc) ? doc.steps : undefined;
  if (!Array.isArray(rawSteps)) {
    throw new Error("mock data must be a mapping with a 'steps' array");
  }

  const steps = rawSteps.map((entry: unknown, index: number): MockStep => {
    if (isRecord(entry)) {
      const { role, output } = entry;
      if (typeof role === "string" && typeof output === "string") {
        // Copy only the two known fields; anything else on the entry is dropped.
        return { role, output };
      }
    }
    throw new Error(`mock step ${index} must have string 'role' and string 'output'`);
  });

  return { steps };
}
|
||||||
|
|
||||||
|
/** Read the UTF-8 file at `path` and parse it with {@link parseScenario}. */
async function loadScenario(path: string): Promise<MockScenario> {
  return parseScenario(await readFile(path, "utf8"));
}
|
||||||
|
|
||||||
|
/**
 * Look up the scripted step at `stepIndex` and check that it was written for
 * the role the moderator actually routed to.
 *
 * Both failure modes throw rather than fall back, so routing bugs surface
 * loudly during E2E runs.
 *
 * @param scenario Parsed mock scenario.
 * @param stepIndex Zero-based position of the step to replay.
 * @param role Role the moderator routed to.
 * @returns The scripted step at `stepIndex`.
 * @throws Error when no step exists at `stepIndex`, or when the step's role
 *   differs from `role`.
 */
export function selectMockStep(scenario: MockScenario, stepIndex: number, role: string): MockStep {
  const { steps } = scenario;
  const scripted = steps[stepIndex];

  if (scripted === undefined) {
    const total = steps.length;
    throw new Error(
      `mock scenario has no step at index ${stepIndex} (total ${total}); moderator routed to role "${role}"`,
    );
  }

  if (scripted.role === role) {
    return scripted;
  }

  throw new Error(
    `mock step ${stepIndex} expected role "${scripted.role}" but moderator routed to "${role}"`,
  );
}
|
||||||
|
|
||||||
|
/**
 * Persist a minimal detail node so the step node has a valid CAS ref.
 *
 * Bootstraps the store, registers {@link MOCK_DETAIL_SCHEMA}, then stores
 * `{ sessionId, role, stepIndex }` under that schema.
 *
 * @returns Whatever `store.cas.put` yields for the stored detail node.
 */
async function storeMockDetail(
  store: Store,
  sessionId: string,
  role: string,
  stepIndex: number,
): Promise<string> {
  await bootstrap(store);
  const detail = { sessionId, role, stepIndex };
  const schemaHash = await putSchema(store, MOCK_DETAIL_SCHEMA);
  return store.cas.put(schemaHash, detail);
}
|
||||||
|
|
||||||
|
/**
 * Agent CLI factory: a deterministic, LLM-free agent that replays pre-scripted
 * outputs from a YAML mock data file.
 *
 * On each run the scenario file is re-read, and the step to replay is chosen by
 * counting the steps already present on the thread (`ctx.steps.length`). A
 * role mismatch or an exhausted script makes the run throw.
 *
 * @param mockDataPath Path of the YAML mock data file.
 * @returns The entry function produced by `createAgent`.
 */
export function createMockAgent(mockDataPath: string): () => Promise<void> {
  // Most recent result produced by `run` in this process; `continue` replays it.
  let lastResult: AgentRunResult | null = null;

  const run = async (ctx: AgentContext): Promise<AgentRunResult> => {
    const scenario = await loadScenario(mockDataPath);
    const stepIndex = ctx.steps.length;
    log(
      "MK7X2QPV",
      `mock step ${stepIndex} for role "${ctx.role}" (${scenario.steps.length} scripted)`,
    );

    const { output } = selectMockStep(scenario, stepIndex, ctx.role);
    const sessionId = `mock-${stepIndex}`;
    const detailHash = await storeMockDetail(ctx.store, sessionId, ctx.role, stepIndex);

    // No prompt is assembled for a scripted step, hence the empty string.
    const outcome: AgentRunResult = { output, detailHash, sessionId, assembledPrompt: "" };
    lastResult = outcome;
    return outcome;
  };

  const continueRun = async (
    sessionId: string,
    _message: string,
    _store: Store,
  ): Promise<AgentRunResult> => {
    const previous = lastResult;
    if (previous === null) {
      throw new Error("mock continue called before run");
    }
    log("MK3N8RTW", `mock continue for session ${sessionId}, replaying scripted output`);
    return previous;
  };

  return createAgent({ name: "mock", run, continue: continueRun });
}
|
||||||
@@ -0,0 +1,12 @@
|
|||||||
|
/** A single scripted turn of a mock scenario. */
export type MockStep = {
  /** Role the step was written for; compared with the role the agent is actually invoked as. */
  role: string;
  /** Frontmatter markdown text the mock agent emits when this step is replayed. */
  output: string;
};
|
||||||
|
|
||||||
|
/** An ordered, pre-scripted list of agent steps loaded from a YAML mock data file. */
export type MockScenario = {
  /** Steps in replay order; a step's position is its step index. */
  steps: MockStep[];
};
|
||||||
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"extends": "../../tsconfig.json",
|
||||||
|
"compilerOptions": {
|
||||||
|
"rootDir": "src",
|
||||||
|
"outDir": "dist"
|
||||||
|
},
|
||||||
|
"include": ["src"],
|
||||||
|
"references": [{ "path": "../util-agent" }, { "path": "../util" }, { "path": "../protocol" }]
|
||||||
|
}
|
||||||
@@ -0,0 +1,296 @@
|
|||||||
|
import { execFileSync } from "node:child_process";
|
||||||
|
import { existsSync } from "node:fs";
|
||||||
|
import { mkdir, mkdtemp, readFile, rm, writeFile } from "node:fs/promises";
|
||||||
|
import { tmpdir } from "node:os";
|
||||||
|
import { dirname, join } from "node:path";
|
||||||
|
import { fileURLToPath } from "node:url";
|
||||||
|
import { openStore } from "@ocas/fs";
|
||||||
|
import type { CasRef, StartNodePayload, StepNodePayload } from "@united-workforce/protocol";
|
||||||
|
import { afterEach, beforeAll, beforeEach, describe, expect, test } from "vitest";
|
||||||
|
import { stringify } from "yaml";
|
||||||
|
import { cmdThreadStart } from "../commands/thread.js";
|
||||||
|
import { cmdWorkflowAdd } from "../commands/workflow.js";
|
||||||
|
import { createUwfStore, getThread } from "../store.js";
|
||||||
|
|
||||||
|
// ── paths ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// Directory containing this test module.
const TEST_DIR = dirname(fileURLToPath(import.meta.url));
// Fixture files (mock data and workflows) live next to the tests.
const FIXTURES_DIR = join(TEST_DIR, "fixtures");
// Built CLI entry point, two levels above the test directory.
const CLI_PATH = join(TEST_DIR, "../../dist/cli.js");
// Repository root, four levels above the test directory.
const REPO_ROOT = join(TEST_DIR, "../../../..");
// The agent-mock package and its built CLI entry point.
const AGENT_MOCK_DIR = join(REPO_ROOT, "packages/agent-mock");
const AGENT_MOCK_CLI = join(AGENT_MOCK_DIR, "dist/cli.js");
|
||||||
|
|
||||||
|
// ── shared fixture state ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
let tmpDir: string;
|
||||||
|
let uwfHome: string;
|
||||||
|
let casDir: string;
|
||||||
|
let savedEnv: { uwf: string | undefined; ocas: string | undefined };
|
||||||
|
|
||||||
|
/**
 * The mock agent runs from its built `dist/cli.js`. When the test suite runs
 * standalone (no prior `pnpm run build`), build it on demand so the E2E run is
 * self-contained.
 *
 * The build is skipped whenever the built file already exists. If the build
 * fails, the compiler's captured output is included in the thrown error so the
 * failure can be diagnosed from the test log.
 */
beforeAll(() => {
  if (existsSync(AGENT_MOCK_CLI)) {
    return;
  }
  const tscBin = join(REPO_ROOT, "node_modules", "typescript", "bin", "tsc");
  try {
    execFileSync(process.execPath, [tscBin, "--build", "--force", AGENT_MOCK_DIR], {
      cwd: REPO_ROOT,
      encoding: "utf8",
      // Capture instead of discarding, so diagnostics are available on failure.
      stdio: ["ignore", "pipe", "pipe"],
    });
  } catch (e: unknown) {
    const err = e as { message?: string; stdout?: string; stderr?: string };
    throw new Error(
      `failed to build agent-mock: ${err.message ?? ""}\nstdout: ${err.stdout ?? ""}\nstderr: ${err.stderr ?? ""}`,
    );
  }
}, 120000);
|
||||||
|
|
||||||
|
// Give every test its own scratch directory holding a fresh UWF home and CAS
// directory, and point the process environment at them.
beforeEach(async () => {
  const sandbox = await mkdtemp(join(tmpdir(), "cli-e2e-mock-"));
  tmpDir = sandbox;
  uwfHome = join(sandbox, "uwf");
  casDir = join(sandbox, "ocas");
  for (const dir of [uwfHome, casDir]) {
    await mkdir(dir, { recursive: true });
  }

  // Programmatic CLI APIs (cmdWorkflowAdd, cmdThreadStart) read the global CAS
  // directory from OCAS_HOME and the storage root from UWF_HOME.
  const { UWF_HOME, OCAS_HOME } = process.env;
  savedEnv = { uwf: UWF_HOME, ocas: OCAS_HOME };
  process.env.UWF_HOME = uwfHome;
  process.env.OCAS_HOME = casDir;
});
|
||||||
|
|
||||||
|
// Restore the environment captured in beforeEach and delete the per-test
// scratch directory.
afterEach(async () => {
  // Assigning `undefined` to a `process.env` entry stores the literal string
  // "undefined", so a variable that was unset beforehand must be deleted
  // rather than assigned.
  const restore = (name: "UWF_HOME" | "OCAS_HOME", previous: string | undefined): void => {
    if (previous === undefined) {
      delete process.env[name];
    } else {
      process.env[name] = previous;
    }
  };
  restore("UWF_HOME", savedEnv.uwf);
  restore("OCAS_HOME", savedEnv.ocas);
  await rm(tmpDir, { recursive: true, force: true });
});
|
||||||
|
|
||||||
|
// ── helpers ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Write a `config.yaml` into UWF_HOME whose default agent is the mock agent.
 *
 * The agent is launched as the current Node binary running the built mock CLI,
 * with `--mock-data <fixture path>` baked into its args so that `thread exec`
 * (without an `--agent` override) resolves it from config.
 *
 * @param mockDataFixture File name of the mock data fixture inside the fixtures directory.
 */
async function writeMockConfig(mockDataFixture: string): Promise<void> {
  const mockDataPath = join(FIXTURES_DIR, mockDataFixture);
  const mockAgent = {
    command: process.execPath,
    args: [AGENT_MOCK_CLI, "--mock-data", mockDataPath],
  };
  const configYaml = stringify({
    defaultAgent: "mock",
    defaultModel: "test",
    providers: {},
    models: {},
    agentOverrides: null,
    agents: { mock: mockAgent },
  });
  await writeFile(join(uwfHome, "config.yaml"), configYaml);
}
|
||||||
|
|
||||||
|
/**
 * Register a workflow fixture and return its hash.
 *
 * `cmdWorkflowAdd` enforces filename↔name consistency, so the fixture is first
 * copied into UWF_HOME as `<workflow-name>.yaml` and registered from there.
 *
 * @param workflowFixture File name of the workflow fixture inside the fixtures directory.
 * @param workflowName Name the copied file is saved under (without extension).
 */
async function addWorkflow(workflowFixture: string, workflowName: string): Promise<CasRef> {
  const targetPath = join(uwfHome, `${workflowName}.yaml`);
  const fixtureText = await readFile(join(FIXTURES_DIR, workflowFixture), "utf8");
  await writeFile(targetPath, fixtureText);
  const { hash } = await cmdWorkflowAdd(uwfHome, targetPath);
  return hash;
}
|
||||||
|
|
||||||
|
/** Captured outcome of one CLI child process. */
type ExecResult = { stdout: string; stderr: string; exitCode: number };

/**
 * Run `thread exec <threadId>` through the built CLI in a child process and
 * report its outcome without throwing.
 *
 * The child runs in the test's scratch directory with UWF_HOME / OCAS_HOME set
 * to the per-test directories, and is limited to 30 seconds. On failure the
 * child's captured output is returned, with exit code 1 when none is reported.
 */
function runExec(threadId: string): ExecResult {
  const cliArgs = [CLI_PATH, "thread", "exec", threadId];
  const childEnv = { ...process.env, UWF_HOME: uwfHome, OCAS_HOME: casDir };
  try {
    const stdout = execFileSync(process.execPath, cliArgs, {
      encoding: "utf8",
      stdio: ["ignore", "pipe", "pipe"],
      env: childEnv,
      cwd: tmpDir,
      timeout: 30000,
    });
    return { stdout, stderr: "", exitCode: 0 };
  } catch (e: unknown) {
    const failure = e as NodeJS.ErrnoException & {
      stdout?: string;
      stderr?: string;
      status?: number;
    };
    const exitCode = failure.status ?? 1;
    return { stdout: failure.stdout ?? "", stderr: failure.stderr ?? "", exitCode };
  }
}
|
||||||
|
|
||||||
|
/** Shape the tests read from the JSON that `thread exec` prints on stdout. */
type StepOutputJson = {
  thread: string;
  head: string;
  status: string;
  currentRole: string | null;
  done: boolean;
};

/**
 * Execute one step of the thread and return the parsed JSON result.
 *
 * @throws Error carrying the exit code, stdout and stderr when the CLI exits non-zero.
 */
function execStep(threadId: string): StepOutputJson {
  const result = runExec(threadId);
  if (result.exitCode === 0) {
    return JSON.parse(result.stdout.trim()) as StepOutputJson;
  }
  throw new Error(
    `thread exec failed (code ${result.exitCode})\nstdout: ${result.stdout}\nstderr: ${result.stderr}`,
  );
}
|
||||||
|
|
||||||
|
/** Fetch the node stored under `hash`, assert that it exists, and return its payload as a step node. */
function getStepNode(store: Awaited<ReturnType<typeof openStore>>, hash: string): StepNodePayload {
  const stepNode = store.cas.get(hash as CasRef);
  expect(stepNode).not.toBeNull();
  const { payload } = stepNode!;
  return payload as StepNodePayload;
}
|
||||||
|
|
||||||
|
/** Fetch the output node at `outputRef`, assert that it exists, and return its `$status` field. */
function getStatus(store: Awaited<ReturnType<typeof openStore>>, outputRef: CasRef): unknown {
  const outputNode = store.cas.get(outputRef);
  expect(outputNode).not.toBeNull();
  const frontmatter = outputNode!.payload as Record<string, unknown>;
  return frontmatter.$status;
}
|
||||||
|
|
||||||
|
// ── scenarios ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// End-to-end scenarios: each test registers a workflow, starts a thread, then
// drives it one `thread exec` at a time with the mock agent replaying scripted
// outputs, and finally inspects the stored chain and thread entry.
describe("E2E mock-agent: full uwf pipeline", () => {
  test("1. linear workflow runs planner then worker and reaches $END", async () => {
    await writeMockConfig("e2e-linear.mock.yaml");
    const workflowHash = await addWorkflow("e2e-linear.workflow.yaml", "test-linear");

    const { thread: threadId } = await cmdThreadStart(
      uwfHome,
      workflowHash,
      "Build the thing",
      uwfHome,
      tmpDir,
    );

    // Capture the start node hash (thread head before any step).
    const initialStore = await createUwfStore(uwfHome);
    const startHash = getThread(initialStore.varStore, threadId)?.head;
    expect(startHash).toBeDefined();

    // Step 1 → planner.
    const plannerRun = execStep(threadId);
    expect(plannerRun.thread).toBe(threadId);
    expect(plannerRun.done).toBe(false);
    expect(plannerRun.status).toBe("idle");
    expect(plannerRun.currentRole).toBe("worker");

    // Step 2 → worker → $END (thread archived to history).
    const workerRun = execStep(threadId);
    expect(workerRun.done).toBe(true);
    expect(workerRun.status).toBe("completed");
    expect(workerRun.currentRole).toBeNull();

    // Verify CAS chain integrity: start → step1 → step2.
    const cas = await openStore(casDir);
    const plannerNode = getStepNode(cas, plannerRun.head);
    const workerNode = getStepNode(cas, workerRun.head);

    expect(plannerNode.role).toBe("planner");
    expect(plannerNode.prev).toBeNull();
    expect(plannerNode.start).toBe(startHash);

    expect(workerNode.role).toBe("worker");
    expect(workerNode.prev).toBe(plannerRun.head);
    expect(workerNode.start).toBe(plannerNode.start);

    // Output frontmatter statuses persisted correctly.
    expect(getStatus(cas, plannerNode.output)).toBe("ready");
    expect(getStatus(cas, workerNode.output)).toBe("done");

    // The start node points at the registered workflow.
    const startNode = cas.cas.get(startHash as CasRef);
    const startPayload = startNode!.payload as StartNodePayload;
    expect(startPayload.workflow).toBe(workflowHash);

    // Thread is completed: status changed to "completed", head updated.
    const finalStore = await createUwfStore(uwfHome);
    const finalEntry = getThread(finalStore.varStore, threadId);
    expect(finalEntry).not.toBeNull();
    expect(finalEntry!.status).toBe("completed");
    expect(finalEntry!.head).toBe(workerRun.head);
  });

  test("2. branching workflow loops developer→reviewer→developer→reviewer→$END", async () => {
    await writeMockConfig("e2e-loop.mock.yaml");
    const workflowHash = await addWorkflow("e2e-loop.workflow.yaml", "test-loop");

    const { thread: threadId } = await cmdThreadStart(
      uwfHome,
      workflowHash,
      "Implement feature",
      uwfHome,
      tmpDir,
    );

    // 4 steps: developer, reviewer (rejected → loop), developer, reviewer (approved → $END).
    const firstDev = execStep(threadId);
    expect(firstDev.status).toBe("idle");
    expect(firstDev.currentRole).toBe("reviewer");

    const firstReview = execStep(threadId);
    expect(firstReview.status).toBe("idle");
    // reviewer rejected → loops back to developer.
    expect(firstReview.currentRole).toBe("developer");

    const secondDev = execStep(threadId);
    expect(secondDev.status).toBe("idle");
    expect(secondDev.currentRole).toBe("reviewer");

    const secondReview = execStep(threadId);
    expect(secondReview.done).toBe(true);
    expect(secondReview.status).toBe("completed");

    // Verify the chain order and roles.
    const cas = await openStore(casDir);
    const devNode1 = getStepNode(cas, firstDev.head);
    const reviewNode1 = getStepNode(cas, firstReview.head);
    const devNode2 = getStepNode(cas, secondDev.head);
    const reviewNode2 = getStepNode(cas, secondReview.head);

    const roles = [devNode1.role, reviewNode1.role, devNode2.role, reviewNode2.role];
    expect(roles).toEqual(["developer", "reviewer", "developer", "reviewer"]);
    expect(devNode1.prev).toBeNull();
    expect(reviewNode1.prev).toBe(firstDev.head);
    expect(devNode2.prev).toBe(firstReview.head);
    expect(reviewNode2.prev).toBe(secondDev.head);

    // All steps share the same start node.
    const startRefs = new Set([devNode1.start, reviewNode1.start, devNode2.start, reviewNode2.start]);
    expect(startRefs.size).toBe(1);

    // Statuses drove the loop routing.
    expect(getStatus(cas, devNode1.output)).toBe("review_needed");
    expect(getStatus(cas, reviewNode1.output)).toBe("rejected");
    expect(getStatus(cas, devNode2.output)).toBe("review_needed");
    expect(getStatus(cas, reviewNode2.output)).toBe("approved");

    const finalStore = await createUwfStore(uwfHome);
    const finalEntry = getThread(finalStore.varStore, threadId);
    expect(finalEntry).not.toBeNull();
    expect(finalEntry!.status).toBe("completed");
  });

  test("3. role mismatch in mock data makes the agent exit with an error", async () => {
    // Reuses the linear workflow but with a mock whose step[1].role is wrong.
    await writeMockConfig("e2e-mismatch.mock.yaml");
    const workflowHash = await addWorkflow("e2e-linear.workflow.yaml", "test-linear");

    const { thread: threadId } = await cmdThreadStart(
      uwfHome,
      workflowHash,
      "Build the thing",
      uwfHome,
      tmpDir,
    );

    // Step 1 (planner) matches and succeeds.
    const plannerRun = execStep(threadId);
    expect(plannerRun.status).toBe("idle");
    expect(plannerRun.currentRole).toBe("worker");

    // Step 2: moderator routes to "worker" but mock step[1].role is "planner".
    const failedRun = runExec(threadId);
    expect(failedRun.exitCode).not.toBe(0);
    const combinedOutput = `${failedRun.stdout}\n${failedRun.stderr}`;
    expect(combinedOutput).toMatch(/expected role "planner"/);

    // The thread remains active (no step node was written for the failed step).
    const finalStore = await createUwfStore(uwfHome);
    const threadEntry = getThread(finalStore.varStore, threadId);
    expect(threadEntry).not.toBeNull();
    expect(threadEntry!.status).not.toBe("completed");
    expect(threadEntry!.head).toBe(plannerRun.head);
  });
});
|
||||||
@@ -0,0 +1,13 @@
|
|||||||
|
steps:
|
||||||
|
- role: planner
|
||||||
|
output: |
|
||||||
|
---
|
||||||
|
$status: ready
|
||||||
|
---
|
||||||
|
Planning complete.
|
||||||
|
- role: worker
|
||||||
|
output: |
|
||||||
|
---
|
||||||
|
$status: done
|
||||||
|
---
|
||||||
|
Work complete.
|
||||||
@@ -0,0 +1,32 @@
|
|||||||
|
name: test-linear
|
||||||
|
description: Simple 2-step linear test (planner -> worker -> $END)
|
||||||
|
roles:
|
||||||
|
planner:
|
||||||
|
description: Plans work
|
||||||
|
goal: Plan the task
|
||||||
|
capabilities: []
|
||||||
|
procedure: Plan it
|
||||||
|
output: Output a plan and set $status to ready
|
||||||
|
frontmatter:
|
||||||
|
oneOf:
|
||||||
|
- properties:
|
||||||
|
$status: { const: ready }
|
||||||
|
required: [$status]
|
||||||
|
worker:
|
||||||
|
description: Does work
|
||||||
|
goal: Do the work
|
||||||
|
capabilities: []
|
||||||
|
procedure: Do it
|
||||||
|
output: Output the result and set $status to done
|
||||||
|
frontmatter:
|
||||||
|
oneOf:
|
||||||
|
- properties:
|
||||||
|
$status: { const: done }
|
||||||
|
required: [$status]
|
||||||
|
graph:
|
||||||
|
$START:
|
||||||
|
_: { role: planner, prompt: 'Plan the task' }
|
||||||
|
planner:
|
||||||
|
ready: { role: worker, prompt: 'Do the work' }
|
||||||
|
worker:
|
||||||
|
done: { role: '$END', prompt: 'Done' }
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
steps:
|
||||||
|
- role: developer
|
||||||
|
output: |
|
||||||
|
---
|
||||||
|
$status: review_needed
|
||||||
|
---
|
||||||
|
First implementation.
|
||||||
|
- role: reviewer
|
||||||
|
output: |
|
||||||
|
---
|
||||||
|
$status: rejected
|
||||||
|
---
|
||||||
|
Needs changes, sending back.
|
||||||
|
- role: developer
|
||||||
|
output: |
|
||||||
|
---
|
||||||
|
$status: review_needed
|
||||||
|
---
|
||||||
|
Second implementation addressing feedback.
|
||||||
|
- role: reviewer
|
||||||
|
output: |
|
||||||
|
---
|
||||||
|
$status: approved
|
||||||
|
---
|
||||||
|
Looks good, approved.
|
||||||
@@ -0,0 +1,36 @@
|
|||||||
|
name: test-loop
|
||||||
|
description: Branching test where the reviewer can reject and loop back to the developer
|
||||||
|
roles:
|
||||||
|
developer:
|
||||||
|
description: Implements changes
|
||||||
|
goal: Implement the change
|
||||||
|
capabilities: []
|
||||||
|
procedure: Write code
|
||||||
|
output: Summarize the change and set $status to review_needed
|
||||||
|
frontmatter:
|
||||||
|
oneOf:
|
||||||
|
- properties:
|
||||||
|
$status: { const: review_needed }
|
||||||
|
required: [$status]
|
||||||
|
reviewer:
|
||||||
|
description: Reviews changes
|
||||||
|
goal: Review the change
|
||||||
|
capabilities: []
|
||||||
|
procedure: Review code
|
||||||
|
output: Approve or reject; set $status to approved or rejected
|
||||||
|
frontmatter:
|
||||||
|
oneOf:
|
||||||
|
- properties:
|
||||||
|
$status: { const: rejected }
|
||||||
|
required: [$status]
|
||||||
|
- properties:
|
||||||
|
$status: { const: approved }
|
||||||
|
required: [$status]
|
||||||
|
graph:
|
||||||
|
$START:
|
||||||
|
_: { role: developer, prompt: 'Implement the change' }
|
||||||
|
developer:
|
||||||
|
review_needed: { role: reviewer, prompt: 'Review the change' }
|
||||||
|
reviewer:
|
||||||
|
rejected: { role: developer, prompt: 'Fix the issues and resubmit' }
|
||||||
|
approved: { role: '$END', prompt: 'Approved, done' }
|
||||||
@@ -0,0 +1,16 @@
|
|||||||
|
# Reuses the test-linear workflow. The moderator routes step 0 -> planner and
|
||||||
|
# step 1 -> worker, but step[1].role below is "planner", so the mock agent must
|
||||||
|
# detect the role mismatch on the second step and exit with an error.
|
||||||
|
steps:
|
||||||
|
- role: planner
|
||||||
|
output: |
|
||||||
|
---
|
||||||
|
$status: ready
|
||||||
|
---
|
||||||
|
Planning complete.
|
||||||
|
- role: planner
|
||||||
|
output: |
|
||||||
|
---
|
||||||
|
$status: done
|
||||||
|
---
|
||||||
|
This step claims to be planner, but the moderator routes to worker.
|
||||||
Generated
+22
@@ -93,6 +93,28 @@ importers:
|
|||||||
specifier: ^5.8.3
|
specifier: ^5.8.3
|
||||||
version: 5.9.3
|
version: 5.9.3
|
||||||
|
|
||||||
|
packages/agent-mock:
|
||||||
|
dependencies:
|
||||||
|
'@ocas/core':
|
||||||
|
specifier: ^0.3.0
|
||||||
|
version: 0.3.0
|
||||||
|
'@united-workforce/protocol':
|
||||||
|
specifier: workspace:^
|
||||||
|
version: link:../protocol
|
||||||
|
'@united-workforce/util':
|
||||||
|
specifier: workspace:^
|
||||||
|
version: link:../util
|
||||||
|
'@united-workforce/util-agent':
|
||||||
|
specifier: workspace:^
|
||||||
|
version: link:../util-agent
|
||||||
|
yaml:
|
||||||
|
specifier: ^2.9.0
|
||||||
|
version: 2.9.0
|
||||||
|
devDependencies:
|
||||||
|
typescript:
|
||||||
|
specifier: ^5.8.3
|
||||||
|
version: 5.9.3
|
||||||
|
|
||||||
packages/cli:
|
packages/cli:
|
||||||
dependencies:
|
dependencies:
|
||||||
'@ocas/core':
|
'@ocas/core':
|
||||||
|
|||||||
@@ -23,6 +23,10 @@ packages:
|
|||||||
path: packages/agent-builtin
|
path: packages/agent-builtin
|
||||||
type: cli
|
type: cli
|
||||||
|
|
||||||
|
- name: "@united-workforce/agent-mock"
|
||||||
|
path: packages/agent-mock
|
||||||
|
type: cli
|
||||||
|
|
||||||
- name: "@united-workforce/cli"
|
- name: "@united-workforce/cli"
|
||||||
path: packages/cli
|
path: packages/cli
|
||||||
type: cli
|
type: cli
|
||||||
|
|||||||
@@ -23,6 +23,7 @@
|
|||||||
{ "path": "packages/util-agent" },
|
{ "path": "packages/util-agent" },
|
||||||
{ "path": "packages/agent-hermes" },
|
{ "path": "packages/agent-hermes" },
|
||||||
{ "path": "packages/agent-builtin" },
|
{ "path": "packages/agent-builtin" },
|
||||||
|
{ "path": "packages/agent-mock" },
|
||||||
{ "path": "packages/agent-claude-code" },
|
{ "path": "packages/agent-claude-code" },
|
||||||
{ "path": "packages/cli" }
|
{ "path": "packages/cli" }
|
||||||
]
|
]
|
||||||
|
|||||||
Reference in New Issue
Block a user