refactor(sense-generator): tester → createHermesRole for e2e verification
Replaced 122-line hand-written smoke test with createHermesRole.
Hermes runs the full lifecycle check autonomously via terminal:
files → nerve.yaml → sense list → trigger → query → logs on failure.
All 3 roles now use factories:
planner: createCursorRole (ask mode)
coder: createCursorRole (default mode)
tester: createHermesRole (e2e verification)
小橘 🍊(NEKO Team)
This commit is contained in:
parent
bc4ac8a5cc
commit
7c4883ddec
@ -2,7 +2,7 @@ import type { WorkflowDefinition } from "@uncaged/nerve-core";
|
||||
import { END } from "@uncaged/nerve-core";
|
||||
import { buildPlannerRole } from "./roles/planner/index.js";
|
||||
import { buildCoderRole } from "./roles/coder/index.js";
|
||||
import { tester } from "./roles/tester/index.js";
|
||||
import { buildTesterRole } from "./roles/tester/index.js";
|
||||
|
||||
import type { SenseMeta } from "./roles/types.js";
|
||||
|
||||
@ -11,7 +11,7 @@ const workflow: WorkflowDefinition<SenseMeta> = {
|
||||
roles: {
|
||||
planner: await buildPlannerRole(),
|
||||
coder: await buildCoderRole(),
|
||||
tester,
|
||||
tester: await buildTesterRole(),
|
||||
},
|
||||
moderator(context) {
|
||||
if (context.steps.length === 0) return "planner";
|
||||
|
||||
@ -1,122 +1,16 @@
|
||||
import type { RoleResult, StartStep, WorkflowMessage } from "@uncaged/nerve-core";
|
||||
import { spawnSafe } from "@uncaged/nerve-workflow-utils";
|
||||
import type { SpawnError } from "@uncaged/nerve-workflow-utils";
|
||||
import { existsSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { NERVE_ROOT, SENSES_DIR } from "../shared.js";
|
||||
|
||||
import { createHermesRole } from "@uncaged/nerve-workflow-utils";
|
||||
import { resolveDashScopeProvider, NERVE_ROOT, SENSES_DIR } from "../shared.js";
|
||||
import { testerMetaSchema } from "../types.js";
|
||||
import type { SenseMeta } from "../types.js";
|
||||
import { testerPrompt } from "./prompt.js";
|
||||
|
||||
/**
 * Renders a spawn error as a short, single-line reason string.
 *
 * - `spawn_failed`: the error's own message, unchanged.
 * - `timeout`: the first 200 characters of captured stdout.
 * - any other kind: the exit code plus the first 400 characters of stderr.
 *
 * @param error - The spawn error to describe.
 * @returns A human-readable description of the failure.
 */
function formatSpawnFailure(error: SpawnError): string {
  switch (error.kind) {
    case "spawn_failed":
      return error.message;
    case "timeout": {
      // Truncate so a chatty process cannot flood the report.
      const stdoutHead = error.stdout.slice(0, 200);
      return `timeout (stdout=${stdoutHead})`;
    }
    default: {
      const stderrHead = error.stderr.slice(0, 400);
      return `exit ${error.exitCode} stderr=${stderrHead}`;
    }
  }
}
|
||||
|
||||
async function runSenseSmokeTest(
|
||||
senseName: string,
|
||||
): Promise<{ ok: boolean; log: string; reason: string }> {
|
||||
const logParts: string[] = [];
|
||||
|
||||
const runNerve = async (
|
||||
args: string[],
|
||||
): Promise<{ ok: true; out: string } | { ok: false; err: string }> => {
|
||||
const result = await spawnSafe("nerve", args, {
|
||||
cwd: NERVE_ROOT,
|
||||
env: null,
|
||||
timeoutMs: 300_000,
|
||||
/**
 * Builds the tester role of the sense-generator workflow via `createHermesRole`.
 *
 * The role's prompt is produced per thread by `testerPrompt` (given the thread
 * id, `SENSES_DIR` and `NERVE_ROOT`), and the resolved provider together with
 * `testerMetaSchema` is passed as the `extract` option.
 *
 * @returns The role returned by `createHermesRole<SenseMeta["tester"]>`.
 * @throws Error when `resolveDashScopeProvider()` resolves to `null`.
 */
export async function buildTesterRole() {
|
||||
const provider = await resolveDashScopeProvider();
|
||||
// Fail fast at workflow construction time rather than on the first tester run.
// NOTE(review): presumably the provider is derived from the DASHSCOPE_* variables
// named in the message below — confirm in ../shared.js.
if (provider === null) {
|
||||
throw new Error("Cannot create tester: set DASHSCOPE_API_KEY and DASHSCOPE_BASE_URL");
|
||||
}
|
||||
return createHermesRole<SenseMeta["tester"]>({
|
||||
prompt: async (threadId) => testerPrompt({ threadId, sensesDir: SENSES_DIR, nerveRoot: NERVE_ROOT }),
|
||||
// NOTE(review): `extract` presumably turns the agent's free-text report into
// SenseMeta["tester"] using this provider and schema — confirm against createHermesRole.
extract: { provider, schema: testerMetaSchema },
|
||||
});
|
||||
if (!result.ok) return { ok: false, err: formatSpawnFailure(result.error) };
|
||||
return { ok: true, out: result.value.stdout };
|
||||
};
|
||||
|
||||
const statusRun = await runNerve(["status"]);
|
||||
if (!statusRun.ok) {
|
||||
return {
|
||||
ok: false,
|
||||
log: `=== nerve status ===\nERROR: ${statusRun.err}`,
|
||||
reason: `Smoke test command failed: ${statusRun.err}`,
|
||||
};
|
||||
}
|
||||
logParts.push("=== nerve status ===\n" + statusRun.out);
|
||||
if (!statusRun.out.includes(senseName)) {
|
||||
return {
|
||||
ok: false,
|
||||
log: logParts.join("\n\n"),
|
||||
reason: `Sense "${senseName}" not listed in \`nerve status\` output`,
|
||||
};
|
||||
}
|
||||
|
||||
const triggerRun = await runNerve(["sense", "trigger", senseName]);
|
||||
if (!triggerRun.ok) {
|
||||
logParts.push(`=== nerve sense trigger ===\nERROR: ${triggerRun.err}`);
|
||||
return { ok: false, log: logParts.join("\n\n"), reason: `Trigger failed: ${triggerRun.err}` };
|
||||
}
|
||||
logParts.push("=== nerve sense trigger ===\n" + triggerRun.out);
|
||||
|
||||
let lastQuery = "";
|
||||
for (let i = 0; i < 25; i++) {
|
||||
await new Promise((r) => setTimeout(r, 1000));
|
||||
const queryRun = await runNerve(["sense", "query", senseName]);
|
||||
if (!queryRun.ok) {
|
||||
logParts.push(`=== query attempt ${i + 1} ===\nERROR: ${queryRun.err}`);
|
||||
} else {
|
||||
lastQuery = queryRun.out;
|
||||
logParts.push(`=== query attempt ${i + 1} ===\n${lastQuery}`);
|
||||
if (!lastQuery.includes("(0 rows)")) {
|
||||
return {
|
||||
ok: true,
|
||||
log: logParts.join("\n\n"),
|
||||
reason: "Trigger succeeded and query returned at least one row",
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
ok: false,
|
||||
log: logParts.join("\n\n"),
|
||||
reason: lastQuery.includes("(0 rows)")
|
||||
? "Query still returned 0 rows after trigger"
|
||||
: "Timed out waiting for successful sense query",
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Tester role: checks that the generated sense exists on disk, then smoke-tests it.
 *
 * Steps, in order:
 *  1. The attempt number is the count of earlier `tester` messages plus one.
 *  2. The sense name is read from the first `planner` message's meta; a missing
 *     planner message or an empty name yields a FAIL result.
 *  3. `index.js`, `schema.ts` and `migrations/0001_init.sql` must exist under
 *     `SENSES_DIR/<senseName>`; any missing file yields a FAIL result listing them.
 *  4. Otherwise `runSenseSmokeTest` is run and its outcome is reported as PASS/FAIL.
 *
 * @param _start - Unused.
 * @param messages - Workflow messages produced so far.
 * @returns A one-line verdict in `content` and `{ passed, attempt }` in `meta`.
 */
export async function tester(
  _start: StartStep,
  messages: WorkflowMessage[],
): Promise<RoleResult<SenseMeta["tester"]>> {
  let priorTesterRuns = 0;
  for (const message of messages) {
    if (message.role === "tester") priorTesterRuns += 1;
  }
  const attempt = priorTesterRuns + 1;

  // Every early exit reports the same failed meta; only the explanation differs.
  const fail = (content: string): RoleResult<SenseMeta["tester"]> => ({
    content,
    meta: { passed: false, attempt },
  });

  const plannerMessage = messages.find((message) => message.role === "planner");
  let senseName = "";
  if (plannerMessage) {
    senseName = (plannerMessage.meta as SenseMeta["planner"]).senseName;
  }
  if (senseName.length === 0) {
    return fail("FAIL — no senseName from planner");
  }

  const senseDir = join(SENSES_DIR, senseName);
  // `label` is what gets reported; `segments` is what gets joined onto the sense directory.
  const requiredFiles = [
    { label: "index.js", segments: ["index.js"] },
    { label: "schema.ts", segments: ["schema.ts"] },
    { label: "migrations/0001_init.sql", segments: ["migrations", "0001_init.sql"] },
  ];
  const missing = requiredFiles
    .filter(({ segments }) => !existsSync(join(senseDir, ...segments)))
    .map(({ label }) => label);
  if (missing.length > 0) {
    return fail(`FAIL — missing files: ${missing.join(", ")}`);
  }

  const { ok, reason } = await runSenseSmokeTest(senseName);
  const verdict = ok ? "PASS" : "FAIL";
  return {
    content: `${verdict} — ${reason}`,
    meta: { passed: ok, attempt },
  };
}
|
||||
|
||||
32
workflows/sense-generator/roles/tester/prompt.ts
Normal file
32
workflows/sense-generator/roles/tester/prompt.ts
Normal file
@ -0,0 +1,32 @@
|
||||
/**
 * Builds the instruction prompt for the agent-backed tester role.
 *
 * The prompt tells the agent to read the workflow thread and then verify a
 * newly created sense end-to-end: required files, the `nerve.yaml` entry,
 * `nerve sense list`, `nerve sense trigger`, `nerve sense query`, and
 * `nerve logs` when something fails.
 *
 * It also asks the agent to work out the attempt number from the thread and to
 * finish with an explicit PASS/FAIL verdict, so that the `passed` and `attempt`
 * fields of the tester metadata schema can be filled from the report instead of
 * being guessed.
 *
 * @param vars - Values interpolated into the prompt.
 * @param vars.threadId - Thread id passed to `nerve thread`.
 * @param vars.sensesDir - Directory under which the `<sense-name>` directory is expected.
 * @param vars.nerveRoot - Directory that contains `nerve.yaml`.
 * @returns The complete prompt text.
 */
export function testerPrompt(vars: {
  threadId: string;
  sensesDir: string;
  nerveRoot: string;
}): string {
  return `You are testing a newly created Nerve sense end-to-end.

Read the workflow thread for context: \`nerve thread ${vars.threadId}\`
The planner named the sense and the coder created the files.

Verify the full lifecycle:

1. Check files exist under ${vars.sensesDir}/<sense-name>/
- index.js, schema.ts, migrations/0001_init.sql
All three must exist.

2. Check ${vars.nerveRoot}/nerve.yaml has the sense config and reflex entry
The sense name should appear under \`senses:\` with group, throttle, etc.

3. Run \`nerve sense list\` — confirm the sense appears in the output

4. Run \`nerve sense trigger <sense-name>\` — should complete without error

5. Wait a few seconds, then run \`nerve sense query <sense-name>\`
Keep retrying (up to ~20 seconds) until it returns at least one row.
If it still says "0 rows", that's a failure.

6. If any step fails, run \`nerve logs\` to check for errors and include
relevant log lines in your report.

7. Determine the attempt number: count the tester messages already present
in the thread and add 1 (the first tester run is attempt 1).

Output a clear summary: what you checked, what passed, what failed, and why.
End with an explicit verdict line — \`PASS\` only if every check passed,
otherwise \`FAIL\` — followed by the attempt number.`;
}
|
||||
@ -13,3 +13,8 @@ export const plannerMetaSchema = z.object({
|
||||
/** Zod schema for the coder role's metadata: a single boolean `filesCreated` flag. */
export const coderMetaSchema = z.object({
|
||||
filesCreated: z.boolean().describe("true if the sense files were created"),
|
||||
});
|
||||
|
||||
/**
 * Zod schema for the tester role's metadata: whether the end-to-end checks
 * passed, and which attempt produced the result.
 *
 * NOTE(review): `attempt` is described as 1-based but accepts any number;
 * consider constraining it to an integer >= 1 once it is confirmed that the
 * extraction provider tolerates such constraints.
 */
export const testerMetaSchema = z.object({
|
||||
passed: z.boolean().describe("true if all e2e checks passed"),
|
||||
attempt: z.number().describe("which attempt this is (1-based)"),
|
||||
});
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user