refactor(sense-generator): tester → createHermesRole for e2e verification

Replaced the 122-line hand-written smoke test with a role built by createHermesRole.
Hermes runs the full lifecycle check autonomously via terminal:
files → nerve.yaml → sense list → trigger → query → logs on failure.

All 3 roles now use factories:
  planner: createCursorRole (ask mode)
  coder:   createCursorRole (default mode)
  tester:  createHermesRole (e2e verification)

小橘 🍊(NEKO Team)
This commit is contained in:
小橘 2026-04-28 03:48:26 +00:00
parent bc4ac8a5cc
commit 7c4883ddec
4 changed files with 52 additions and 121 deletions

View File

@ -2,7 +2,7 @@ import type { WorkflowDefinition } from "@uncaged/nerve-core";
import { END } from "@uncaged/nerve-core"; import { END } from "@uncaged/nerve-core";
import { buildPlannerRole } from "./roles/planner/index.js"; import { buildPlannerRole } from "./roles/planner/index.js";
import { buildCoderRole } from "./roles/coder/index.js"; import { buildCoderRole } from "./roles/coder/index.js";
import { tester } from "./roles/tester/index.js"; import { buildTesterRole } from "./roles/tester/index.js";
import type { SenseMeta } from "./roles/types.js"; import type { SenseMeta } from "./roles/types.js";
@ -11,7 +11,7 @@ const workflow: WorkflowDefinition<SenseMeta> = {
roles: { roles: {
planner: await buildPlannerRole(), planner: await buildPlannerRole(),
coder: await buildCoderRole(), coder: await buildCoderRole(),
tester, tester: await buildTesterRole(),
}, },
moderator(context) { moderator(context) {
if (context.steps.length === 0) return "planner"; if (context.steps.length === 0) return "planner";

View File

@ -1,122 +1,16 @@
import type { RoleResult, StartStep, WorkflowMessage } from "@uncaged/nerve-core"; import { createHermesRole } from "@uncaged/nerve-workflow-utils";
import { spawnSafe } from "@uncaged/nerve-workflow-utils"; import { resolveDashScopeProvider, NERVE_ROOT, SENSES_DIR } from "../shared.js";
import type { SpawnError } from "@uncaged/nerve-workflow-utils"; import { testerMetaSchema } from "../types.js";
import { existsSync } from "node:fs";
import { join } from "node:path";
import { NERVE_ROOT, SENSES_DIR } from "../shared.js";
import type { SenseMeta } from "../types.js"; import type { SenseMeta } from "../types.js";
import { testerPrompt } from "./prompt.js";
function formatSpawnFailure(error: SpawnError): string { export async function buildTesterRole() {
if (error.kind === "spawn_failed") return error.message; const provider = await resolveDashScopeProvider();
if (error.kind === "timeout") return `timeout (stdout=${error.stdout.slice(0, 200)})`; if (provider === null) {
return `exit ${error.exitCode} stderr=${error.stderr.slice(0, 400)}`; throw new Error("Cannot create tester: set DASHSCOPE_API_KEY and DASHSCOPE_BASE_URL");
} }
return createHermesRole<SenseMeta["tester"]>({
async function runSenseSmokeTest( prompt: async (threadId) => testerPrompt({ threadId, sensesDir: SENSES_DIR, nerveRoot: NERVE_ROOT }),
senseName: string, extract: { provider, schema: testerMetaSchema },
): Promise<{ ok: boolean; log: string; reason: string }> {
const logParts: string[] = [];
const runNerve = async (
args: string[],
): Promise<{ ok: true; out: string } | { ok: false; err: string }> => {
const result = await spawnSafe("nerve", args, {
cwd: NERVE_ROOT,
env: null,
timeoutMs: 300_000,
}); });
if (!result.ok) return { ok: false, err: formatSpawnFailure(result.error) };
return { ok: true, out: result.value.stdout };
};
const statusRun = await runNerve(["status"]);
if (!statusRun.ok) {
return {
ok: false,
log: `=== nerve status ===\nERROR: ${statusRun.err}`,
reason: `Smoke test command failed: ${statusRun.err}`,
};
}
logParts.push("=== nerve status ===\n" + statusRun.out);
if (!statusRun.out.includes(senseName)) {
return {
ok: false,
log: logParts.join("\n\n"),
reason: `Sense "${senseName}" not listed in \`nerve status\` output`,
};
}
const triggerRun = await runNerve(["sense", "trigger", senseName]);
if (!triggerRun.ok) {
logParts.push(`=== nerve sense trigger ===\nERROR: ${triggerRun.err}`);
return { ok: false, log: logParts.join("\n\n"), reason: `Trigger failed: ${triggerRun.err}` };
}
logParts.push("=== nerve sense trigger ===\n" + triggerRun.out);
let lastQuery = "";
for (let i = 0; i < 25; i++) {
await new Promise((r) => setTimeout(r, 1000));
const queryRun = await runNerve(["sense", "query", senseName]);
if (!queryRun.ok) {
logParts.push(`=== query attempt ${i + 1} ===\nERROR: ${queryRun.err}`);
} else {
lastQuery = queryRun.out;
logParts.push(`=== query attempt ${i + 1} ===\n${lastQuery}`);
if (!lastQuery.includes("(0 rows)")) {
return {
ok: true,
log: logParts.join("\n\n"),
reason: "Trigger succeeded and query returned at least one row",
};
}
}
}
return {
ok: false,
log: logParts.join("\n\n"),
reason: lastQuery.includes("(0 rows)")
? "Query still returned 0 rows after trigger"
: "Timed out waiting for successful sense query",
};
}
export async function tester(
_start: StartStep,
messages: WorkflowMessage[],
): Promise<RoleResult<SenseMeta["tester"]>> {
const attempt = messages.filter((m) => m.role === "tester").length + 1;
const plannerStep = messages.find((m) => m.role === "planner");
const senseName = plannerStep
? (plannerStep.meta as SenseMeta["planner"]).senseName
: "";
if (senseName.length === 0) {
return {
content: "FAIL — no senseName from planner",
meta: { passed: false, attempt },
};
}
const senseDir = join(SENSES_DIR, senseName);
const missing = [
existsSync(join(senseDir, "index.js")) ? null : "index.js",
existsSync(join(senseDir, "schema.ts")) ? null : "schema.ts",
existsSync(join(senseDir, "migrations", "0001_init.sql")) ? null : "migrations/0001_init.sql",
].filter((x) => x !== null);
if (missing.length > 0) {
return {
content: `FAIL — missing files: ${missing.join(", ")}`,
meta: { passed: false, attempt },
};
}
const smoke = await runSenseSmokeTest(senseName);
return {
content: `${smoke.ok ? "PASS" : "FAIL"}${smoke.reason}`,
meta: { passed: smoke.ok, attempt },
};
} }

View File

@ -0,0 +1,32 @@
/**
 * Builds the instruction text handed to the tester for an end-to-end check
 * of a freshly generated sense.
 *
 * @param vars.threadId  Identifier interpolated into the `nerve thread` command.
 * @param vars.sensesDir Directory under which the sense's files are expected.
 * @param vars.nerveRoot Directory whose `nerve.yaml` should be inspected.
 * @returns The full prompt as a single newline-separated string.
 */
export function testerPrompt(vars: {
  threadId: string;
  sensesDir: string;
  nerveRoot: string;
}): string {
  const { threadId, sensesDir, nerveRoot } = vars;

  // One array entry per output line; joined with "\n" at the end so the
  // resulting text has no leading or trailing newline.
  const promptLines: string[] = [
    "You are testing a newly created Nerve sense end-to-end.",
    "Read the workflow thread for context: `nerve thread " + threadId + "`",
    "The planner named the sense and the coder created the files.",
    "Verify the full lifecycle:",
    "1. Check files exist under " + sensesDir + "/<sense-name>/",
    "- index.js, schema.ts, migrations/0001_init.sql",
    "All three must exist.",
    "2. Check " + nerveRoot + "/nerve.yaml has the sense config and reflex entry",
    "The sense name should appear under `senses:` with group, throttle, etc.",
    "3. Run `nerve sense list` — confirm the sense appears in the output",
    "4. Run `nerve sense trigger <sense-name>` — should complete without error",
    "5. Wait a few seconds, then run `nerve sense query <sense-name>`",
    "Keep retrying (up to ~20 seconds) until it returns at least one row.",
    "If it still says \"0 rows\", that's a failure.",
    "6. If any step fails, run `nerve logs` to check for errors and include",
    "relevant log lines in your report.",
    "Output a clear summary: what you checked, what passed, what failed, and why.",
  ];

  return promptLines.join("\n");
}

View File

@ -13,3 +13,8 @@ export const plannerMetaSchema = z.object({
export const coderMetaSchema = z.object({ export const coderMetaSchema = z.object({
filesCreated: z.boolean().describe("true if the sense files were created"), filesCreated: z.boolean().describe("true if the sense files were created"),
}); });
/** Schema for the tester role's metadata: an overall verdict plus an attempt counter. */
export const testerMetaSchema = z.object({
// Overall verdict: true when all end-to-end checks passed.
passed: z.boolean().describe("true if all e2e checks passed"),
// Which attempt this result belongs to, counted from 1.
attempt: z.number().describe("which attempt this is (1-based)"),
});