5e054facb2
Phase 2 of RFC #308: Stateful Sense refactor. - SenseRuntime uses JSON file persistence instead of SQLite/Drizzle - Sense compute now receives state and returns { state, workflow } - IPC: replaced SignalMessage with ComputeResultMessage - Removed Signal Bus entirely (on[] now uses reverse-index in scheduler) - sense-scheduler.onSenseCompleted() triggers dependent senses - kernel no longer constructs Signal objects or calls routeSenseComputeOutput - Removed drizzle-orm dependency from daemon package Refs #308, closes #310
504 lines
13 KiB
TypeScript
504 lines
13 KiB
TypeScript
/**
 * Phase 6 integration tests — hot reload, error isolation, grace period, health.
 */
|
|
|
|
import { mkdtempSync, rmSync } from "node:fs";
|
|
import { tmpdir } from "node:os";
|
|
import { dirname, join } from "node:path";
|
|
import { fileURLToPath } from "node:url";
|
|
|
|
import type { NerveConfig } from "@uncaged/nerve-core";
|
|
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
|
|
|
import { createKernel } from "../kernel.js";
|
|
import type { Kernel } from "../kernel.js";
|
|
|
|
// Filesystem path of this test module, derived from its module URL.
const __filename = fileURLToPath(import.meta.url);
// Directory containing this test module.
const __dir = dirname(__filename);
// Directory (next to this file) that holds the worker fixture scripts.
const FIXTURES_DIR = join(__dir, "fixtures");
// Worker script handed to createKernel as `workerScript` by most tests below.
const MOCK_WORKER = join(FIXTURES_DIR, "mock-worker.mjs");
// Worker script used by the stderr test in the error-isolation suite.
const ERROR_WORKER = join(FIXTURES_DIR, "error-worker.mjs");
|
|
|
|
/**
 * Builds the baseline config used by these tests.
 *
 * The baseline declares one sense, "cpu-usage", in the "system" group with
 * `throttle`, `timeout`, `gracePeriod` and `interval` all `null` and an empty
 * `on` list. Any top-level key present in `overrides` replaces the baseline
 * value wholesale (shallow merge — nested objects are not merged).
 *
 * @param overrides - Top-level config fields to substitute into the baseline.
 * @returns A freshly constructed config object on every call.
 */
function makeConfig(overrides: Partial<NerveConfig> = {}): NerveConfig {
  const baseline: NerveConfig = {
    senses: {
      "cpu-usage": {
        group: "system",
        throttle: null,
        timeout: null,
        gracePeriod: null,
        interval: null,
        on: [],
      },
    },
    workflows: {},
    maxRounds: 10,
    extract: null,
    api: { port: null, token: null, host: "127.0.0.1" },
  };

  return { ...baseline, ...overrides };
}
|
|
|
|
/**
 * Repeatedly evaluates `predicate` until it returns true.
 *
 * The predicate is checked first; if it is false and the deadline has been
 * reached the call rejects, otherwise it sleeps `intervalMs` and tries again.
 *
 * @param predicate - Synchronous condition to wait for.
 * @param timeoutMs - Maximum time to wait, measured from the call.
 * @param intervalMs - Delay between checks (defaults to 25 ms).
 * @returns Resolves once `predicate()` is true.
 * @throws Error `pollUntil timed out after <timeoutMs>ms` when the deadline passes first.
 */
async function pollUntil(
  predicate: () => boolean,
  timeoutMs: number,
  intervalMs = 25,
): Promise<void> {
  const pause = (): Promise<void> =>
    new Promise<void>((done) => setTimeout(done, intervalMs));
  const expiresAt = Date.now() + timeoutMs;

  for (;;) {
    if (predicate()) {
      return;
    }
    if (Date.now() >= expiresAt) {
      throw new Error(`pollUntil timed out after ${timeoutMs}ms`);
    }
    await pause();
  }
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Hot Reload — restartGroup
|
|
// ---------------------------------------------------------------------------
|
|
|
|
describe("phase6 — restartGroup", () => {
  // Kernel created by the current test; stays null until a test assigns it so
  // teardown knows whether there is anything to stop.
  let kernel: Kernel | null = null;
  // Fresh temp directory per test, passed to createKernel as the nerve root.
  let nerveRoot: string;

  /** True once the log store has a sense "compute-complete" entry for `senseId`. */
  const hasComputeComplete = (target: Kernel, senseId: string): boolean => {
    const entries = target.logStore.query({
      source: "sense",
      type: "compute-complete",
      refId: senseId,
    });
    return entries.length > 0;
  };

  beforeEach(() => {
    nerveRoot = mkdtempSync(join(tmpdir(), "nerve-phase6-restart-"));
  });

  afterEach(async () => {
    if (kernel !== null) {
      await kernel.stop();
      kernel = null;
    }
    rmSync(nerveRoot, { recursive: true, force: true });
  });

  it("restartGroup stops old worker and spawns a new one", async () => {
    const live = createKernel(makeConfig(), nerveRoot, {
      workerScript: MOCK_WORKER,
    });
    kernel = live;

    await live.ready;

    const oldPid = live.getWorkerPid("system");
    expect(oldPid).not.toBeNull();

    await live.restartGroup("system");

    // Wait until the group reports a pid that differs from the pre-restart one.
    await pollUntil(() => {
      const candidate = live.getWorkerPid("system");
      return candidate !== null && candidate !== oldPid;
    }, 12_000);

    const newPid = live.getWorkerPid("system");
    expect(newPid).not.toBeNull();
    expect(newPid).not.toBe(oldPid);

    // The replacement worker must still be able to complete a compute.
    live.triggerCompute("cpu-usage");
    await pollUntil(() => hasComputeComplete(live, "cpu-usage"), 10_000);
  }, 35_000);

  it("restartGroup on nonexistent group does nothing", async () => {
    const live = createKernel(makeConfig(), nerveRoot, {
      workerScript: MOCK_WORKER,
    });
    kernel = live;
    await live.ready;

    // Passes as long as this resolves without throwing.
    await live.restartGroup("nonexistent");
  }, 5_000);
});
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Hot Reload — reloadConfig
|
|
// ---------------------------------------------------------------------------
|
|
|
|
describe("phase6 — reloadConfig", () => {
  // Kernel created by the current test; null until assigned so teardown can skip stop().
  let kernel: Kernel | null = null;
  // Fresh temp directory per test, passed to createKernel as the nerve root.
  let nerveRoot: string;

  /** Sense entry for `group` with every timing field null and no `on` entries. */
  const senseIn = (group: string): NerveConfig["senses"][string] => ({
    group,
    throttle: null,
    timeout: null,
    gracePeriod: null,
    interval: null,
    on: [],
  });

  /** Config with "cpu-usage" in "system" and "net-rx" in "network". */
  const systemAndNetwork = (): NerveConfig =>
    makeConfig({
      senses: {
        "cpu-usage": senseIn("system"),
        "net-rx": senseIn("network"),
      },
    });

  /** Config with only "cpu-usage" in "system". */
  const systemOnly = (): NerveConfig =>
    makeConfig({
      senses: {
        "cpu-usage": senseIn("system"),
      },
    });

  beforeEach(() => {
    nerveRoot = mkdtempSync(join(tmpdir(), "nerve-phase6-reload-"));
  });

  afterEach(async () => {
    if (kernel !== null) {
      await kernel.stop();
      kernel = null;
    }
    rmSync(nerveRoot, { recursive: true, force: true });
  });

  it("adds new group when new sense group is introduced", async () => {
    const live = createKernel(makeConfig(), nerveRoot, {
      workerScript: MOCK_WORKER,
    });
    kernel = live;
    await live.ready;

    expect(live.groups.has("network")).toBe(false);

    const newConfig = systemAndNetwork();
    live.reloadConfig(newConfig);

    // The group is expected to be registered as soon as reloadConfig returns.
    expect(live.groups.has("network")).toBe(true);

    // Its worker pid may only become available a little later.
    await pollUntil(() => live.getWorkerPid("network") !== null, 3000);
    expect(live.getWorkerPid("network")).not.toBeNull();
  }, 10_000);

  it("removes group when all its senses are removed", async () => {
    const config = systemAndNetwork();
    const live = createKernel(config, nerveRoot, {
      workerScript: MOCK_WORKER,
    });
    kernel = live;
    await live.ready;

    expect(live.groups.has("network")).toBe(true);

    const newConfig = systemOnly();
    live.reloadConfig(newConfig);

    expect(live.groups.has("network")).toBe(false);
  }, 10_000);
});
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Error Isolation
|
|
// ---------------------------------------------------------------------------
|
|
|
|
describe("phase6 — error isolation", () => {
  // Kernel created by the current test; null until assigned so teardown can skip stop().
  let kernel: Kernel | null = null;
  // Fresh temp directory per test, passed to createKernel as the nerve root.
  let nerveRoot: string;

  beforeEach(() => {
    nerveRoot = mkdtempSync(join(tmpdir(), "nerve-phase6-err-"));
  });

  afterEach(async () => {
    if (kernel !== null) {
      await kernel.stop();
      kernel = null;
    }
    rmSync(nerveRoot, { recursive: true, force: true });
  });

  // NOTE(review): both senses run under MOCK_WORKER and both are awaited for
  // "compute-complete"; nothing visible here forces "bad-sense" to fail —
  // confirm the fixture's behavior matches the test name.
  it("error from one sense does not crash the worker — other senses still work", async () => {
    const config: NerveConfig = {
      senses: {
        "good-sense": {
          group: "mixed",
          throttle: null,
          timeout: null,
          gracePeriod: null,
          interval: null,
          on: [],
        },
        "bad-sense": {
          group: "mixed",
          throttle: null,
          timeout: null,
          gracePeriod: null,
          interval: null,
          on: [],
        },
      },
      workflows: {},
      maxRounds: 10,
      extract: null,
      api: { port: null, token: null, host: "127.0.0.1" },
    };

    const live = createKernel(config, nerveRoot, {
      workerScript: MOCK_WORKER,
    });
    kernel = live;
    await live.ready;

    // Trigger the two senses one after the other, waiting for each to log completion.
    for (const senseId of ["good-sense", "bad-sense"]) {
      live.triggerCompute(senseId);
      await pollUntil(
        () =>
          live.logStore.query({
            source: "sense",
            type: "compute-complete",
            refId: senseId,
          }).length > 0,
        10_000,
      );
    }
    // Two sequential 10 s polls: the test timeout must exceed their sum,
    // otherwise the second poll's own timeout error can never be reported.
  }, 25_000);

  it("error worker sends error messages, kernel still running", async () => {
    const stderrMessages: string[] = [];
    const origWrite = process.stderr.write;

    // Record everything written to stderr while still forwarding it, including
    // any encoding/callback arguments the caller supplied.
    const stderrSpy = (...args: unknown[]): boolean => {
      const [chunk] = args;
      stderrMessages.push(
        typeof chunk === "string" ? chunk : Buffer.from(chunk as Uint8Array).toString("utf8"),
      );
      return Reflect.apply(origWrite, process.stderr, args) as boolean;
    };
    process.stderr.write = stderrSpy as typeof process.stderr.write;

    try {
      const live = createKernel(makeConfig(), nerveRoot, {
        workerScript: ERROR_WORKER,
      });
      kernel = live;
      await live.ready;

      live.triggerCompute("cpu-usage");

      // Wait for the error to be logged
      await pollUntil(
        () => stderrMessages.some((m) => m.includes("simulated compute error")),
        3000,
      );

      // Kernel should still be running (not crashed)
      expect(live.getWorkerPid("system")).not.toBeNull();
    } finally {
      // Always undo the monkey-patch, even when the test fails or times out in
      // pollUntil, so the spy cannot leak into later tests.
      process.stderr.write = origWrite;
    }
  }, 10_000);
});
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// getHealth
|
|
// ---------------------------------------------------------------------------
|
|
|
|
describe("phase6 — getHealth", () => {
  // Kernel created by the current test; null until assigned so teardown can skip stop().
  let kernel: Kernel | null = null;
  // Fresh temp directory per test, passed to createKernel as the nerve root.
  let nerveRoot: string;

  /** Sense entry for `group` with every timing field null and no `on` entries. */
  const senseIn = (group: string): NerveConfig["senses"][string] => ({
    group,
    throttle: null,
    timeout: null,
    gracePeriod: null,
    interval: null,
    on: [],
  });

  beforeEach(() => {
    nerveRoot = mkdtempSync(join(tmpdir(), "nerve-phase6-health-"));
  });

  afterEach(async () => {
    if (kernel !== null) {
      await kernel.stop();
      kernel = null;
    }
    rmSync(nerveRoot, { recursive: true, force: true });
  });

  it("returns health snapshot with correct shape", async () => {
    // Three senses spread over two groups ("system" x2, "network" x1).
    const config = makeConfig({
      senses: {
        "cpu-usage": senseIn("system"),
        "disk-usage": senseIn("system"),
        "net-rx": senseIn("network"),
      },
    });
    const live = createKernel(config, nerveRoot, {
      workerScript: MOCK_WORKER,
    });
    kernel = live;
    await live.ready;

    const snapshot = live.getHealth();

    expect(snapshot.uptime).toBeGreaterThanOrEqual(0);
    expect(snapshot.activeSenses).toBe(3);
    expect(snapshot.activeGroups).toBe(2);
    expect(snapshot.memoryUsage).toBeDefined();
    expect(typeof snapshot.memoryUsage.heapUsed).toBe("number");
  }, 10_000);

  it("health reflects config changes after reloadConfig", async () => {
    const live = createKernel(makeConfig(), nerveRoot, {
      workerScript: MOCK_WORKER,
    });
    kernel = live;
    await live.ready;

    expect(live.getHealth().activeSenses).toBe(1);

    // Add a second sense in a second group, then re-read the health counters.
    const newConfig = makeConfig({
      senses: {
        "cpu-usage": senseIn("system"),
        "net-rx": senseIn("network"),
      },
    });
    live.reloadConfig(newConfig);

    expect(live.getHealth().activeSenses).toBe(2);
    expect(live.getHealth().activeGroups).toBe(2);
  }, 10_000);
});
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Auto-respawn on crash (existing test extended)
|
|
// ---------------------------------------------------------------------------
|
|
|
|
describe("phase6 — auto-respawn on worker crash", () => {
  // Kernel created by the current test; null until assigned so teardown can skip stop().
  let kernel: Kernel | null = null;
  // Fresh temp directory per test, passed to createKernel as the nerve root.
  let nerveRoot: string;

  beforeEach(() => {
    nerveRoot = mkdtempSync(join(tmpdir(), "nerve-phase6-crash-"));
  });

  afterEach(async () => {
    if (kernel !== null) {
      await kernel.stop();
      kernel = null;
    }
    rmSync(nerveRoot, { recursive: true, force: true });
  });

  it("kernel auto-respawns worker and new worker is functional", async () => {
    const live = createKernel(makeConfig(), nerveRoot, {
      workerScript: MOCK_WORKER,
    });
    kernel = live;
    await live.ready;

    const originalPid = live.getWorkerPid("system");
    expect(originalPid).not.toBeNull();

    // Simulate a crash by killing the worker process outright.
    process.kill(originalPid as number, "SIGKILL");

    // Wait until the group reports a pid different from the killed one.
    await pollUntil(() => {
      const current = live.getWorkerPid("system");
      return current !== null && current !== originalPid;
    }, 12_000);

    const newPid = live.getWorkerPid("system");
    expect(newPid).not.toBeNull();
    expect(newPid).not.toBe(originalPid);

    // The respawned worker must be able to complete a compute.
    live.triggerCompute("cpu-usage");
    await pollUntil(() => {
      const completed = live.logStore.query({
        source: "sense",
        type: "compute-complete",
        refId: "cpu-usage",
      });
      return completed.length > 0;
    }, 10_000);

    // Stop here and clear the shared handle so afterEach does not stop it again.
    await live.stop();
    kernel = null;
  }, 30_000);
});
|