refactor(daemon): split kernel.ts into focused modules (#86)
- Extract worker-pool.ts (211 LOC): sense worker fork/shutdown/restart/crash recovery - Extract kernel-file-watch.ts (92 LOC): file change handlers for hot reload - Extract kernel-sense-groups.ts (29 LOC): group lookup utilities - kernel.ts reduced from 617 → 380 LOC (thin orchestrator) - Add worker-pool.test.ts with 8 test cases - No behavior changes, all existing tests unchanged
This commit is contained in:
@@ -0,0 +1,235 @@
|
|||||||
|
import { EventEmitter } from "node:events";
|
||||||
|
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||||
|
|
||||||
|
const mockChildren: MockChild[] = [];
|
||||||
|
|
||||||
|
type MockChild = EventEmitter & {
|
||||||
|
send: ReturnType<typeof vi.fn>;
|
||||||
|
kill: ReturnType<typeof vi.fn>;
|
||||||
|
pid: number;
|
||||||
|
connected: boolean;
|
||||||
|
};
|
||||||
|
|
||||||
|
function makeMockChild(pid = 1): MockChild {
|
||||||
|
const child = new EventEmitter() as MockChild;
|
||||||
|
child.connected = true;
|
||||||
|
child.send = vi.fn((msg: unknown) => {
|
||||||
|
if (
|
||||||
|
msg !== null &&
|
||||||
|
typeof msg === "object" &&
|
||||||
|
(msg as Record<string, unknown>).type === "shutdown"
|
||||||
|
) {
|
||||||
|
child.connected = false;
|
||||||
|
setImmediate(() => child.emit("exit", 0, null));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
child.kill = vi.fn((_signal?: string) => {
|
||||||
|
child.connected = false;
|
||||||
|
child.emit("exit", null, _signal ?? "SIGKILL");
|
||||||
|
});
|
||||||
|
child.pid = pid;
|
||||||
|
return child;
|
||||||
|
}
|
||||||
|
|
||||||
|
vi.mock("node:child_process", () => ({
|
||||||
|
fork: vi.fn((_script: string, _args: string[], _opts: unknown) => {
|
||||||
|
const child = makeMockChild(mockChildren.length + 1);
|
||||||
|
mockChildren.push(child);
|
||||||
|
return child;
|
||||||
|
}),
|
||||||
|
}));
|
||||||
|
|
||||||
|
const { createSenseWorkerPool } = await import("../worker-pool.js");
|
||||||
|
|
||||||
|
async function flushSetImmediate(): Promise<void> {
|
||||||
|
await new Promise<void>((resolve) => setImmediate(resolve));
|
||||||
|
}
|
||||||
|
|
||||||
|
async function startWorkerWithReady(
|
||||||
|
pool: ReturnType<typeof createSenseWorkerPool>,
|
||||||
|
group: string,
|
||||||
|
): Promise<void> {
|
||||||
|
const pr = pool.startWorker(group);
|
||||||
|
const child = mockChildren[mockChildren.length - 1];
|
||||||
|
child.emit("message", { type: "ready" });
|
||||||
|
await pr;
|
||||||
|
}
|
||||||
|
|
||||||
|
describe("createSenseWorkerPool", () => {
|
||||||
|
beforeEach(() => {
|
||||||
|
mockChildren.length = 0;
|
||||||
|
});
|
||||||
|
|
||||||
|
afterEach(() => {
|
||||||
|
vi.useRealTimers();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("forks one child per startWorker and routes IPC to onWorkerMessage", async () => {
|
||||||
|
const onWorkerMessage = vi.fn();
|
||||||
|
const pool = createSenseWorkerPool({
|
||||||
|
nerveRoot: "/tmp/n",
|
||||||
|
workerScript: "/fake/sense-worker.js",
|
||||||
|
onWorkerMessage,
|
||||||
|
sensesForGroup: () => [],
|
||||||
|
onWorkerCrashed: vi.fn(),
|
||||||
|
onBeforeGroupRestart: vi.fn(),
|
||||||
|
isStopped: () => false,
|
||||||
|
});
|
||||||
|
|
||||||
|
await startWorkerWithReady(pool, "g1");
|
||||||
|
expect(mockChildren).toHaveLength(1);
|
||||||
|
const child = mockChildren[0];
|
||||||
|
child.emit("message", { type: "signal", sense: "s", payload: 1 });
|
||||||
|
expect(onWorkerMessage).toHaveBeenCalledWith({ type: "signal", sense: "s", payload: 1 });
|
||||||
|
});
|
||||||
|
|
||||||
|
it("sendCompute delivers to the worker for that group", async () => {
|
||||||
|
const pool = createSenseWorkerPool({
|
||||||
|
nerveRoot: "/tmp/n",
|
||||||
|
workerScript: "/fake/sense-worker.js",
|
||||||
|
onWorkerMessage: vi.fn(),
|
||||||
|
sensesForGroup: () => [],
|
||||||
|
onWorkerCrashed: vi.fn(),
|
||||||
|
onBeforeGroupRestart: vi.fn(),
|
||||||
|
isStopped: () => false,
|
||||||
|
});
|
||||||
|
|
||||||
|
await startWorkerWithReady(pool, "sys");
|
||||||
|
const child = mockChildren[0];
|
||||||
|
pool.sendCompute("sys", "cpu");
|
||||||
|
expect(child.send).toHaveBeenCalledWith(
|
||||||
|
expect.objectContaining({ type: "compute", sense: "cpu" }),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("hasWorkerForGroup and getWorkerPid reflect running workers", async () => {
|
||||||
|
const pool = createSenseWorkerPool({
|
||||||
|
nerveRoot: "/tmp/n",
|
||||||
|
workerScript: "/fake/sense-worker.js",
|
||||||
|
onWorkerMessage: vi.fn(),
|
||||||
|
sensesForGroup: () => [],
|
||||||
|
onWorkerCrashed: vi.fn(),
|
||||||
|
onBeforeGroupRestart: vi.fn(),
|
||||||
|
isStopped: () => false,
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(pool.hasWorkerForGroup("a")).toBe(false);
|
||||||
|
expect(pool.getWorkerPid("a")).toBeNull();
|
||||||
|
|
||||||
|
await startWorkerWithReady(pool, "a");
|
||||||
|
expect(pool.hasWorkerForGroup("a")).toBe(true);
|
||||||
|
expect(pool.getWorkerPid("a")).toBe(1);
|
||||||
|
expect(pool.activeGroupCount()).toBe(1);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("evictGroup sends shutdown and removes the entry without waiting", async () => {
|
||||||
|
const pool = createSenseWorkerPool({
|
||||||
|
nerveRoot: "/tmp/n",
|
||||||
|
workerScript: "/fake/sense-worker.js",
|
||||||
|
onWorkerMessage: vi.fn(),
|
||||||
|
sensesForGroup: () => [],
|
||||||
|
onWorkerCrashed: vi.fn(),
|
||||||
|
onBeforeGroupRestart: vi.fn(),
|
||||||
|
isStopped: () => false,
|
||||||
|
});
|
||||||
|
|
||||||
|
await startWorkerWithReady(pool, "x");
|
||||||
|
expect(pool.activeGroupCount()).toBe(1);
|
||||||
|
pool.evictGroup("x");
|
||||||
|
expect(pool.hasWorkerForGroup("x")).toBe(false);
|
||||||
|
expect(mockChildren[0].send).toHaveBeenCalledWith(
|
||||||
|
expect.objectContaining({ type: "shutdown" }),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("restartGroup invokes onBeforeGroupRestart then respawns", async () => {
|
||||||
|
const onBeforeGroupRestart = vi.fn();
|
||||||
|
const pool = createSenseWorkerPool({
|
||||||
|
nerveRoot: "/tmp/n",
|
||||||
|
workerScript: "/fake/sense-worker.js",
|
||||||
|
onWorkerMessage: vi.fn(),
|
||||||
|
sensesForGroup: () => ["s1"],
|
||||||
|
onWorkerCrashed: vi.fn(),
|
||||||
|
onBeforeGroupRestart,
|
||||||
|
isStopped: () => false,
|
||||||
|
});
|
||||||
|
|
||||||
|
await startWorkerWithReady(pool, "g");
|
||||||
|
expect(mockChildren).toHaveLength(1);
|
||||||
|
|
||||||
|
const p = pool.restartGroup("g");
|
||||||
|
expect(onBeforeGroupRestart).toHaveBeenCalledWith("g");
|
||||||
|
expect(mockChildren[0].send).toHaveBeenCalledWith(
|
||||||
|
expect.objectContaining({ type: "shutdown" }),
|
||||||
|
);
|
||||||
|
|
||||||
|
await flushSetImmediate();
|
||||||
|
expect(mockChildren).toHaveLength(2);
|
||||||
|
mockChildren[1].emit("message", { type: "ready" });
|
||||||
|
await p;
|
||||||
|
expect(pool.hasWorkerForGroup("g")).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("onWorkerCrashed runs and schedules respawn after non-zero exit", async () => {
|
||||||
|
vi.useFakeTimers({ shouldAdvanceTime: true });
|
||||||
|
const onWorkerCrashed = vi.fn();
|
||||||
|
const pool = createSenseWorkerPool({
|
||||||
|
nerveRoot: "/tmp/n",
|
||||||
|
workerScript: "/fake/sense-worker.js",
|
||||||
|
onWorkerMessage: vi.fn(),
|
||||||
|
sensesForGroup: (g) => (g === "g" ? ["a", "b"] : []),
|
||||||
|
onWorkerCrashed,
|
||||||
|
onBeforeGroupRestart: vi.fn(),
|
||||||
|
isStopped: () => false,
|
||||||
|
});
|
||||||
|
|
||||||
|
await startWorkerWithReady(pool, "g");
|
||||||
|
expect(mockChildren).toHaveLength(1);
|
||||||
|
mockChildren[0].emit("exit", 1, null);
|
||||||
|
expect(onWorkerCrashed).toHaveBeenCalledWith("g");
|
||||||
|
|
||||||
|
await vi.advanceTimersByTimeAsync(1000);
|
||||||
|
expect(mockChildren).toHaveLength(2);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("shutdownAll sends shutdown to every worker", async () => {
|
||||||
|
const pool = createSenseWorkerPool({
|
||||||
|
nerveRoot: "/tmp/n",
|
||||||
|
workerScript: "/fake/sense-worker.js",
|
||||||
|
onWorkerMessage: vi.fn(),
|
||||||
|
sensesForGroup: () => [],
|
||||||
|
onWorkerCrashed: vi.fn(),
|
||||||
|
onBeforeGroupRestart: vi.fn(),
|
||||||
|
isStopped: () => false,
|
||||||
|
});
|
||||||
|
|
||||||
|
await startWorkerWithReady(pool, "a");
|
||||||
|
await startWorkerWithReady(pool, "b");
|
||||||
|
await pool.shutdownAll();
|
||||||
|
expect(mockChildren[0].send).toHaveBeenCalledWith(
|
||||||
|
expect.objectContaining({ type: "shutdown" }),
|
||||||
|
);
|
||||||
|
expect(mockChildren[1].send).toHaveBeenCalledWith(
|
||||||
|
expect.objectContaining({ type: "shutdown" }),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("does not respawn after crash when isStopped is true", async () => {
|
||||||
|
vi.useFakeTimers({ shouldAdvanceTime: true });
|
||||||
|
const pool = createSenseWorkerPool({
|
||||||
|
nerveRoot: "/tmp/n",
|
||||||
|
workerScript: "/fake/sense-worker.js",
|
||||||
|
onWorkerMessage: vi.fn(),
|
||||||
|
sensesForGroup: () => [],
|
||||||
|
onWorkerCrashed: vi.fn(),
|
||||||
|
onBeforeGroupRestart: vi.fn(),
|
||||||
|
isStopped: () => true,
|
||||||
|
});
|
||||||
|
|
||||||
|
await startWorkerWithReady(pool, "g");
|
||||||
|
const n = mockChildren.length;
|
||||||
|
mockChildren[0].emit("exit", 1, null);
|
||||||
|
await vi.advanceTimersByTimeAsync(1000);
|
||||||
|
expect(mockChildren.length).toBe(n);
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -0,0 +1,92 @@
|
|||||||
|
/**
|
||||||
|
* File-watcher callbacks for nerve.yaml / sense / workflow sources (hot reload wiring).
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { readFileSync } from "node:fs";
|
||||||
|
import { join } from "node:path";
|
||||||
|
|
||||||
|
import type { NerveConfig } from "@uncaged/nerve-core";
|
||||||
|
import { parseNerveConfig } from "@uncaged/nerve-core";
|
||||||
|
|
||||||
|
import type { LogStore } from "@uncaged/nerve-store";
|
||||||
|
import type { WorkflowManager } from "./workflow-manager.js";
|
||||||
|
|
||||||
|
export type KernelFileWatchDeps = {
|
||||||
|
nerveRoot: string;
|
||||||
|
getConfig: () => NerveConfig;
|
||||||
|
logStore: LogStore;
|
||||||
|
workflowManager: WorkflowManager;
|
||||||
|
restartGroup: (group: string) => Promise<void>;
|
||||||
|
reloadConfig: (newConfig: NerveConfig) => void;
|
||||||
|
};
|
||||||
|
|
||||||
|
export type KernelFileWatchHandlers = {
|
||||||
|
onSenseFileChange: (senseName: string) => void;
|
||||||
|
onWorkflowFileChange: (workflowName: string) => void;
|
||||||
|
onConfigFileChange: () => void;
|
||||||
|
};
|
||||||
|
|
||||||
|
export function createKernelFileWatchHandlers(deps: KernelFileWatchDeps): KernelFileWatchHandlers {
|
||||||
|
function onSenseFileChange(senseName: string): void {
|
||||||
|
const sc = deps.getConfig().senses[senseName];
|
||||||
|
if (sc === undefined) return;
|
||||||
|
process.stderr.write(
|
||||||
|
`[kernel] sense file changed: "${senseName}", restarting group "${sc.group}"\n`,
|
||||||
|
);
|
||||||
|
deps.logStore.append({
|
||||||
|
source: "system",
|
||||||
|
type: "sense_reload",
|
||||||
|
refId: senseName,
|
||||||
|
payload: null,
|
||||||
|
ts: Date.now(),
|
||||||
|
});
|
||||||
|
deps.restartGroup(sc.group).catch((e) => {
|
||||||
|
const msg = e instanceof Error ? e.message : String(e);
|
||||||
|
process.stderr.write(`[kernel] restartGroup error: ${msg}\n`);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
function onWorkflowFileChange(workflowName: string): void {
|
||||||
|
process.stderr.write(
|
||||||
|
`[kernel] workflow file changed: "${workflowName}", draining and respawning worker\n`,
|
||||||
|
);
|
||||||
|
deps.logStore.append({
|
||||||
|
source: "system",
|
||||||
|
type: "workflow_reload",
|
||||||
|
refId: workflowName,
|
||||||
|
payload: null,
|
||||||
|
ts: Date.now(),
|
||||||
|
});
|
||||||
|
deps.workflowManager.drainAndRespawn(workflowName).catch((e) => {
|
||||||
|
const msg = e instanceof Error ? e.message : String(e);
|
||||||
|
process.stderr.write(`[kernel] drainAndRespawn error for "${workflowName}": ${msg}\n`);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
function onConfigFileChange(): void {
|
||||||
|
process.stderr.write("[kernel] nerve.yaml changed, reloading config\n");
|
||||||
|
deps.logStore.append({
|
||||||
|
source: "system",
|
||||||
|
type: "config_reload",
|
||||||
|
refId: null,
|
||||||
|
payload: null,
|
||||||
|
ts: Date.now(),
|
||||||
|
});
|
||||||
|
try {
|
||||||
|
const raw = readFileSync(join(deps.nerveRoot, "nerve.yaml"), "utf8");
|
||||||
|
const parseResult = parseNerveConfig(raw);
|
||||||
|
if (!parseResult.ok) {
|
||||||
|
process.stderr.write(
|
||||||
|
`[kernel] config parse error, keeping current config: ${parseResult.error.message}\n`,
|
||||||
|
);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
deps.reloadConfig(parseResult.value);
|
||||||
|
} catch (e) {
|
||||||
|
const msg = e instanceof Error ? e.message : String(e);
|
||||||
|
process.stderr.write(`[kernel] failed to read nerve.yaml, keeping current config: ${msg}\n`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return { onSenseFileChange, onWorkflowFileChange, onConfigFileChange };
|
||||||
|
}
|
||||||
@@ -0,0 +1,29 @@
|
|||||||
|
import type { NerveConfig } from "@uncaged/nerve-core";
|
||||||
|
|
||||||
|
export function groupForSense(config: NerveConfig, senseName: string): string | null {
|
||||||
|
const senseConfig = config.senses[senseName];
|
||||||
|
if (senseConfig === undefined) return null;
|
||||||
|
return senseConfig.group;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function senseNamesInGroup(config: NerveConfig, group: string): string[] {
|
||||||
|
return Object.entries(config.senses)
|
||||||
|
.filter(([, sc]) => sc.group === group)
|
||||||
|
.map(([name]) => name);
|
||||||
|
}
|
||||||
|
|
||||||
|
export function collectSenseGroups(cfg: NerveConfig): Set<string> {
|
||||||
|
const result = new Set<string>();
|
||||||
|
for (const sc of Object.values(cfg.senses)) {
|
||||||
|
result.add(sc.group);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function senseNamesInGroupAsSet(cfg: NerveConfig, group: string): Set<string> {
|
||||||
|
const result = new Set<string>();
|
||||||
|
for (const [name, sc] of Object.entries(cfg.senses)) {
|
||||||
|
if (sc.group === group) result.add(name);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
+56
-293
@@ -1,43 +1,32 @@
|
|||||||
/**
|
/**
|
||||||
* Kernel — the main orchestrator that ties sense workers, signal bus, and
|
* Kernel — ties sense workers, signal bus, reflex scheduler, workflow manager,
|
||||||
* reflex scheduler together.
|
* optional file watcher, and daemon IPC.
|
||||||
*
|
|
||||||
* Responsibilities:
|
|
||||||
* - Spawn one child process per sense group (via fork)
|
|
||||||
* - Route SignalMessage from workers → SignalBus
|
|
||||||
* - Route ErrorMessage from workers → stderr log
|
|
||||||
* - Drive compute triggers via ReflexScheduler
|
|
||||||
* - Graceful shutdown: stop scheduler, send shutdown to all workers
|
|
||||||
* - Hot reload: restartGroup, reloadConfig, file watcher integration
|
|
||||||
* - Health reporting: getHealth
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { fork } from "node:child_process";
|
import { join } from "node:path";
|
||||||
import type { ChildProcess } from "node:child_process";
|
|
||||||
import { readFileSync } from "node:fs";
|
|
||||||
import { dirname, join } from "node:path";
|
|
||||||
import { fileURLToPath } from "node:url";
|
|
||||||
|
|
||||||
import type { NerveConfig, SenseInfo, Signal } from "@uncaged/nerve-core";
|
import type { NerveConfig, SenseInfo, Signal } from "@uncaged/nerve-core";
|
||||||
import { parseNerveConfig, routeSenseComputeOutput } from "@uncaged/nerve-core";
|
import { routeSenseComputeOutput } from "@uncaged/nerve-core";
|
||||||
|
|
||||||
|
import { createLogStore } from "@uncaged/nerve-store";
|
||||||
|
import type { LogStore } from "@uncaged/nerve-store";
|
||||||
import { createDaemonIpcServer } from "./daemon-ipc.js";
|
import { createDaemonIpcServer } from "./daemon-ipc.js";
|
||||||
import type { DaemonIpcServer } from "./daemon-ipc.js";
|
import type { DaemonIpcServer } from "./daemon-ipc.js";
|
||||||
import { createFileWatcher } from "./file-watcher.js";
|
import { createFileWatcher } from "./file-watcher.js";
|
||||||
import type { FileWatcher } from "./file-watcher.js";
|
import type { FileWatcher } from "./file-watcher.js";
|
||||||
import type { ComputeMessage, ShutdownMessage } from "./ipc.js";
|
|
||||||
import { parseWorkerMessage } from "./ipc.js";
|
import { parseWorkerMessage } from "./ipc.js";
|
||||||
import { createLogStore } from "@uncaged/nerve-store";
|
import { createKernelFileWatchHandlers } from "./kernel-file-watch.js";
|
||||||
import type { LogStore } from "@uncaged/nerve-store";
|
import {
|
||||||
|
collectSenseGroups,
|
||||||
|
groupForSense,
|
||||||
|
senseNamesInGroup,
|
||||||
|
senseNamesInGroupAsSet,
|
||||||
|
} from "./kernel-sense-groups.js";
|
||||||
import { createReflexScheduler } from "./reflex-scheduler.js";
|
import { createReflexScheduler } from "./reflex-scheduler.js";
|
||||||
import type { ReflexScheduler } from "./reflex-scheduler.js";
|
import type { ReflexScheduler } from "./reflex-scheduler.js";
|
||||||
import { createSignalBus } from "./signal-bus.js";
|
import { createSignalBus } from "./signal-bus.js";
|
||||||
import type { SignalBus } from "./signal-bus.js";
|
import type { SignalBus } from "./signal-bus.js";
|
||||||
import {
|
import { createSenseWorkerPool, resolveWorkerScript } from "./worker-pool.js";
|
||||||
formatCapturedStderrTail,
|
|
||||||
formatChildExitSummary,
|
|
||||||
teeCapturedStderr,
|
|
||||||
} from "./worker-fork-support.js";
|
|
||||||
import { createWorkflowManager } from "./workflow-manager.js";
|
import { createWorkflowManager } from "./workflow-manager.js";
|
||||||
import type { WorkflowManager } from "./workflow-manager.js";
|
import type { WorkflowManager } from "./workflow-manager.js";
|
||||||
|
|
||||||
@@ -57,93 +46,19 @@ export type Kernel = {
|
|||||||
bus: SignalBus;
|
bus: SignalBus;
|
||||||
logStore: LogStore;
|
logStore: LogStore;
|
||||||
workflowManager: WorkflowManager;
|
workflowManager: WorkflowManager;
|
||||||
/** Resolves when all workers have sent their initial "ready" message. */
|
|
||||||
ready: Promise<void>;
|
ready: Promise<void>;
|
||||||
/** Returns the PID of the worker process for a given group, or null if not found. */
|
|
||||||
getWorkerPid: (group: string) => number | null;
|
getWorkerPid: (group: string) => number | null;
|
||||||
/** Sends a compute message to the worker responsible for the given sense. */
|
|
||||||
triggerCompute: (senseName: string) => void;
|
triggerCompute: (senseName: string) => void;
|
||||||
/**
|
|
||||||
* On-demand sense trigger — looks up the group for `senseName`, finds its worker,
|
|
||||||
* and sends a compute message. Throws if the sense is unknown.
|
|
||||||
*/
|
|
||||||
triggerSense: (senseName: string) => void;
|
triggerSense: (senseName: string) => void;
|
||||||
/** Gracefully restart a group worker (wait for exit, then respawn). */
|
|
||||||
restartGroup: (group: string) => Promise<void>;
|
restartGroup: (group: string) => Promise<void>;
|
||||||
/** Reload config from a new NerveConfig, incrementally updating scheduler and workers.
|
|
||||||
* Note: any pending/throttled computes in the old scheduler are silently dropped on reload.
|
|
||||||
* In-flight state is not preserved across reloadConfig. */
|
|
||||||
reloadConfig: (newConfig: NerveConfig) => void;
|
reloadConfig: (newConfig: NerveConfig) => void;
|
||||||
/** Return daemon health info. */
|
|
||||||
getHealth: () => KernelHealth;
|
getHealth: () => KernelHealth;
|
||||||
};
|
};
|
||||||
|
|
||||||
type WorkerEntry = {
|
|
||||||
group: string;
|
|
||||||
process: ChildProcess;
|
|
||||||
};
|
|
||||||
|
|
||||||
function resolveWorkerScript(): string {
|
|
||||||
const __filename = fileURLToPath(import.meta.url);
|
|
||||||
const __dir = dirname(__filename);
|
|
||||||
return join(__dir, "sense-worker.js");
|
|
||||||
}
|
|
||||||
|
|
||||||
function spawnWorker(
|
|
||||||
nerveRoot: string,
|
|
||||||
group: string,
|
|
||||||
workerScript: string,
|
|
||||||
stderrTail: { value: string },
|
|
||||||
): ChildProcess {
|
|
||||||
const child = fork(workerScript, ["--group", group, "--root", nerveRoot], {
|
|
||||||
stdio: ["ignore", "inherit", "pipe", "ipc"],
|
|
||||||
});
|
|
||||||
teeCapturedStderr(child, stderrTail);
|
|
||||||
// Prevent unhandled EPIPE when writing to a child whose IPC channel closed
|
|
||||||
child.on("error", (err) => {
|
|
||||||
if ((err as NodeJS.ErrnoException).code !== "EPIPE") {
|
|
||||||
console.error("[worker] error:", err.message);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
return child;
|
|
||||||
}
|
|
||||||
|
|
||||||
function sendCompute(worker: ChildProcess, senseName: string): void {
|
|
||||||
// worker.connected is false when the IPC channel has been closed (e.g. worker crashed)
|
|
||||||
if (worker.connected === false) return;
|
|
||||||
const msg: ComputeMessage = { type: "compute", sense: senseName };
|
|
||||||
try {
|
|
||||||
worker.send(msg);
|
|
||||||
} catch {
|
|
||||||
// IPC channel closed between connected check and send
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function sendShutdown(worker: ChildProcess): void {
|
|
||||||
if (worker.connected === false) return;
|
|
||||||
const msg: ShutdownMessage = { type: "shutdown" };
|
|
||||||
try {
|
|
||||||
worker.send(msg);
|
|
||||||
} catch {
|
|
||||||
// IPC channel closed between connected check and send
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function groupForSense(config: NerveConfig, senseName: string): string | null {
|
|
||||||
const senseConfig = config.senses[senseName];
|
|
||||||
if (senseConfig === undefined) return null;
|
|
||||||
return senseConfig.group;
|
|
||||||
}
|
|
||||||
|
|
||||||
export type KernelOptions = {
|
export type KernelOptions = {
|
||||||
workerScript?: string | null;
|
workerScript?: string | null;
|
||||||
enableFileWatcher?: boolean;
|
enableFileWatcher?: boolean;
|
||||||
/** Override the LogStore instance (useful for testing). */
|
|
||||||
logStore?: LogStore;
|
logStore?: LogStore;
|
||||||
/**
|
|
||||||
* Unix socket path for the daemon IPC server (used by CLI to send trigger-workflow).
|
|
||||||
* When null, the IPC server is not started (e.g. during tests).
|
|
||||||
*/
|
|
||||||
ipcSocketPath?: string | null;
|
ipcSocketPath?: string | null;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -184,7 +99,6 @@ export function createKernel(
|
|||||||
groups.add(senseConfig.group);
|
groups.add(senseConfig.group);
|
||||||
}
|
}
|
||||||
|
|
||||||
const workers = new Map<string, WorkerEntry>();
|
|
||||||
let stopped = false;
|
let stopped = false;
|
||||||
let scheduler: ReflexScheduler = null as unknown as ReflexScheduler;
|
let scheduler: ReflexScheduler = null as unknown as ReflexScheduler;
|
||||||
|
|
||||||
@@ -194,10 +108,10 @@ export function createKernel(
|
|||||||
});
|
});
|
||||||
let pendingReadyCount = groups.size > 0 ? groups.size : 0;
|
let pendingReadyCount = groups.size > 0 ? groups.size : 0;
|
||||||
|
|
||||||
function sensesForGroup(group: string): string[] {
|
function clearSchedulerForGroup(group: string): void {
|
||||||
return Object.entries(config.senses)
|
for (const senseName of senseNamesInGroup(config, group)) {
|
||||||
.filter(([, sc]) => sc.group === group)
|
scheduler.onComputeComplete(senseName);
|
||||||
.map(([name]) => name);
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function handleWorkerMessage(raw: unknown): void {
|
function handleWorkerMessage(raw: unknown): void {
|
||||||
@@ -259,50 +173,17 @@ export function createKernel(
|
|||||||
}
|
}
|
||||||
scheduler.onComputeComplete(msg.sense);
|
scheduler.onComputeComplete(msg.sense);
|
||||||
}
|
}
|
||||||
|
|
||||||
// health-response is handled externally by the caller; no action needed here
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function startWorker(group: string): Promise<void> {
|
const senseWorkerPool = createSenseWorkerPool({
|
||||||
const stderrTail = { value: "" };
|
nerveRoot,
|
||||||
const child = spawnWorker(nerveRoot, group, workerScript, stderrTail);
|
workerScript,
|
||||||
|
onWorkerMessage: handleWorkerMessage,
|
||||||
let workerReadyResolve: (() => void) | undefined;
|
sensesForGroup: (group) => senseNamesInGroup(config, group),
|
||||||
const workerReady = new Promise<void>((resolve) => {
|
onWorkerCrashed: clearSchedulerForGroup,
|
||||||
workerReadyResolve = resolve;
|
onBeforeGroupRestart: clearSchedulerForGroup,
|
||||||
});
|
isStopped: () => stopped,
|
||||||
|
});
|
||||||
child.on("message", (raw: unknown) => {
|
|
||||||
const result = parseWorkerMessage(raw);
|
|
||||||
if (result.ok && result.value.type === "ready") {
|
|
||||||
workerReadyResolve?.();
|
|
||||||
}
|
|
||||||
handleWorkerMessage(raw);
|
|
||||||
});
|
|
||||||
|
|
||||||
child.on("exit", (code, signal) => {
|
|
||||||
const summary = formatChildExitSummary(code, signal ?? null);
|
|
||||||
process.stderr.write(
|
|
||||||
`[kernel] worker for group "${group}" exited (${summary})${formatCapturedStderrTail(stderrTail.value)}\n`,
|
|
||||||
);
|
|
||||||
// Resolve ready in case the worker exits before sending ready (prevents hangs)
|
|
||||||
workerReadyResolve?.();
|
|
||||||
if (!stopped && code !== 0) {
|
|
||||||
process.stderr.write(`[kernel] respawning worker for group "${group}" in 1s\n`);
|
|
||||||
for (const senseName of sensesForGroup(group)) {
|
|
||||||
scheduler.onComputeComplete(senseName);
|
|
||||||
}
|
|
||||||
setTimeout(() => {
|
|
||||||
if (!stopped) {
|
|
||||||
startWorker(group);
|
|
||||||
}
|
|
||||||
}, 1000);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
workers.set(group, { group, process: child });
|
|
||||||
return workerReady;
|
|
||||||
}
|
|
||||||
|
|
||||||
function triggerFn(senseName: string): void {
|
function triggerFn(senseName: string): void {
|
||||||
const group = groupForSense(config, senseName);
|
const group = groupForSense(config, senseName);
|
||||||
@@ -310,12 +191,7 @@ export function createKernel(
|
|||||||
process.stderr.write(`[kernel] triggerFn: unknown sense "${senseName}"\n`);
|
process.stderr.write(`[kernel] triggerFn: unknown sense "${senseName}"\n`);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const entry = workers.get(group);
|
senseWorkerPool.sendCompute(group, senseName);
|
||||||
if (entry === undefined) {
|
|
||||||
process.stderr.write(`[kernel] triggerFn: no worker for group "${group}"\n`);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
sendCompute(entry.process, senseName);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function triggerSense(senseName: string): void {
|
function triggerSense(senseName: string): void {
|
||||||
@@ -323,11 +199,10 @@ export function createKernel(
|
|||||||
if (group === null) {
|
if (group === null) {
|
||||||
throw new Error(`Unknown sense: "${senseName}"`);
|
throw new Error(`Unknown sense: "${senseName}"`);
|
||||||
}
|
}
|
||||||
const entry = workers.get(group);
|
if (!senseWorkerPool.hasWorkerForGroup(group)) {
|
||||||
if (entry === undefined) {
|
|
||||||
throw new Error(`No worker running for group "${group}" (sense: "${senseName}")`);
|
throw new Error(`No worker running for group "${group}" (sense: "${senseName}")`);
|
||||||
}
|
}
|
||||||
sendCompute(entry.process, senseName);
|
senseWorkerPool.sendCompute(group, senseName);
|
||||||
}
|
}
|
||||||
|
|
||||||
scheduler = createReflexScheduler(config, bus, triggerFn, {
|
scheduler = createReflexScheduler(config, bus, triggerFn, {
|
||||||
@@ -339,63 +214,13 @@ export function createKernel(
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (const group of groups) {
|
for (const group of groups) {
|
||||||
startWorker(group);
|
senseWorkerPool.startWorker(group);
|
||||||
}
|
|
||||||
|
|
||||||
function waitForExit(child: ChildProcess, timeoutMs: number): Promise<void> {
|
|
||||||
return new Promise((resolve) => {
|
|
||||||
const timer = setTimeout(() => {
|
|
||||||
child.kill("SIGKILL");
|
|
||||||
resolve();
|
|
||||||
}, timeoutMs);
|
|
||||||
child.once("exit", () => {
|
|
||||||
clearTimeout(timer);
|
|
||||||
resolve();
|
|
||||||
});
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// --- restartGroup: gracefully stop worker, then respawn and await ready ---
|
|
||||||
async function restartGroup(group: string): Promise<void> {
|
|
||||||
const entry = workers.get(group);
|
|
||||||
if (entry === undefined) return;
|
|
||||||
|
|
||||||
for (const senseName of sensesForGroup(group)) {
|
|
||||||
scheduler.onComputeComplete(senseName);
|
|
||||||
}
|
|
||||||
|
|
||||||
sendShutdown(entry.process);
|
|
||||||
await waitForExit(entry.process, 5000);
|
|
||||||
|
|
||||||
if (!stopped) {
|
|
||||||
await startWorker(group);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function collectGroups(cfg: NerveConfig): Set<string> {
|
|
||||||
const result = new Set<string>();
|
|
||||||
for (const sc of Object.values(cfg.senses)) {
|
|
||||||
result.add(sc.group);
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
function sensesForGroupInConfig(cfg: NerveConfig, group: string): Set<string> {
|
|
||||||
const result = new Set<string>();
|
|
||||||
for (const [name, sc] of Object.entries(cfg.senses)) {
|
|
||||||
if (sc.group === group) result.add(name);
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function removeStaleGroups(oldGroups: Set<string>, newGroups: Set<string>): void {
|
function removeStaleGroups(oldGroups: Set<string>, newGroups: Set<string>): void {
|
||||||
for (const g of oldGroups) {
|
for (const g of oldGroups) {
|
||||||
if (newGroups.has(g)) continue;
|
if (newGroups.has(g)) continue;
|
||||||
const entry = workers.get(g);
|
senseWorkerPool.evictGroup(g);
|
||||||
if (entry !== undefined) {
|
|
||||||
sendShutdown(entry.process);
|
|
||||||
workers.delete(g);
|
|
||||||
}
|
|
||||||
groups.delete(g);
|
groups.delete(g);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -404,27 +229,25 @@ export function createKernel(
|
|||||||
for (const g of newGroups) {
|
for (const g of newGroups) {
|
||||||
if (oldGroups.has(g)) continue;
|
if (oldGroups.has(g)) continue;
|
||||||
groups.add(g);
|
groups.add(g);
|
||||||
if (!stopped) startWorker(g);
|
if (!stopped) {
|
||||||
|
senseWorkerPool.startWorker(g);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function reloadConfig(newConfig: NerveConfig): void {
|
function reloadConfig(newConfig: NerveConfig): void {
|
||||||
const oldGroups = collectGroups(config);
|
const oldGroups = collectSenseGroups(config);
|
||||||
const oldConfig = config;
|
const oldConfig = config;
|
||||||
const oldWorkflows = config.workflows ?? {};
|
const oldWorkflows = config.workflows ?? {};
|
||||||
config = newConfig;
|
config = newConfig;
|
||||||
// Note: pending/throttled computes in the old scheduler are silently dropped here.
|
|
||||||
// In-flight state is not preserved across reloadConfig.
|
|
||||||
scheduler.stop();
|
scheduler.stop();
|
||||||
scheduler = createReflexScheduler(config, bus, triggerFn, {
|
scheduler = createReflexScheduler(config, bus, triggerFn, {
|
||||||
logStore,
|
logStore,
|
||||||
});
|
});
|
||||||
// Update workflow concurrency/overflow config incrementally — no restart needed
|
|
||||||
workflowManager.updateConfig(newConfig);
|
workflowManager.updateConfig(newConfig);
|
||||||
|
|
||||||
const newWorkflows = newConfig.workflows ?? {};
|
const newWorkflows = newConfig.workflows ?? {};
|
||||||
|
|
||||||
// Drain + remove workers for deleted workflows
|
|
||||||
for (const workflowName of Object.keys(oldWorkflows)) {
|
for (const workflowName of Object.keys(oldWorkflows)) {
|
||||||
if (!(workflowName in newWorkflows)) {
|
if (!(workflowName in newWorkflows)) {
|
||||||
process.stderr.write(
|
process.stderr.write(
|
||||||
@@ -439,20 +262,17 @@ export function createKernel(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const newGroups = collectGroups(newConfig);
|
const newGroups = collectSenseGroups(newConfig);
|
||||||
removeStaleGroups(oldGroups, newGroups);
|
removeStaleGroups(oldGroups, newGroups);
|
||||||
addNewGroups(oldGroups, newGroups);
|
addNewGroups(oldGroups, newGroups);
|
||||||
|
|
||||||
// Restart existing groups that gained new senses — the running worker process
|
|
||||||
// was spawned with the old config and will report "Unknown sense" for any newly
|
|
||||||
// added sense until it is restarted.
|
|
||||||
for (const g of newGroups) {
|
for (const g of newGroups) {
|
||||||
if (!oldGroups.has(g)) continue; // already handled by addNewGroups
|
if (!oldGroups.has(g)) continue;
|
||||||
const oldSenses = sensesForGroupInConfig(oldConfig, g);
|
const oldSenses = senseNamesInGroupAsSet(oldConfig, g);
|
||||||
const newSenses = sensesForGroupInConfig(newConfig, g);
|
const newSenses = senseNamesInGroupAsSet(newConfig, g);
|
||||||
const gained = [...newSenses].some((s) => !oldSenses.has(s));
|
const gained = [...newSenses].some((s) => !oldSenses.has(s));
|
||||||
if (gained) {
|
if (gained) {
|
||||||
restartGroup(g).catch((e) => {
|
senseWorkerPool.restartGroup(g).catch((e) => {
|
||||||
const msg = e instanceof Error ? e.message : String(e);
|
const msg = e instanceof Error ? e.message : String(e);
|
||||||
process.stderr.write(`[kernel] reloadConfig restartGroup error for "${g}": ${msg}\n`);
|
process.stderr.write(`[kernel] reloadConfig restartGroup error for "${g}": ${msg}\n`);
|
||||||
});
|
});
|
||||||
@@ -464,80 +284,28 @@ export function createKernel(
|
|||||||
return {
|
return {
|
||||||
uptime: Date.now() - startTime,
|
uptime: Date.now() - startTime,
|
||||||
activeSenses: Object.keys(config.senses).length,
|
activeSenses: Object.keys(config.senses).length,
|
||||||
activeGroups: workers.size,
|
activeGroups: senseWorkerPool.activeGroupCount(),
|
||||||
pendingComputes: 0,
|
pendingComputes: 0,
|
||||||
activeWorkflows: workflowManager.totalActiveCount(),
|
activeWorkflows: workflowManager.totalActiveCount(),
|
||||||
memoryUsage: process.memoryUsage(),
|
memoryUsage: process.memoryUsage(),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
function handleSenseFileChange(senseName: string): void {
|
const fileWatchHandlers = createKernelFileWatchHandlers({
|
||||||
const sc = config.senses[senseName];
|
nerveRoot,
|
||||||
if (sc === undefined) return;
|
getConfig: () => config,
|
||||||
process.stderr.write(
|
logStore,
|
||||||
`[kernel] sense file changed: "${senseName}", restarting group "${sc.group}"\n`,
|
workflowManager,
|
||||||
);
|
restartGroup: (group) => senseWorkerPool.restartGroup(group),
|
||||||
logStore.append({
|
reloadConfig,
|
||||||
source: "system",
|
});
|
||||||
type: "sense_reload",
|
|
||||||
refId: senseName,
|
|
||||||
payload: null,
|
|
||||||
ts: Date.now(),
|
|
||||||
});
|
|
||||||
restartGroup(sc.group).catch((e) => {
|
|
||||||
const msg = e instanceof Error ? e.message : String(e);
|
|
||||||
process.stderr.write(`[kernel] restartGroup error: ${msg}\n`);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
function handleWorkflowFileChange(workflowName: string): void {
|
|
||||||
process.stderr.write(
|
|
||||||
`[kernel] workflow file changed: "${workflowName}", draining and respawning worker\n`,
|
|
||||||
);
|
|
||||||
logStore.append({
|
|
||||||
source: "system",
|
|
||||||
type: "workflow_reload",
|
|
||||||
refId: workflowName,
|
|
||||||
payload: null,
|
|
||||||
ts: Date.now(),
|
|
||||||
});
|
|
||||||
workflowManager.drainAndRespawn(workflowName).catch((e) => {
|
|
||||||
const msg = e instanceof Error ? e.message : String(e);
|
|
||||||
process.stderr.write(`[kernel] drainAndRespawn error for "${workflowName}": ${msg}\n`);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
function handleConfigFileChange(): void {
|
|
||||||
process.stderr.write("[kernel] nerve.yaml changed, reloading config\n");
|
|
||||||
logStore.append({
|
|
||||||
source: "system",
|
|
||||||
type: "config_reload",
|
|
||||||
refId: null,
|
|
||||||
payload: null,
|
|
||||||
ts: Date.now(),
|
|
||||||
});
|
|
||||||
try {
|
|
||||||
const raw = readFileSync(join(nerveRoot, "nerve.yaml"), "utf8");
|
|
||||||
const parseResult = parseNerveConfig(raw);
|
|
||||||
if (!parseResult.ok) {
|
|
||||||
process.stderr.write(
|
|
||||||
`[kernel] config parse error, keeping current config: ${parseResult.error.message}\n`,
|
|
||||||
);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
reloadConfig(parseResult.value);
|
|
||||||
} catch (e) {
|
|
||||||
const msg = e instanceof Error ? e.message : String(e);
|
|
||||||
process.stderr.write(`[kernel] failed to read nerve.yaml, keeping current config: ${msg}\n`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let fileWatcher: FileWatcher | null = null;
|
let fileWatcher: FileWatcher | null = null;
|
||||||
if (options.enableFileWatcher) {
|
if (options.enableFileWatcher) {
|
||||||
fileWatcher = createFileWatcher(nerveRoot, (change) => {
|
fileWatcher = createFileWatcher(nerveRoot, (change) => {
|
||||||
if (change.kind === "sense") handleSenseFileChange(change.senseName);
|
if (change.kind === "sense") fileWatchHandlers.onSenseFileChange(change.senseName);
|
||||||
if (change.kind === "config") handleConfigFileChange();
|
if (change.kind === "config") fileWatchHandlers.onConfigFileChange();
|
||||||
if (change.kind === "workflow") handleWorkflowFileChange(change.workflowName);
|
if (change.kind === "workflow") fileWatchHandlers.onWorkflowFileChange(change.workflowName);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -577,12 +345,7 @@ export function createKernel(
|
|||||||
}
|
}
|
||||||
scheduler.stop();
|
scheduler.stop();
|
||||||
await workflowManager.stop();
|
await workflowManager.stop();
|
||||||
const exitPromises: Promise<void>[] = [];
|
await senseWorkerPool.shutdownAll();
|
||||||
for (const entry of workers.values()) {
|
|
||||||
sendShutdown(entry.process);
|
|
||||||
exitPromises.push(waitForExit(entry.process, 5000));
|
|
||||||
}
|
|
||||||
await Promise.all(exitPromises);
|
|
||||||
logStore.append({
|
logStore.append({
|
||||||
source: "system",
|
source: "system",
|
||||||
type: "stop",
|
type: "stop",
|
||||||
@@ -594,7 +357,7 @@ export function createKernel(
|
|||||||
}
|
}
|
||||||
|
|
||||||
function getWorkerPid(group: string): number | null {
|
function getWorkerPid(group: string): number | null {
|
||||||
return workers.get(group)?.process.pid ?? null;
|
return senseWorkerPool.getWorkerPid(group);
|
||||||
}
|
}
|
||||||
|
|
||||||
const senseCount = Object.keys(config.senses).length;
|
const senseCount = Object.keys(config.senses).length;
|
||||||
@@ -610,7 +373,7 @@ export function createKernel(
|
|||||||
getWorkerPid,
|
getWorkerPid,
|
||||||
triggerCompute: triggerFn,
|
triggerCompute: triggerFn,
|
||||||
triggerSense,
|
triggerSense,
|
||||||
restartGroup,
|
restartGroup: (group) => senseWorkerPool.restartGroup(group),
|
||||||
reloadConfig,
|
reloadConfig,
|
||||||
getHealth,
|
getHealth,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -0,0 +1,211 @@
|
|||||||
|
/**
|
||||||
|
* Sense worker pool — forked child processes per sense group (IPC lifecycle).
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { fork } from "node:child_process";
|
||||||
|
import type { ChildProcess } from "node:child_process";
|
||||||
|
import { dirname, join } from "node:path";
|
||||||
|
import { fileURLToPath } from "node:url";
|
||||||
|
|
||||||
|
import type { ComputeMessage, ShutdownMessage } from "./ipc.js";
|
||||||
|
import { parseWorkerMessage } from "./ipc.js";
|
||||||
|
import {
|
||||||
|
formatCapturedStderrTail,
|
||||||
|
formatChildExitSummary,
|
||||||
|
teeCapturedStderr,
|
||||||
|
} from "./worker-fork-support.js";
|
||||||
|
|
||||||
|
export function resolveWorkerScript(): string {
|
||||||
|
const __filename = fileURLToPath(import.meta.url);
|
||||||
|
const __dir = dirname(__filename);
|
||||||
|
return join(__dir, "sense-worker.js");
|
||||||
|
}
|
||||||
|
|
||||||
|
type WorkerEntry = {
|
||||||
|
group: string;
|
||||||
|
process: ChildProcess;
|
||||||
|
};
|
||||||
|
|
||||||
|
export type SenseWorkerPoolOptions = {
|
||||||
|
nerveRoot: string;
|
||||||
|
workerScript: string;
|
||||||
|
/** Invoked for every IPC message from a worker (including ready / signal / error). */
|
||||||
|
onWorkerMessage: (raw: unknown) => void;
|
||||||
|
/** Sense names in a group — used when clearing scheduler state on crash or restart. */
|
||||||
|
sensesForGroup: (group: string) => string[];
|
||||||
|
/**
|
||||||
|
* Called when a worker exits with non-zero code before scheduling a respawn
|
||||||
|
* (scheduler should release pending computes for senses in that group).
|
||||||
|
*/
|
||||||
|
onWorkerCrashed: (group: string) => void;
|
||||||
|
/**
|
||||||
|
* Called at the beginning of `restartGroup` before shutdown
|
||||||
|
* (same scheduler cleanup as crash path).
|
||||||
|
*/
|
||||||
|
onBeforeGroupRestart: (group: string) => void;
|
||||||
|
isStopped: () => boolean;
|
||||||
|
};
|
||||||
|
|
||||||
|
export type SenseWorkerPool = {
|
||||||
|
startWorker: (group: string) => Promise<void>;
|
||||||
|
restartGroup: (group: string) => Promise<void>;
|
||||||
|
/** Send shutdown and drop the entry without waiting (matches reloadConfig stale-group removal). */
|
||||||
|
evictGroup: (group: string) => void;
|
||||||
|
shutdownAll: () => Promise<void>;
|
||||||
|
sendCompute: (group: string, senseName: string) => void;
|
||||||
|
getWorkerPid: (group: string) => number | null;
|
||||||
|
hasWorkerForGroup: (group: string) => boolean;
|
||||||
|
activeGroupCount: () => number;
|
||||||
|
};
|
||||||
|
|
||||||
|
function spawnWorker(
|
||||||
|
nerveRoot: string,
|
||||||
|
group: string,
|
||||||
|
workerScript: string,
|
||||||
|
stderrTail: { value: string },
|
||||||
|
): ChildProcess {
|
||||||
|
const child = fork(workerScript, ["--group", group, "--root", nerveRoot], {
|
||||||
|
stdio: ["ignore", "inherit", "pipe", "ipc"],
|
||||||
|
});
|
||||||
|
teeCapturedStderr(child, stderrTail);
|
||||||
|
child.on("error", (err) => {
|
||||||
|
if ((err as NodeJS.ErrnoException).code !== "EPIPE") {
|
||||||
|
console.error("[worker] error:", err.message);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
return child;
|
||||||
|
}
|
||||||
|
|
||||||
|
function sendComputeToProcess(worker: ChildProcess, senseName: string): void {
|
||||||
|
if (worker.connected === false) return;
|
||||||
|
const msg: ComputeMessage = { type: "compute", sense: senseName };
|
||||||
|
try {
|
||||||
|
worker.send(msg);
|
||||||
|
} catch {
|
||||||
|
// IPC channel closed between connected check and send
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function sendShutdownToProcess(worker: ChildProcess): void {
|
||||||
|
if (worker.connected === false) return;
|
||||||
|
const msg: ShutdownMessage = { type: "shutdown" };
|
||||||
|
try {
|
||||||
|
worker.send(msg);
|
||||||
|
} catch {
|
||||||
|
// IPC channel closed between connected check and send
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function waitForExit(child: ChildProcess, timeoutMs: number): Promise<void> {
|
||||||
|
return new Promise((resolve) => {
|
||||||
|
const timer = setTimeout(() => {
|
||||||
|
child.kill("SIGKILL");
|
||||||
|
resolve();
|
||||||
|
}, timeoutMs);
|
||||||
|
child.once("exit", () => {
|
||||||
|
clearTimeout(timer);
|
||||||
|
resolve();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
export function createSenseWorkerPool(options: SenseWorkerPoolOptions): SenseWorkerPool {
|
||||||
|
const workers = new Map<string, WorkerEntry>();
|
||||||
|
|
||||||
|
function startWorker(group: string): Promise<void> {
|
||||||
|
const stderrTail = { value: "" };
|
||||||
|
const child = spawnWorker(options.nerveRoot, group, options.workerScript, stderrTail);
|
||||||
|
|
||||||
|
let workerReadyResolve: (() => void) | undefined;
|
||||||
|
const workerReady = new Promise<void>((resolve) => {
|
||||||
|
workerReadyResolve = resolve;
|
||||||
|
});
|
||||||
|
|
||||||
|
child.on("message", (raw: unknown) => {
|
||||||
|
const result = parseWorkerMessage(raw);
|
||||||
|
if (result.ok && result.value.type === "ready") {
|
||||||
|
workerReadyResolve?.();
|
||||||
|
}
|
||||||
|
options.onWorkerMessage(raw);
|
||||||
|
});
|
||||||
|
|
||||||
|
child.on("exit", (code, signal) => {
|
||||||
|
const summary = formatChildExitSummary(code, signal ?? null);
|
||||||
|
process.stderr.write(
|
||||||
|
`[kernel] worker for group "${group}" exited (${summary})${formatCapturedStderrTail(stderrTail.value)}\n`,
|
||||||
|
);
|
||||||
|
workerReadyResolve?.();
|
||||||
|
if (!options.isStopped() && code !== 0) {
|
||||||
|
process.stderr.write(`[kernel] respawning worker for group "${group}" in 1s\n`);
|
||||||
|
options.onWorkerCrashed(group);
|
||||||
|
setTimeout(() => {
|
||||||
|
if (!options.isStopped()) {
|
||||||
|
startWorker(group);
|
||||||
|
}
|
||||||
|
}, 1000);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
workers.set(group, { group, process: child });
|
||||||
|
return workerReady;
|
||||||
|
}
|
||||||
|
|
||||||
|
async function restartGroup(group: string): Promise<void> {
|
||||||
|
const entry = workers.get(group);
|
||||||
|
if (entry === undefined) return;
|
||||||
|
|
||||||
|
options.onBeforeGroupRestart(group);
|
||||||
|
|
||||||
|
sendShutdownToProcess(entry.process);
|
||||||
|
await waitForExit(entry.process, 5000);
|
||||||
|
|
||||||
|
if (!options.isStopped()) {
|
||||||
|
await startWorker(group);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function evictGroup(group: string): void {
|
||||||
|
const entry = workers.get(group);
|
||||||
|
if (entry === undefined) return;
|
||||||
|
sendShutdownToProcess(entry.process);
|
||||||
|
workers.delete(group);
|
||||||
|
}
|
||||||
|
|
||||||
|
async function shutdownAll(): Promise<void> {
|
||||||
|
const exitPromises: Promise<void>[] = [];
|
||||||
|
for (const entry of workers.values()) {
|
||||||
|
sendShutdownToProcess(entry.process);
|
||||||
|
exitPromises.push(waitForExit(entry.process, 5000));
|
||||||
|
}
|
||||||
|
await Promise.all(exitPromises);
|
||||||
|
}
|
||||||
|
|
||||||
|
function sendCompute(group: string, senseName: string): void {
|
||||||
|
const entry = workers.get(group);
|
||||||
|
if (entry === undefined) return;
|
||||||
|
sendComputeToProcess(entry.process, senseName);
|
||||||
|
}
|
||||||
|
|
||||||
|
function getWorkerPid(group: string): number | null {
|
||||||
|
return workers.get(group)?.process.pid ?? null;
|
||||||
|
}
|
||||||
|
|
||||||
|
function hasWorkerForGroup(group: string): boolean {
|
||||||
|
return workers.has(group);
|
||||||
|
}
|
||||||
|
|
||||||
|
function activeGroupCount(): number {
|
||||||
|
return workers.size;
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
startWorker,
|
||||||
|
restartGroup,
|
||||||
|
evictGroup,
|
||||||
|
shutdownAll,
|
||||||
|
sendCompute,
|
||||||
|
getWorkerPid,
|
||||||
|
hasWorkerForGroup,
|
||||||
|
activeGroupCount,
|
||||||
|
};
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user