Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| f12f187d8d | |||
| 1512113a01 | |||
| 8f495cf92e | |||
| a68338c4e9 | |||
| 9802f68380 | |||
| f245224320 |
@@ -0,0 +1,410 @@
|
||||
# RFC-002: Workflow Engine
|
||||
|
||||
> Status: Draft
|
||||
> Author: 小橘 🍊(NEKO Team)
|
||||
> Date: 2026-04-22
|
||||
|
||||
## 1. 动机
|
||||
|
||||
RFC-001 定义了 Sense → Signal → Reflex 的无状态观测循环。当 Reflex 的 action 为 `StartWorkflow` 时,需要一个有状态的执行引擎来驱动多步骤工作流。本 RFC 定义 Workflow Engine 的完整设计。
|
||||
|
||||
## 2. 概念模型
|
||||
|
||||
```
|
||||
Signal 循环 ──→ ThreadStart ──→ Thread 循环
|
||||
(无状态,幂等,可合并) (有状态,顺序,Command Event 驱动)
|
||||
│
|
||||
Thread 产出 Log
|
||||
(执行日志,供 retrospection)
|
||||
```
|
||||
|
||||
### 2.1 核心概念
|
||||
|
||||
- **Workflow**:一个有状态工作流的定义,包含并发策略和溢出策略
|
||||
- **Thread**:Workflow 的一次执行实例,有唯一 `runId`
|
||||
- **Role**:执行具体动作的角色,有副作用(调 API、改文件等)
|
||||
- **Moderator**:在 Role 之间递话筒的调度逻辑(纯函数)
|
||||
- **CommandEvent**:Thread 内部的事件流,驱动 Role 之间的流转
|
||||
|
||||
Role 和 Moderator 是 Workflow 的内部细节,不跨 Workflow 共享,不作为顶层扩展点。
|
||||
|
||||
### 2.2 与 Sense 循环的边界
|
||||
|
||||
| | Signal 循环 | Thread 循环 |
|
||||
|---|---|---|
|
||||
| 状态 | 无状态 | 有状态(事件溯源) |
|
||||
| 幂等性 | 幂等、可合并、可丢弃 | 严格顺序、每个 event 必须响应 |
|
||||
| 触发 | Reflex | Moderator 递话筒 |
|
||||
| 产出 | Signal → Bus | Log → logs.db |
|
||||
|
||||
**关键约束**:Thread 产出的 Log 不能触发 Reflex(见 RFC-001 §2.4),只能被 Sense 的 compute 查询用于 retrospection。
|
||||
|
||||
## 3. 配置
|
||||
|
||||
```yaml
|
||||
# nerve.yaml
|
||||
workflows:
|
||||
cleanup:
|
||||
concurrency: 1 # 同时最多 1 个 thread
|
||||
overflow: drop # 已有活跃 thread 时丢弃新请求
|
||||
execute-task:
|
||||
concurrency: 10 # 可并行 10 个 thread
|
||||
overflow: queue # 超出时排队
|
||||
code-review:
|
||||
concurrency: 3
|
||||
overflow: queue
|
||||
maxQueue: 20 # 队列上限,超出丢弃最旧请求
|
||||
```
|
||||
|
||||
- **concurrency**:同时允许的最大活跃 Thread 数
|
||||
- **overflow**:
|
||||
- `drop`:丢弃,适用于幂等操作(如 cleanup)
|
||||
- `queue`:排队等待,适用于每次需要执行的操作
|
||||
- **maxQueue**(仅 `overflow: queue`):队列上限,默认 100,超出丢弃最旧请求
|
||||
|
||||
不需要 throttle——触发频率由上游 Sense 的 throttle 控制。
|
||||
|
||||
## 4. 用户代码结构
|
||||
|
||||
```
|
||||
~/.uncaged-nerve/
|
||||
workflows/
|
||||
cleanup/
|
||||
index.ts # workflow 定义
|
||||
code-review/
|
||||
index.ts
|
||||
```
|
||||
|
||||
### 4.1 Workflow 定义(用户代码)
|
||||
|
||||
```typescript
|
||||
// workflows/cleanup/index.ts
|
||||
import type { WorkflowDefinition } from "@uncaged/nerve-daemon";
|
||||
|
||||
const workflow: WorkflowDefinition = {
|
||||
roles: {
|
||||
scanner: {
|
||||
execute: async (prompt, ctx) => {
|
||||
// 扫描需要清理的资源
|
||||
const stale = await findStaleResources();
|
||||
return { type: "scan_complete", resources: stale };
|
||||
},
|
||||
},
|
||||
cleaner: {
|
||||
execute: async (prompt, ctx) => {
|
||||
// 执行清理
|
||||
await deleteResources(prompt.resources);
|
||||
return { type: "cleanup_done", count: prompt.resources.length };
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
moderate(thread, event) {
|
||||
// 纯函数:决定下一步交给谁
|
||||
if (event.type === "thread_start") {
|
||||
return { role: "scanner", prompt: {} };
|
||||
}
|
||||
if (event.type === "scan_complete") {
|
||||
if (event.resources.length === 0) return null; // 结束
|
||||
return { role: "cleaner", prompt: { resources: event.resources } };
|
||||
}
|
||||
return null; // 结束
|
||||
},
|
||||
};
|
||||
|
||||
export default workflow;
|
||||
```
|
||||
|
||||
### 4.2 类型定义
|
||||
|
||||
```typescript
|
||||
// 用户需要实现的接口
|
||||
export type RoleExecuteFn = (
|
||||
prompt: unknown,
|
||||
ctx: WorkflowContext,
|
||||
) => Promise<CommandEvent>;
|
||||
|
||||
export type Role = {
|
||||
execute: RoleExecuteFn;
|
||||
};
|
||||
|
||||
export type ModerateFn = (
|
||||
thread: ThreadState,
|
||||
event: CommandEvent,
|
||||
) => ModerateResult | null; // null = thread 完成
|
||||
|
||||
export type ModerateResult = {
|
||||
role: string;
|
||||
prompt: unknown;
|
||||
};
|
||||
|
||||
export type WorkflowDefinition = {
|
||||
roles: Record<string, Role>;
|
||||
moderate: ModerateFn;
|
||||
};
|
||||
|
||||
export type WorkflowContext = {
|
||||
runId: string;
|
||||
workflowName: string;
|
||||
log: (message: string) => void;
|
||||
};
|
||||
|
||||
export type CommandEvent = {
|
||||
type: string;
|
||||
[key: string]: unknown;
|
||||
};
|
||||
|
||||
export type ThreadState = {
|
||||
runId: string;
|
||||
events: CommandEvent[]; // 历史事件,供 moderate 做决策
|
||||
};
|
||||
```
|
||||
|
||||
## 5. 运行时架构
|
||||
|
||||
### 5.1 进程模型
|
||||
|
||||
同一个 workflow 的所有 thread 共享一个 worker 进程。`concurrency` 控制进程内的并发 async task 数。
|
||||
|
||||
```
|
||||
nerve-engine (kernel)
|
||||
├─ nerve-worker sense --group system (永驻)
|
||||
├─ nerve-worker sense --group tasks (永驻)
|
||||
├─ nerve-worker workflow --name cleanup (按需启动)
|
||||
└─ nerve-worker workflow --name code-review (按需启动)
|
||||
```
|
||||
|
||||
- 进程数 = workflow 种类数,不会膨胀
|
||||
- 有活跃 thread 时启动,所有 thread 完成后退出(或保持待命 `idleTimeout`,默认 30s)
|
||||
- 崩溃后 engine respawn worker,从持久化状态恢复 thread
|
||||
|
||||
### 5.2 IPC 扩展
|
||||
|
||||
在现有 IPC 协议基础上扩展 workflow 相关消息:
|
||||
|
||||
```typescript
|
||||
// Parent → Workflow Worker
|
||||
type StartThreadMessage = {
|
||||
type: "start-thread";
|
||||
runId: string;
|
||||
workflow: string;
|
||||
triggerPayload: unknown; // Reflex 传入的 signal payload
|
||||
};
|
||||
|
||||
type ResumeThreadMessage = {
|
||||
type: "resume-thread";
|
||||
runId: string;
|
||||
};
|
||||
|
||||
type ShutdownMessage = {
|
||||
type: "shutdown";
|
||||
};
|
||||
|
||||
// Workflow Worker → Parent
|
||||
type ThreadEventMessage = {
|
||||
type: "thread-event";
|
||||
runId: string;
|
||||
eventType: string; // queued, started, step_complete, completed, failed
|
||||
payload: unknown;
|
||||
};
|
||||
|
||||
type WorkflowReadyMessage = {
|
||||
type: "ready";
|
||||
};
|
||||
|
||||
type WorkflowErrorMessage = {
|
||||
type: "error";
|
||||
runId: string;
|
||||
error: string;
|
||||
};
|
||||
```
|
||||
|
||||
### 5.3 Thread 生命周期
|
||||
|
||||
```
|
||||
┌──────────────────────────────────────────┐
|
||||
│ │
|
||||
trigger ──→ queued ──→ started ──→ step_complete ──→ completed
|
||||
│ ↑ │
|
||||
│ └─────┘
|
||||
│
|
||||
└──→ failed
|
||||
└──→ crashed (worker 崩溃)
|
||||
```
|
||||
|
||||
1. Reflex `StartWorkflow` 触发 → kernel 检查 concurrency
|
||||
2. 有空位 → 状态 `started`,发 `start-thread` 给 worker
|
||||
3. 超出 concurrency:
|
||||
- `overflow: drop` → 丢弃,写 `dropped` log
|
||||
- `overflow: queue` → 状态 `queued`,等空位
|
||||
4. Worker 内部循环:`moderate → execute → moderate → execute → ...`
|
||||
5. `moderate` 返回 `null` → `completed`
|
||||
6. Role `execute` 抛异常 → `failed`
|
||||
7. Worker 进程崩溃 → 所有活跃 thread 标记 `crashed`
|
||||
|
||||
### 5.4 Workflow Worker 内部实现
|
||||
|
||||
```typescript
|
||||
// workflow-worker.ts(engine 代码,不是用户代码)
|
||||
async function runThread(
|
||||
def: WorkflowDefinition,
|
||||
runId: string,
|
||||
triggerPayload: unknown,
|
||||
sendToParent: (msg: WorkerToParentMessage) => void,
|
||||
): Promise<void> {
|
||||
const state: ThreadState = { runId, events: [] };
|
||||
const ctx: WorkflowContext = {
|
||||
runId,
|
||||
workflowName,
|
||||
log: (msg) => sendToParent({
|
||||
type: "thread-event", runId, eventType: "step_complete", payload: { message: msg },
|
||||
}),
|
||||
};
|
||||
|
||||
// 初始事件
|
||||
let event: CommandEvent = { type: "thread_start", ...triggerPayload };
|
||||
|
||||
while (true) {
|
||||
state.events.push(event);
|
||||
const next = def.moderate(state, event);
|
||||
if (next === null) {
|
||||
sendToParent({ type: "thread-event", runId, eventType: "completed", payload: null });
|
||||
return;
|
||||
}
|
||||
const role = def.roles[next.role];
|
||||
if (!role) {
|
||||
sendToParent({ type: "error", runId, error: `Unknown role: ${next.role}` });
|
||||
return;
|
||||
}
|
||||
event = await role.execute(next.prompt, ctx);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## 6. 持久化
|
||||
|
||||
### 6.1 事件溯源
|
||||
|
||||
Thread 状态以 append-only 事件流为 source of truth,写入统一的 `logs.db`:
|
||||
|
||||
```sql
|
||||
-- logs 表(已存在于 RFC-001),source = "workflow"
|
||||
-- type 取值:queued, started, step_complete, completed, failed, crashed, dropped
|
||||
```
|
||||
|
||||
### 6.2 物化表
|
||||
|
||||
为避免每次查活跃 workflow 都扫描全表,维护一张物化表,在写 log 的同一事务中 UPSERT:
|
||||
|
||||
```sql
|
||||
CREATE TABLE workflow_runs (
|
||||
run_id TEXT PRIMARY KEY,
|
||||
workflow TEXT NOT NULL,
|
||||
status TEXT NOT NULL, -- queued, started, completed, failed, crashed
|
||||
ts INTEGER NOT NULL
|
||||
);
|
||||
|
||||
CREATE INDEX idx_workflow_runs_status ON workflow_runs(status);
|
||||
CREATE INDEX idx_workflow_runs_workflow ON workflow_runs(workflow);
|
||||
```
|
||||
|
||||
写入流程(同一事务):
|
||||
|
||||
```sql
|
||||
BEGIN;
|
||||
INSERT INTO logs (source, type, ref_id, payload, ts)
|
||||
VALUES ('workflow', 'started', 'run-7', '{}', 1001);
|
||||
INSERT OR REPLACE INTO workflow_runs (run_id, workflow, status, ts)
|
||||
VALUES ('run-7', 'cleanup', 'started', 1001);
|
||||
COMMIT;
|
||||
```
|
||||
|
||||
物化表是 logs 的派生数据——数据丢失时可从 logs 重建。
|
||||
|
||||
### 6.3 崩溃恢复
|
||||
|
||||
Worker 重启时:
|
||||
|
||||
1. 查 `workflow_runs WHERE status IN ('queued', 'started')`
|
||||
2. `queued` → 重新排队
|
||||
3. `started` → 从 logs 重建 `ThreadState`(事件列表),resume
|
||||
|
||||
幂等 workflow(`overflow: drop`)直接标记 `crashed` 重新来。
|
||||
|
||||
## 7. Kernel 扩展
|
||||
|
||||
### 7.1 WorkflowManager
|
||||
|
||||
Kernel 新增 `WorkflowManager` 组件:
|
||||
|
||||
```typescript
|
||||
export type WorkflowManager = {
|
||||
/** 触发一个 workflow(来自 Reflex) */
|
||||
startWorkflow: (workflowName: string, payload: unknown) => void;
|
||||
/** 获取活跃 thread 数 */
|
||||
activeCount: (workflowName: string) => number;
|
||||
/** 获取队列长度 */
|
||||
queueLength: (workflowName: string) => number;
|
||||
/** 停止所有 workflow */
|
||||
stop: () => Promise<void>;
|
||||
};
|
||||
```
|
||||
|
||||
### 7.2 Reflex Scheduler 扩展
|
||||
|
||||
当前 `ReflexScheduler` 只处理 `kind: "sense"` 的 reflex。扩展为同时处理 `kind: "workflow"`:
|
||||
|
||||
```typescript
|
||||
// reflex-scheduler.ts 扩展
|
||||
if (reflex.kind === "workflow") {
|
||||
const workflowName = reflex.workflow;
|
||||
if (reflex.on !== null && reflex.on.length > 0) {
|
||||
const watchedSenses = new Set(reflex.on);
|
||||
const unsub = bus.subscribe((signal) => {
|
||||
if (watchedSenses.has(signal.senseId)) {
|
||||
workflowManager.startWorkflow(workflowName, signal.payload);
|
||||
}
|
||||
});
|
||||
unsubscribers.push(unsub);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 7.3 热更新
|
||||
|
||||
| 变化 | 处理 |
|
||||
|------|------|
|
||||
| workflow ts 文件修改 | drain(等活跃 thread 完成,`drainTimeout` 后 force kill + 标记 crashed)→ respawn |
|
||||
| nerve.yaml workflow 配置修改 | 更新 concurrency/overflow,不重启 worker |
|
||||
| 新增 workflow | 下次触发时按需启动 worker |
|
||||
| 删除 workflow | drain + 移除 |
|
||||
|
||||
## 8. 实现计划
|
||||
|
||||
### Phase 1:核心类型与 WorkflowManager 骨架
|
||||
|
||||
- [ ] `packages/core/src/types.ts` — 扩展 workflow 相关类型(`WorkflowDefinition`、`ThreadState`、`CommandEvent`、`ModerateResult` 等)
|
||||
- [ ] `packages/daemon/src/ipc.ts` — 扩展 IPC 消息类型(`StartThreadMessage`、`ThreadEventMessage` 等)
|
||||
- [ ] `packages/daemon/src/workflow-manager.ts` — WorkflowManager 实现(并发控制、队列管理、thread 生命周期)
|
||||
- [ ] `packages/daemon/src/workflow-worker.ts` — Workflow worker 进程(加载用户代码、运行 moderate/execute 循环)
|
||||
- [ ] `packages/daemon/src/log-store.ts` — 扩展 LogStore,添加 `workflow_runs` 物化表 + 查询方法
|
||||
- [ ] 单元测试覆盖:并发控制、队列溢出、thread 生命周期
|
||||
|
||||
### Phase 2:Kernel 集成
|
||||
|
||||
- [ ] `packages/daemon/src/kernel.ts` — 集成 WorkflowManager,处理 workflow worker 的生命周期
|
||||
- [ ] `packages/daemon/src/reflex-scheduler.ts` — 扩展支持 `kind: "workflow"` 的 reflex
|
||||
- [ ] 集成测试:Sense signal → Reflex → Workflow 全链路
|
||||
|
||||
### Phase 3:崩溃恢复与热更新
|
||||
|
||||
- [ ] Worker crash recovery — 从持久化状态恢复 thread
|
||||
- [ ] 热更新 — workflow 代码变更时 drain + respawn
|
||||
- [ ] nerve.yaml workflow 配置变更的增量更新
|
||||
|
||||
### Phase 4:CLI 与用户体验
|
||||
|
||||
- [ ] `nerve workflow list` — 查看活跃 workflow
|
||||
- [ ] `nerve workflow inspect <runId>` — 查看 thread 详情
|
||||
- [ ] `nerve workflow trigger <name>` — 手动触发
|
||||
- [ ] scaffold 模板 — `nerve init workflow <name>`
|
||||
@@ -8,6 +8,14 @@ export type {
|
||||
QueueOverflowConfig,
|
||||
WorkflowConfig,
|
||||
NerveConfig,
|
||||
CommandEvent,
|
||||
ThreadState,
|
||||
ModerateResult,
|
||||
WorkflowContext,
|
||||
RoleExecuteFn,
|
||||
Role,
|
||||
ModerateFn,
|
||||
WorkflowDefinition,
|
||||
} from "./types.js";
|
||||
export type { Result } from "./result.js";
|
||||
export { ok, err } from "./result.js";
|
||||
|
||||
@@ -45,3 +45,58 @@ export type NerveConfig = {
|
||||
reflexes: ReflexConfig[];
|
||||
workflows: Record<string, WorkflowConfig> | null;
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Workflow Engine types (RFC-002)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** A single event in the command event stream that drives a workflow thread. */
|
||||
export type CommandEvent = {
|
||||
type: string;
|
||||
[key: string]: unknown;
|
||||
};
|
||||
|
||||
/** Accumulated state of a running thread — the event history for moderate(). */
|
||||
export type ThreadState = {
|
||||
runId: string;
|
||||
/** All events so far, including the initial thread_start event. */
|
||||
events: CommandEvent[];
|
||||
};
|
||||
|
||||
/** The result of moderate() — which role to hand to next, and what prompt to pass. */
|
||||
export type ModerateResult = {
|
||||
role: string;
|
||||
prompt: unknown;
|
||||
};
|
||||
|
||||
/** Context injected into every role execute() call. */
|
||||
export type WorkflowContext = {
|
||||
runId: string;
|
||||
workflowName: string;
|
||||
/** Emit a log message back to the parent process. */
|
||||
log: (message: string) => void;
|
||||
};
|
||||
|
||||
/**
|
||||
* A role's execute function. Has side effects (API calls, file I/O, etc.).
|
||||
* Returns a CommandEvent that is fed back into moderate().
|
||||
*/
|
||||
export type RoleExecuteFn = (prompt: unknown, ctx: WorkflowContext) => Promise<CommandEvent>;
|
||||
|
||||
/** A role in a workflow — a named unit of execution with side effects. */
|
||||
export type Role = {
|
||||
execute: RoleExecuteFn;
|
||||
};
|
||||
|
||||
/**
|
||||
* The moderator function — pure, no side effects.
|
||||
* Decides which role to pass control to next.
|
||||
* Returns null to signal thread completion.
|
||||
*/
|
||||
export type ModerateFn = (thread: ThreadState, event: CommandEvent) => ModerateResult | null;
|
||||
|
||||
/** The complete definition of a workflow, as authored by users. */
|
||||
export type WorkflowDefinition = {
|
||||
roles: Record<string, Role>;
|
||||
moderate: ModerateFn;
|
||||
};
|
||||
|
||||
@@ -0,0 +1,418 @@
|
||||
/**
|
||||
* Integration tests for Kernel + WorkflowManager integration.
|
||||
*
|
||||
* Verifies that sense signals trigger workflow runs via workflow reflexes,
|
||||
* that workflow events are logged, that reloadConfig handles workflow changes,
|
||||
* and that graceful shutdown stops workflow workers.
|
||||
*
|
||||
* Uses mocked child_process.fork to avoid real subprocesses.
|
||||
*/
|
||||
|
||||
import { EventEmitter } from "node:events";
|
||||
|
||||
import type { NerveConfig } from "@uncaged/nerve-core";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock child_process.fork before importing kernel
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
type MockChild = EventEmitter & {
|
||||
send: ReturnType<typeof vi.fn>;
|
||||
kill: ReturnType<typeof vi.fn>;
|
||||
connected: boolean;
|
||||
exitCode: number | null;
|
||||
pid: number;
|
||||
};
|
||||
|
||||
const mockChildren: MockChild[] = [];
|
||||
|
||||
function makeMockChild(pid = 1): MockChild {
|
||||
const child = new EventEmitter() as MockChild;
|
||||
child.connected = true;
|
||||
child.exitCode = null;
|
||||
child.pid = pid;
|
||||
child.send = vi.fn((msg: unknown) => {
|
||||
if (
|
||||
msg !== null &&
|
||||
typeof msg === "object" &&
|
||||
(msg as Record<string, unknown>).type === "shutdown"
|
||||
) {
|
||||
setImmediate(() => {
|
||||
child.exitCode = 0;
|
||||
child.connected = false;
|
||||
child.emit("exit", 0, null);
|
||||
});
|
||||
}
|
||||
});
|
||||
child.kill = vi.fn((_signal?: string) => {
|
||||
child.exitCode = 1;
|
||||
child.connected = false;
|
||||
child.emit("exit", null, _signal ?? "SIGKILL");
|
||||
});
|
||||
return child;
|
||||
}
|
||||
|
||||
vi.mock("node:child_process", () => ({
|
||||
fork: vi.fn((_script: string, _args: string[], _opts: unknown) => {
|
||||
const child = makeMockChild(mockChildren.length + 1);
|
||||
mockChildren.push(child);
|
||||
return child;
|
||||
}),
|
||||
}));
|
||||
|
||||
// Import after mock is set up
|
||||
const { createKernel } = await import("../kernel.js");
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function makeLogStore() {
|
||||
return {
|
||||
append: vi.fn(),
|
||||
query: vi.fn(() => []),
|
||||
getMeta: vi.fn(() => null),
|
||||
setMeta: vi.fn(),
|
||||
upsertWorkflowRun: vi.fn(),
|
||||
appendWithWorkflowUpdate: vi.fn(),
|
||||
getWorkflowRun: vi.fn(() => null),
|
||||
getActiveWorkflowRuns: vi.fn(() => []),
|
||||
close: vi.fn(),
|
||||
};
|
||||
}
|
||||
|
||||
function makeConfig(overrides: Partial<NerveConfig> = {}): NerveConfig {
|
||||
return {
|
||||
senses: {
|
||||
"cpu-usage": { group: "system", throttle: null, timeout: null, gracePeriod: null },
|
||||
},
|
||||
reflexes: [],
|
||||
workflows: null,
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe("kernel + workflowManager integration", () => {
|
||||
beforeEach(() => {
|
||||
mockChildren.length = 0;
|
||||
vi.useFakeTimers({ shouldAdvanceTime: true });
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
vi.useRealTimers();
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
describe("sense signal triggers workflow via reflex", () => {
|
||||
it("calls workflowManager.startWorkflow when a sense signal fires on a workflow reflex", async () => {
|
||||
const logStore = makeLogStore();
|
||||
const config = makeConfig({
|
||||
senses: {
|
||||
"cpu-usage": { group: "system", throttle: null, timeout: null, gracePeriod: null },
|
||||
},
|
||||
reflexes: [{ kind: "workflow", workflow: "my-workflow", on: ["cpu-usage"] }],
|
||||
workflows: { "my-workflow": { concurrency: 2, overflow: "drop" } },
|
||||
});
|
||||
|
||||
const kernel = createKernel(config, "/tmp/nerve-test", {
|
||||
workerScript: "fake-worker.js",
|
||||
logStore,
|
||||
});
|
||||
|
||||
// Emit a signal from "cpu-usage" on the bus
|
||||
const { createSignalBus } = await import("../signal-bus.js");
|
||||
void createSignalBus; // ensure import resolves
|
||||
kernel.bus.emit({ id: 1, senseId: "cpu-usage", payload: { value: 80 }, ts: Date.now() });
|
||||
|
||||
// The workflow worker should be spawned (one for the sense group, one for workflow)
|
||||
// The sense group worker is mockChildren[0]; the workflow worker is mockChildren[1]
|
||||
// We need to check that a start-thread message was sent to the workflow worker
|
||||
const workflowWorker = mockChildren.find((c) =>
|
||||
(c.send as ReturnType<typeof vi.fn>).mock.calls.some(
|
||||
([msg]: [unknown]) =>
|
||||
msg !== null &&
|
||||
typeof msg === "object" &&
|
||||
(msg as Record<string, unknown>).type === "start-thread",
|
||||
),
|
||||
);
|
||||
expect(workflowWorker).toBeDefined();
|
||||
|
||||
const stopPromise = kernel.stop();
|
||||
await vi.runAllTimersAsync();
|
||||
await stopPromise;
|
||||
});
|
||||
|
||||
it("passes the signal payload as triggerPayload to the workflow", async () => {
|
||||
const logStore = makeLogStore();
|
||||
const config = makeConfig({
|
||||
senses: {
|
||||
"cpu-usage": { group: "system", throttle: null, timeout: null, gracePeriod: null },
|
||||
},
|
||||
reflexes: [{ kind: "workflow", workflow: "alert-workflow", on: ["cpu-usage"] }],
|
||||
workflows: { "alert-workflow": { concurrency: 1, overflow: "drop" } },
|
||||
});
|
||||
|
||||
const kernel = createKernel(config, "/tmp/nerve-test", {
|
||||
workerScript: "fake-worker.js",
|
||||
logStore,
|
||||
});
|
||||
|
||||
const payload = { level: "critical", value: 99 };
|
||||
kernel.bus.emit({ id: 1, senseId: "cpu-usage", payload, ts: Date.now() });
|
||||
|
||||
// Find the start-thread call and verify triggerPayload
|
||||
const startThreadCall = mockChildren
|
||||
.flatMap((c) => (c.send as ReturnType<typeof vi.fn>).mock.calls as [unknown][])
|
||||
.find(
|
||||
([msg]) =>
|
||||
msg !== null &&
|
||||
typeof msg === "object" &&
|
||||
(msg as Record<string, unknown>).type === "start-thread",
|
||||
);
|
||||
|
||||
expect(startThreadCall).toBeDefined();
|
||||
expect(startThreadCall?.[0]).toMatchObject({
|
||||
type: "start-thread",
|
||||
workflow: "alert-workflow",
|
||||
triggerPayload: payload,
|
||||
});
|
||||
|
||||
const stopPromise = kernel.stop();
|
||||
await vi.runAllTimersAsync();
|
||||
await stopPromise;
|
||||
});
|
||||
|
||||
it("does not trigger workflow when signal senseId is not in 'on' list", async () => {
|
||||
const logStore = makeLogStore();
|
||||
const config = makeConfig({
|
||||
senses: {
|
||||
"cpu-usage": { group: "system", throttle: null, timeout: null, gracePeriod: null },
|
||||
"disk-io": { group: "system", throttle: null, timeout: null, gracePeriod: null },
|
||||
},
|
||||
reflexes: [{ kind: "workflow", workflow: "my-workflow", on: ["disk-io"] }],
|
||||
workflows: { "my-workflow": { concurrency: 1, overflow: "drop" } },
|
||||
});
|
||||
|
||||
const kernel = createKernel(config, "/tmp/nerve-test", {
|
||||
workerScript: "fake-worker.js",
|
||||
logStore,
|
||||
});
|
||||
|
||||
// Emit signal from cpu-usage — NOT in the workflow's "on" list
|
||||
kernel.bus.emit({ id: 1, senseId: "cpu-usage", payload: 50, ts: Date.now() });
|
||||
|
||||
// No workflow worker should have been spawned (only the sense group worker)
|
||||
const workflowWorkerSpawned = mockChildren.some((c) =>
|
||||
(c.send as ReturnType<typeof vi.fn>).mock.calls.some(
|
||||
([msg]: [unknown]) =>
|
||||
msg !== null &&
|
||||
typeof msg === "object" &&
|
||||
(msg as Record<string, unknown>).type === "start-thread",
|
||||
),
|
||||
);
|
||||
expect(workflowWorkerSpawned).toBe(false);
|
||||
|
||||
const stopPromise = kernel.stop();
|
||||
await vi.runAllTimersAsync();
|
||||
await stopPromise;
|
||||
});
|
||||
});
|
||||
|
||||
describe("workflow events are logged", () => {
|
||||
it("logs a 'started' event when workflow thread is triggered", async () => {
|
||||
const logStore = makeLogStore();
|
||||
const config = makeConfig({
|
||||
senses: {
|
||||
"cpu-usage": { group: "system", throttle: null, timeout: null, gracePeriod: null },
|
||||
},
|
||||
reflexes: [{ kind: "workflow", workflow: "log-test-workflow", on: ["cpu-usage"] }],
|
||||
workflows: { "log-test-workflow": { concurrency: 2, overflow: "drop" } },
|
||||
});
|
||||
|
||||
const kernel = createKernel(config, "/tmp/nerve-test", {
|
||||
workerScript: "fake-worker.js",
|
||||
logStore,
|
||||
});
|
||||
|
||||
kernel.bus.emit({ id: 1, senseId: "cpu-usage", payload: null, ts: Date.now() });
|
||||
|
||||
expect(logStore.upsertWorkflowRun).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ source: "workflow", type: "started" }),
|
||||
expect.objectContaining({ workflow: "log-test-workflow", status: "started" }),
|
||||
);
|
||||
|
||||
const stopPromise = kernel.stop();
|
||||
await vi.runAllTimersAsync();
|
||||
await stopPromise;
|
||||
});
|
||||
});
|
||||
|
||||
describe("reloadConfig handles workflow changes", () => {
|
||||
it("new workflow reflexes are active after reloadConfig", async () => {
|
||||
const logStore = makeLogStore();
|
||||
const initialConfig = makeConfig({
|
||||
senses: {
|
||||
"cpu-usage": { group: "system", throttle: null, timeout: null, gracePeriod: null },
|
||||
},
|
||||
reflexes: [],
|
||||
workflows: null,
|
||||
});
|
||||
|
||||
const kernel = createKernel(initialConfig, "/tmp/nerve-test", {
|
||||
workerScript: "fake-worker.js",
|
||||
logStore,
|
||||
});
|
||||
|
||||
// Reload with a workflow reflex added
|
||||
const newConfig: NerveConfig = {
|
||||
senses: {
|
||||
"cpu-usage": { group: "system", throttle: null, timeout: null, gracePeriod: null },
|
||||
},
|
||||
reflexes: [{ kind: "workflow", workflow: "new-workflow", on: ["cpu-usage"] }],
|
||||
workflows: { "new-workflow": { concurrency: 1, overflow: "drop" } },
|
||||
};
|
||||
kernel.reloadConfig(newConfig);
|
||||
|
||||
// Now emit a signal — should trigger the new workflow
|
||||
kernel.bus.emit({ id: 2, senseId: "cpu-usage", payload: "reload-test", ts: Date.now() });
|
||||
|
||||
const startThreadCall = mockChildren
|
||||
.flatMap((c) => (c.send as ReturnType<typeof vi.fn>).mock.calls as [unknown][])
|
||||
.find(
|
||||
([msg]) =>
|
||||
msg !== null &&
|
||||
typeof msg === "object" &&
|
||||
(msg as Record<string, unknown>).type === "start-thread" &&
|
||||
(msg as Record<string, unknown>).workflow === "new-workflow",
|
||||
);
|
||||
|
||||
expect(startThreadCall).toBeDefined();
|
||||
|
||||
const stopPromise = kernel.stop();
|
||||
await vi.runAllTimersAsync();
|
||||
await stopPromise;
|
||||
});
|
||||
|
||||
it("old workflow reflexes are removed after reloadConfig", async () => {
|
||||
const logStore = makeLogStore();
|
||||
const initialConfig = makeConfig({
|
||||
senses: {
|
||||
"cpu-usage": { group: "system", throttle: null, timeout: null, gracePeriod: null },
|
||||
},
|
||||
reflexes: [{ kind: "workflow", workflow: "old-workflow", on: ["cpu-usage"] }],
|
||||
workflows: { "old-workflow": { concurrency: 1, overflow: "drop" } },
|
||||
});
|
||||
|
||||
const kernel = createKernel(initialConfig, "/tmp/nerve-test", {
|
||||
workerScript: "fake-worker.js",
|
||||
logStore,
|
||||
});
|
||||
|
||||
// Reload with the workflow reflex removed
|
||||
const newConfig: NerveConfig = {
|
||||
senses: {
|
||||
"cpu-usage": { group: "system", throttle: null, timeout: null, gracePeriod: null },
|
||||
},
|
||||
reflexes: [],
|
||||
workflows: null,
|
||||
};
|
||||
kernel.reloadConfig(newConfig);
|
||||
|
||||
// Clear send history
|
||||
for (const c of mockChildren) {
|
||||
(c.send as ReturnType<typeof vi.fn>).mockClear();
|
||||
}
|
||||
|
||||
// Emit a signal — old-workflow should NOT be triggered
|
||||
kernel.bus.emit({ id: 3, senseId: "cpu-usage", payload: "after-reload", ts: Date.now() });
|
||||
|
||||
const startThreadCall = mockChildren
|
||||
.flatMap((c) => (c.send as ReturnType<typeof vi.fn>).mock.calls as [unknown][])
|
||||
.find(
|
||||
([msg]) =>
|
||||
msg !== null &&
|
||||
typeof msg === "object" &&
|
||||
(msg as Record<string, unknown>).type === "start-thread",
|
||||
);
|
||||
|
||||
expect(startThreadCall).toBeUndefined();
|
||||
|
||||
const stopPromise = kernel.stop();
|
||||
await vi.runAllTimersAsync();
|
||||
await stopPromise;
|
||||
});
|
||||
});
|
||||
|
||||
describe("graceful shutdown stops workflow workers", () => {
|
||||
it("stop() resolves after workflow workers exit", async () => {
|
||||
const logStore = makeLogStore();
|
||||
const config = makeConfig({
|
||||
senses: {
|
||||
"cpu-usage": { group: "system", throttle: null, timeout: null, gracePeriod: null },
|
||||
},
|
||||
reflexes: [{ kind: "workflow", workflow: "shutdown-test", on: ["cpu-usage"] }],
|
||||
workflows: { "shutdown-test": { concurrency: 1, overflow: "drop" } },
|
||||
});
|
||||
|
||||
const kernel = createKernel(config, "/tmp/nerve-test", {
|
||||
workerScript: "fake-worker.js",
|
||||
logStore,
|
||||
});
|
||||
|
||||
// Trigger a workflow so a worker is spawned
|
||||
kernel.bus.emit({ id: 1, senseId: "cpu-usage", payload: null, ts: Date.now() });
|
||||
|
||||
const stopPromise = kernel.stop();
|
||||
await vi.runAllTimersAsync();
|
||||
await expect(stopPromise).resolves.toBeUndefined();
|
||||
});
|
||||
|
||||
it("workflowManager is exposed on kernel", () => {
|
||||
const logStore = makeLogStore();
|
||||
const config = makeConfig({
|
||||
workflows: { "my-wf": { concurrency: 1, overflow: "drop" } },
|
||||
});
|
||||
|
||||
const kernel = createKernel(config, "/tmp/nerve-test", {
|
||||
workerScript: "fake-worker.js",
|
||||
logStore,
|
||||
});
|
||||
|
||||
expect(kernel.workflowManager).toBeDefined();
|
||||
expect(typeof kernel.workflowManager.startWorkflow).toBe("function");
|
||||
expect(typeof kernel.workflowManager.activeCount).toBe("function");
|
||||
expect(typeof kernel.workflowManager.stop).toBe("function");
|
||||
|
||||
kernel.stop().catch(() => {});
|
||||
});
|
||||
|
||||
it("getHealth includes activeWorkflows count", async () => {
|
||||
const logStore = makeLogStore();
|
||||
const config = makeConfig({
|
||||
senses: {
|
||||
"cpu-usage": { group: "system", throttle: null, timeout: null, gracePeriod: null },
|
||||
},
|
||||
reflexes: [{ kind: "workflow", workflow: "health-wf", on: ["cpu-usage"] }],
|
||||
workflows: { "health-wf": { concurrency: 2, overflow: "drop" } },
|
||||
});
|
||||
|
||||
const kernel = createKernel(config, "/tmp/nerve-test", {
|
||||
workerScript: "fake-worker.js",
|
||||
logStore,
|
||||
});
|
||||
|
||||
const health = kernel.getHealth();
|
||||
expect(health).toHaveProperty("activeWorkflows");
|
||||
expect(typeof health.activeWorkflows).toBe("number");
|
||||
|
||||
const stopPromise = kernel.stop();
|
||||
await vi.runAllTimersAsync();
|
||||
await stopPromise;
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,186 @@
|
||||
/**
|
||||
* Tests for workflow-related LogStore functionality (RFC-002 §6.2).
|
||||
*/
|
||||
|
||||
import { mkdtempSync, rmSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { join } from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
|
||||
import { createLogStore } from "../log-store.js";
|
||||
import type { LogStore, WorkflowRun } from "../log-store.js";
|
||||
|
||||
describe("LogStore — workflow_runs", () => {
|
||||
let tmpDir: string;
|
||||
let store: LogStore;
|
||||
|
||||
beforeEach(() => {
|
||||
tmpDir = mkdtempSync(join(tmpdir(), "nerve-wf-log-test-"));
|
||||
store = createLogStore(join(tmpDir, "data", "logs.db"));
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
store.close();
|
||||
rmSync(tmpDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
describe("upsertWorkflowRun", () => {
|
||||
it("writes a log entry and creates a workflow_runs row atomically", () => {
|
||||
const run: WorkflowRun = {
|
||||
runId: "run-1",
|
||||
workflow: "cleanup",
|
||||
status: "started",
|
||||
ts: 1000,
|
||||
};
|
||||
|
||||
const entry = store.upsertWorkflowRun(
|
||||
{ source: "workflow", type: "started", refId: "run-1", payload: null, ts: 1000 },
|
||||
run,
|
||||
);
|
||||
|
||||
expect(entry.id).toBeGreaterThan(0);
|
||||
expect(entry.source).toBe("workflow");
|
||||
expect(entry.type).toBe("started");
|
||||
|
||||
const stored = store.getWorkflowRun("run-1");
|
||||
expect(stored).not.toBeNull();
|
||||
expect(stored?.runId).toBe("run-1");
|
||||
expect(stored?.workflow).toBe("cleanup");
|
||||
expect(stored?.status).toBe("started");
|
||||
expect(stored?.ts).toBe(1000);
|
||||
});
|
||||
|
||||
it("updates existing workflow_runs row on upsert (status transition)", () => {
|
||||
store.upsertWorkflowRun(
|
||||
{ source: "workflow", type: "started", refId: "run-2", payload: null, ts: 1000 },
|
||||
{ runId: "run-2", workflow: "cleanup", status: "started", ts: 1000 },
|
||||
);
|
||||
|
||||
store.upsertWorkflowRun(
|
||||
{ source: "workflow", type: "completed", refId: "run-2", payload: null, ts: 2000 },
|
||||
{ runId: "run-2", workflow: "cleanup", status: "completed", ts: 2000 },
|
||||
);
|
||||
|
||||
const stored = store.getWorkflowRun("run-2");
|
||||
expect(stored?.status).toBe("completed");
|
||||
expect(stored?.ts).toBe(2000);
|
||||
|
||||
// Both log entries should be present (event sourcing)
|
||||
const logs = store.query({ refId: "run-2" });
|
||||
expect(logs).toHaveLength(2);
|
||||
});
|
||||
|
||||
it("the log entries act as source of truth for event history", () => {
|
||||
for (const [type, status, ts] of [
|
||||
["queued", "queued", 1000],
|
||||
["started", "started", 1001],
|
||||
["step_complete", "started", 1002],
|
||||
["completed", "completed", 1005],
|
||||
] as const) {
|
||||
store.upsertWorkflowRun(
|
||||
{ source: "workflow", type, refId: "run-3", payload: null, ts },
|
||||
{ runId: "run-3", workflow: "cleanup", status, ts },
|
||||
);
|
||||
}
|
||||
|
||||
const logs = store.query({ source: "workflow", refId: "run-3" });
|
||||
expect(logs).toHaveLength(4);
|
||||
expect(logs[0].type).toBe("queued");
|
||||
expect(logs[3].type).toBe("completed");
|
||||
});
|
||||
});
|
||||
|
||||
describe("getWorkflowRun", () => {
|
||||
it("returns null for a non-existent runId", () => {
|
||||
expect(store.getWorkflowRun("no-such-run")).toBeNull();
|
||||
});
|
||||
|
||||
it("returns the latest state after multiple upserts", () => {
|
||||
store.upsertWorkflowRun(
|
||||
{ source: "workflow", type: "queued", refId: "run-4", payload: null, ts: 100 },
|
||||
{ runId: "run-4", workflow: "code-review", status: "queued", ts: 100 },
|
||||
);
|
||||
store.upsertWorkflowRun(
|
||||
{ source: "workflow", type: "started", refId: "run-4", payload: null, ts: 200 },
|
||||
{ runId: "run-4", workflow: "code-review", status: "started", ts: 200 },
|
||||
);
|
||||
|
||||
const run = store.getWorkflowRun("run-4");
|
||||
expect(run?.status).toBe("started");
|
||||
expect(run?.ts).toBe(200);
|
||||
});
|
||||
});
|
||||
|
||||
describe("getActiveWorkflowRuns", () => {
|
||||
beforeEach(() => {
|
||||
store.upsertWorkflowRun(
|
||||
{ source: "workflow", type: "queued", refId: "r1", payload: null, ts: 100 },
|
||||
{ runId: "r1", workflow: "cleanup", status: "queued", ts: 100 },
|
||||
);
|
||||
store.upsertWorkflowRun(
|
||||
{ source: "workflow", type: "started", refId: "r2", payload: null, ts: 200 },
|
||||
{ runId: "r2", workflow: "cleanup", status: "started", ts: 200 },
|
||||
);
|
||||
store.upsertWorkflowRun(
|
||||
{ source: "workflow", type: "completed", refId: "r3", payload: null, ts: 300 },
|
||||
{ runId: "r3", workflow: "cleanup", status: "completed", ts: 300 },
|
||||
);
|
||||
store.upsertWorkflowRun(
|
||||
{ source: "workflow", type: "failed", refId: "r4", payload: null, ts: 400 },
|
||||
{ runId: "r4", workflow: "deploy", status: "queued", ts: 400 },
|
||||
);
|
||||
});
|
||||
|
||||
it("returns queued and started runs by default", () => {
|
||||
const active = store.getActiveWorkflowRuns();
|
||||
expect(active).toHaveLength(3);
|
||||
const ids = active.map((r) => r.runId);
|
||||
expect(ids).toContain("r1");
|
||||
expect(ids).toContain("r2");
|
||||
expect(ids).toContain("r4");
|
||||
});
|
||||
|
||||
it("excludes completed and failed runs", () => {
|
||||
const active = store.getActiveWorkflowRuns();
|
||||
const ids = active.map((r) => r.runId);
|
||||
expect(ids).not.toContain("r3");
|
||||
});
|
||||
|
||||
it("filters by workflow name", () => {
|
||||
const cleanupRuns = store.getActiveWorkflowRuns("cleanup");
|
||||
expect(cleanupRuns).toHaveLength(2);
|
||||
const ids = cleanupRuns.map((r) => r.runId);
|
||||
expect(ids).toContain("r1");
|
||||
expect(ids).toContain("r2");
|
||||
});
|
||||
|
||||
it("returns only matching workflow when name given", () => {
|
||||
const deployRuns = store.getActiveWorkflowRuns("deploy");
|
||||
expect(deployRuns).toHaveLength(1);
|
||||
expect(deployRuns[0].runId).toBe("r4");
|
||||
});
|
||||
|
||||
it("returns empty array when workflow name matches no active runs", () => {
|
||||
expect(store.getActiveWorkflowRuns("nonexistent")).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("returns runs ordered by ts ascending", () => {
|
||||
const active = store.getActiveWorkflowRuns();
|
||||
expect(active[0].ts).toBeLessThan(active[1].ts);
|
||||
});
|
||||
});
|
||||
|
||||
describe("all statuses are storable", () => {
|
||||
it.each(["queued", "started", "completed", "failed", "crashed", "dropped"] as const)(
|
||||
"stores status=%s",
|
||||
(status) => {
|
||||
const runId = `run-${status}`;
|
||||
store.upsertWorkflowRun(
|
||||
{ source: "workflow", type: status, refId: runId, payload: null, ts: 1 },
|
||||
{ runId, workflow: "test", status, ts: 1 },
|
||||
);
|
||||
expect(store.getWorkflowRun(runId)?.status).toBe(status);
|
||||
},
|
||||
);
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,362 @@
|
||||
/**
|
||||
* Tests for WorkflowManager — concurrency, overflow, queue, and lifecycle.
|
||||
*/
|
||||
|
||||
import { EventEmitter } from "node:events";
|
||||
|
||||
import type { NerveConfig } from "@uncaged/nerve-core";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Mock child_process.fork before importing workflow-manager
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
type MockChild = EventEmitter & {
|
||||
send: ReturnType<typeof vi.fn>;
|
||||
kill: ReturnType<typeof vi.fn>;
|
||||
connected: boolean;
|
||||
exitCode: number | null;
|
||||
pid: number;
|
||||
};
|
||||
|
||||
const mockChildren: MockChild[] = [];
|
||||
|
||||
function makeMockChild(pid = 1): MockChild {
|
||||
const child = new EventEmitter() as MockChild;
|
||||
child.connected = true;
|
||||
child.exitCode = null;
|
||||
child.pid = pid;
|
||||
child.send = vi.fn((msg: unknown) => {
|
||||
if (
|
||||
msg !== null &&
|
||||
typeof msg === "object" &&
|
||||
(msg as Record<string, unknown>).type === "shutdown"
|
||||
) {
|
||||
setImmediate(() => {
|
||||
child.exitCode = 0;
|
||||
child.connected = false;
|
||||
child.emit("exit", 0, null);
|
||||
});
|
||||
}
|
||||
});
|
||||
child.kill = vi.fn((_signal?: string) => {
|
||||
child.exitCode = 1;
|
||||
child.connected = false;
|
||||
child.emit("exit", null, _signal ?? "SIGKILL");
|
||||
});
|
||||
return child;
|
||||
}
|
||||
|
||||
vi.mock("node:child_process", () => ({
|
||||
fork: vi.fn((_script: string, _args: string[], _opts: unknown) => {
|
||||
const child = makeMockChild(mockChildren.length + 1);
|
||||
mockChildren.push(child);
|
||||
return child;
|
||||
}),
|
||||
}));
|
||||
|
||||
// Import after mock is set up
|
||||
const { createWorkflowManager } = await import("../workflow-manager.js");
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function makeLogStore() {
|
||||
return {
|
||||
append: vi.fn(),
|
||||
query: vi.fn(() => []),
|
||||
getMeta: vi.fn(() => null),
|
||||
setMeta: vi.fn(),
|
||||
upsertWorkflowRun: vi.fn(),
|
||||
appendWithWorkflowUpdate: vi.fn(),
|
||||
getWorkflowRun: vi.fn(() => null),
|
||||
getActiveWorkflowRuns: vi.fn(() => []),
|
||||
close: vi.fn(),
|
||||
};
|
||||
}
|
||||
|
||||
function makeConfig(overrides: Partial<NerveConfig["workflows"]> = {}): NerveConfig {
|
||||
return {
|
||||
senses: {},
|
||||
reflexes: [],
|
||||
workflows: overrides as NerveConfig["workflows"],
|
||||
};
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe("WorkflowManager", () => {
|
||||
beforeEach(() => {
|
||||
mockChildren.length = 0;
|
||||
vi.useFakeTimers({ shouldAdvanceTime: true });
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
vi.useRealTimers();
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
describe("startWorkflow under concurrency limit dispatches thread", () => {
|
||||
it("forks a worker and sends start-thread when active < concurrency", () => {
|
||||
const logStore = makeLogStore();
|
||||
const config = makeConfig({
|
||||
"my-workflow": { concurrency: 2, overflow: "drop" },
|
||||
});
|
||||
const mgr = createWorkflowManager("/nerve-root", config, logStore);
|
||||
|
||||
mgr.startWorkflow("my-workflow", { event: "test" });
|
||||
|
||||
expect(mockChildren).toHaveLength(1);
|
||||
expect(mockChildren[0].send).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ type: "start-thread", workflow: "my-workflow" }),
|
||||
);
|
||||
expect(mgr.activeCount("my-workflow")).toBe(1);
|
||||
});
|
||||
|
||||
it("reuses the same worker for a second thread under the limit", () => {
|
||||
const logStore = makeLogStore();
|
||||
const config = makeConfig({
|
||||
"my-workflow": { concurrency: 3, overflow: "drop" },
|
||||
});
|
||||
const mgr = createWorkflowManager("/nerve-root", config, logStore);
|
||||
|
||||
mgr.startWorkflow("my-workflow", { n: 1 });
|
||||
mgr.startWorkflow("my-workflow", { n: 2 });
|
||||
|
||||
// Only one forked child — worker is reused
|
||||
expect(mockChildren).toHaveLength(1);
|
||||
expect(mockChildren[0].send).toHaveBeenCalledTimes(2);
|
||||
expect(mgr.activeCount("my-workflow")).toBe(2);
|
||||
});
|
||||
|
||||
it("logs a 'started' event for each dispatched thread", () => {
|
||||
const logStore = makeLogStore();
|
||||
const config = makeConfig({
|
||||
"my-workflow": { concurrency: 2, overflow: "drop" },
|
||||
});
|
||||
const mgr = createWorkflowManager("/nerve-root", config, logStore);
|
||||
|
||||
mgr.startWorkflow("my-workflow", { x: 1 });
|
||||
|
||||
expect(logStore.upsertWorkflowRun).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ source: "workflow", type: "started" }),
|
||||
expect.objectContaining({ workflow: "my-workflow", status: "started" }),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe("startWorkflow at limit with drop overflow drops the request", () => {
|
||||
it("does NOT send start-thread when at concurrency limit with overflow=drop", () => {
|
||||
const logStore = makeLogStore();
|
||||
const config = makeConfig({
|
||||
"drop-wf": { concurrency: 1, overflow: "drop" },
|
||||
});
|
||||
const mgr = createWorkflowManager("/nerve-root", config, logStore);
|
||||
|
||||
mgr.startWorkflow("drop-wf", { first: true });
|
||||
// now at limit — second call should be dropped
|
||||
mgr.startWorkflow("drop-wf", { second: true });
|
||||
|
||||
expect(mgr.activeCount("drop-wf")).toBe(1);
|
||||
expect(mgr.queueLength("drop-wf")).toBe(0);
|
||||
// Only one start-thread sent
|
||||
expect(mockChildren[0].send).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("logs a 'dropped' event when overflow=drop", () => {
|
||||
const logStore = makeLogStore();
|
||||
const config = makeConfig({
|
||||
"drop-wf": { concurrency: 1, overflow: "drop" },
|
||||
});
|
||||
const mgr = createWorkflowManager("/nerve-root", config, logStore);
|
||||
|
||||
mgr.startWorkflow("drop-wf", {});
|
||||
mgr.startWorkflow("drop-wf", {});
|
||||
|
||||
const droppedCall = logStore.upsertWorkflowRun.mock.calls.find(
|
||||
([entry]) => entry.type === "dropped",
|
||||
);
|
||||
expect(droppedCall).toBeDefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe("startWorkflow at limit with queue overflow queues the request", () => {
|
||||
it("queues the thread when at concurrency limit with overflow=queue", () => {
|
||||
const logStore = makeLogStore();
|
||||
const config = makeConfig({
|
||||
"queue-wf": { concurrency: 1, overflow: "queue", maxQueue: 5 },
|
||||
});
|
||||
const mgr = createWorkflowManager("/nerve-root", config, logStore);
|
||||
|
||||
mgr.startWorkflow("queue-wf", { first: true });
|
||||
mgr.startWorkflow("queue-wf", { second: true });
|
||||
|
||||
expect(mgr.activeCount("queue-wf")).toBe(1);
|
||||
expect(mgr.queueLength("queue-wf")).toBe(1);
|
||||
});
|
||||
|
||||
it("logs a 'queued' event for the waiting thread", () => {
|
||||
const logStore = makeLogStore();
|
||||
const config = makeConfig({
|
||||
"queue-wf": { concurrency: 1, overflow: "queue", maxQueue: 5 },
|
||||
});
|
||||
const mgr = createWorkflowManager("/nerve-root", config, logStore);
|
||||
|
||||
mgr.startWorkflow("queue-wf", {});
|
||||
mgr.startWorkflow("queue-wf", {});
|
||||
|
||||
const queuedCall = logStore.upsertWorkflowRun.mock.calls.find(
|
||||
([entry]) => entry.type === "queued",
|
||||
);
|
||||
expect(queuedCall).toBeDefined();
|
||||
expect(queuedCall?.[1]).toMatchObject({ workflow: "queue-wf", status: "queued" });
|
||||
});
|
||||
});
|
||||
|
||||
describe("queue overflow with maxQueue drops oldest when full", () => {
|
||||
it("drops oldest queued item when queue reaches maxQueue", () => {
|
||||
const logStore = makeLogStore();
|
||||
const config = makeConfig({
|
||||
"queue-wf": { concurrency: 1, overflow: "queue", maxQueue: 2 },
|
||||
});
|
||||
const mgr = createWorkflowManager("/nerve-root", config, logStore);
|
||||
|
||||
// Fill the concurrency slot
|
||||
mgr.startWorkflow("queue-wf", { n: 0 });
|
||||
// Fill the queue to maxQueue
|
||||
mgr.startWorkflow("queue-wf", { n: 1 });
|
||||
mgr.startWorkflow("queue-wf", { n: 2 });
|
||||
// This one should push out { n: 1 }
|
||||
mgr.startWorkflow("queue-wf", { n: 3 });
|
||||
|
||||
// Queue should still be at maxQueue (2)
|
||||
expect(mgr.queueLength("queue-wf")).toBe(2);
|
||||
|
||||
// There should be a dropped event (for the oldest evicted item)
|
||||
const droppedCalls = logStore.upsertWorkflowRun.mock.calls.filter(
|
||||
([entry]) => entry.type === "dropped",
|
||||
);
|
||||
expect(droppedCalls).toHaveLength(1);
|
||||
});
|
||||
});
|
||||
|
||||
describe("completing a thread dequeues the next one", () => {
|
||||
it("dispatches the next queued thread when the active thread sends completed", () => {
|
||||
const logStore = makeLogStore();
|
||||
const config = makeConfig({
|
||||
"queue-wf": { concurrency: 1, overflow: "queue", maxQueue: 5 },
|
||||
});
|
||||
const mgr = createWorkflowManager("/nerve-root", config, logStore);
|
||||
|
||||
mgr.startWorkflow("queue-wf", { first: true });
|
||||
mgr.startWorkflow("queue-wf", { second: true });
|
||||
|
||||
expect(mgr.activeCount("queue-wf")).toBe(1);
|
||||
expect(mgr.queueLength("queue-wf")).toBe(1);
|
||||
|
||||
// Simulate the worker sending a "thread-event: completed"
|
||||
const child = mockChildren[0];
|
||||
const startCalls = (child.send as ReturnType<typeof vi.fn>).mock.calls;
|
||||
const firstRunId = (startCalls[0][0] as { runId: string }).runId;
|
||||
|
||||
child.emit("message", {
|
||||
type: "thread-event",
|
||||
runId: firstRunId,
|
||||
eventType: "completed",
|
||||
payload: null,
|
||||
});
|
||||
|
||||
// The queued thread should now be active
|
||||
expect(mgr.activeCount("queue-wf")).toBe(1);
|
||||
expect(mgr.queueLength("queue-wf")).toBe(0);
|
||||
// A second start-thread message should have been sent
|
||||
expect(child.send).toHaveBeenCalledTimes(2);
|
||||
expect(child.send).toHaveBeenLastCalledWith(
|
||||
expect.objectContaining({ type: "start-thread", workflow: "queue-wf" }),
|
||||
);
|
||||
});
|
||||
|
||||
it("dispatches next queued thread when active thread sends failed", () => {
|
||||
const logStore = makeLogStore();
|
||||
const config = makeConfig({
|
||||
"queue-wf": { concurrency: 1, overflow: "queue", maxQueue: 5 },
|
||||
});
|
||||
const mgr = createWorkflowManager("/nerve-root", config, logStore);
|
||||
|
||||
mgr.startWorkflow("queue-wf", { first: true });
|
||||
mgr.startWorkflow("queue-wf", { second: true });
|
||||
|
||||
const child = mockChildren[0];
|
||||
const firstRunId = (child.send as ReturnType<typeof vi.fn>).mock.calls[0][0].runId as string;
|
||||
|
||||
child.emit("message", {
|
||||
type: "thread-event",
|
||||
runId: firstRunId,
|
||||
eventType: "failed",
|
||||
payload: { error: "boom" },
|
||||
});
|
||||
|
||||
expect(mgr.activeCount("queue-wf")).toBe(1);
|
||||
expect(mgr.queueLength("queue-wf")).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("stop() shuts down all workers", () => {
|
||||
it("sends shutdown to every worker and resolves when they exit", async () => {
|
||||
const logStore = makeLogStore();
|
||||
const config = makeConfig({
|
||||
"wf-a": { concurrency: 2, overflow: "drop" },
|
||||
"wf-b": { concurrency: 1, overflow: "drop" },
|
||||
});
|
||||
const mgr = createWorkflowManager("/nerve-root", config, logStore);
|
||||
|
||||
mgr.startWorkflow("wf-a", {});
|
||||
mgr.startWorkflow("wf-b", {});
|
||||
|
||||
// Two distinct workers should have been forked
|
||||
expect(mockChildren).toHaveLength(2);
|
||||
|
||||
const stopPromise = mgr.stop();
|
||||
// Let setImmediate callbacks run (the mock exits on shutdown)
|
||||
await vi.runAllTimersAsync();
|
||||
await stopPromise;
|
||||
|
||||
for (const child of mockChildren) {
|
||||
expect(child.send).toHaveBeenCalledWith(expect.objectContaining({ type: "shutdown" }));
|
||||
}
|
||||
});
|
||||
|
||||
it("ignores startWorkflow calls after stop()", async () => {
|
||||
const logStore = makeLogStore();
|
||||
const config = makeConfig({
|
||||
"wf-a": { concurrency: 2, overflow: "drop" },
|
||||
});
|
||||
const mgr = createWorkflowManager("/nerve-root", config, logStore);
|
||||
|
||||
const stopPromise = mgr.stop();
|
||||
await vi.runAllTimersAsync();
|
||||
await stopPromise;
|
||||
|
||||
mgr.startWorkflow("wf-a", {});
|
||||
|
||||
// No worker should have been spawned
|
||||
expect(mockChildren).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe("unknown workflow name", () => {
|
||||
it("does nothing for an unknown workflow name", () => {
|
||||
const logStore = makeLogStore();
|
||||
const config = makeConfig({});
|
||||
const mgr = createWorkflowManager("/nerve-root", config, logStore);
|
||||
|
||||
mgr.startWorkflow("no-such-workflow", {});
|
||||
|
||||
expect(mockChildren).toHaveLength(0);
|
||||
expect(logStore.upsertWorkflowRun).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -8,6 +8,10 @@ export type {
|
||||
ErrorMessage,
|
||||
ReadyMessage,
|
||||
WorkerToParentMessage,
|
||||
StartThreadMessage,
|
||||
ResumeThreadMessage,
|
||||
ThreadEventMessage,
|
||||
WorkflowErrorMessage,
|
||||
} from "./ipc.js";
|
||||
|
||||
export type { SignalBus, SignalHandler, Unsubscribe } from "./signal-bus.js";
|
||||
@@ -29,4 +33,7 @@ export { createFileWatcher } from "./file-watcher.js";
|
||||
export type { FileWatcher, FileChange, FileChangeHandler } from "./file-watcher.js";
|
||||
|
||||
export { createLogStore } from "./log-store.js";
|
||||
export type { LogStore, LogEntry, LogQuery } from "./log-store.js";
|
||||
export type { LogStore, LogEntry, LogQuery, WorkflowRun, WorkflowRunStatus } from "./log-store.js";
|
||||
|
||||
export { createWorkflowManager } from "./workflow-manager.js";
|
||||
export type { WorkflowManager } from "./workflow-manager.js";
|
||||
|
||||
+118
-4
@@ -22,8 +22,32 @@ export type HealthRequestMessage = {
|
||||
type: "health-request";
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Workflow IPC messages (RFC-002 §5.2)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Parent → Workflow Worker: start a new thread */
|
||||
export type StartThreadMessage = {
|
||||
type: "start-thread";
|
||||
runId: string;
|
||||
workflow: string;
|
||||
/** The trigger payload from the Reflex that initiated this thread. */
|
||||
triggerPayload: unknown;
|
||||
};
|
||||
|
||||
/** Parent → Workflow Worker: resume an existing thread after crash recovery */
|
||||
export type ResumeThreadMessage = {
|
||||
type: "resume-thread";
|
||||
runId: string;
|
||||
};
|
||||
|
||||
/** Union of all messages the parent sends to a worker */
|
||||
export type ParentToWorkerMessage = ComputeMessage | ShutdownMessage | HealthRequestMessage;
|
||||
export type ParentToWorkerMessage =
|
||||
| ComputeMessage
|
||||
| ShutdownMessage
|
||||
| HealthRequestMessage
|
||||
| StartThreadMessage
|
||||
| ResumeThreadMessage;
|
||||
|
||||
/** Worker → Parent: compute produced a signal */
|
||||
export type SignalMessage = {
|
||||
@@ -51,14 +75,46 @@ export type HealthResponseMessage = {
|
||||
inFlightCount: number;
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Workflow Worker → Parent messages (RFC-002 §5.2)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Valid lifecycle event types for a workflow thread. */
|
||||
export type ThreadEventType = "queued" | "started" | "step_complete" | "completed" | "failed";
|
||||
|
||||
/**
|
||||
* Workflow Worker → Parent: a thread lifecycle event.
|
||||
*/
|
||||
export type ThreadEventMessage = {
|
||||
type: "thread-event";
|
||||
runId: string;
|
||||
eventType: ThreadEventType;
|
||||
payload: unknown;
|
||||
};
|
||||
|
||||
/** Workflow Worker → Parent: a thread encountered an unrecoverable error. */
|
||||
export type WorkflowErrorMessage = {
|
||||
type: "workflow-error";
|
||||
runId: string;
|
||||
error: string;
|
||||
};
|
||||
|
||||
/** Union of all messages a worker sends to the parent */
|
||||
export type WorkerToParentMessage =
|
||||
| SignalMessage
|
||||
| ErrorMessage
|
||||
| ReadyMessage
|
||||
| HealthResponseMessage;
|
||||
| HealthResponseMessage
|
||||
| ThreadEventMessage
|
||||
| WorkflowErrorMessage;
|
||||
|
||||
const PARENT_MSG_TYPES = new Set(["compute", "shutdown", "health-request"]);
|
||||
const PARENT_MSG_TYPES = new Set([
|
||||
"compute",
|
||||
"shutdown",
|
||||
"health-request",
|
||||
"start-thread",
|
||||
"resume-thread",
|
||||
]);
|
||||
|
||||
/** Validate and parse an unknown IPC message received from the parent process. */
|
||||
export function parseParentMessage(raw: unknown): Result<ParentToWorkerMessage> {
|
||||
@@ -72,6 +128,18 @@ export function parseParentMessage(raw: unknown): Result<ParentToWorkerMessage>
|
||||
if (!PARENT_MSG_TYPES.has(obj.type)) {
|
||||
return err(new Error(`Unknown IPC message type: "${obj.type}"`));
|
||||
}
|
||||
if (obj.type === "start-thread") {
|
||||
if (typeof obj.runId !== "string")
|
||||
return err(new Error("'start-thread' message missing string 'runId'"));
|
||||
if (typeof obj.workflow !== "string")
|
||||
return err(new Error("'start-thread' message missing string 'workflow'"));
|
||||
if (!("triggerPayload" in obj))
|
||||
return err(new Error("'start-thread' message missing 'triggerPayload'"));
|
||||
}
|
||||
if (obj.type === "resume-thread") {
|
||||
if (typeof obj.runId !== "string")
|
||||
return err(new Error("'resume-thread' message missing string 'runId'"));
|
||||
}
|
||||
return ok(raw as ParentToWorkerMessage);
|
||||
}
|
||||
|
||||
@@ -108,7 +176,51 @@ function parseHealthResponseMsg(
|
||||
return ok(raw as HealthResponseMessage);
|
||||
}
|
||||
|
||||
const WORKER_MSG_TYPES = new Set(["signal", "error", "ready", "health-response"]);
|
||||
const THREAD_EVENT_TYPES = new Set<string>([
|
||||
"queued",
|
||||
"started",
|
||||
"step_complete",
|
||||
"completed",
|
||||
"failed",
|
||||
]);
|
||||
|
||||
function parseThreadEventMsg(
|
||||
obj: Record<string, unknown>,
|
||||
raw: unknown,
|
||||
): Result<WorkerToParentMessage> {
|
||||
if (typeof obj.runId !== "string") {
|
||||
return err(new Error("Worker 'thread-event' message missing string 'runId' field"));
|
||||
}
|
||||
if (typeof obj.eventType !== "string" || !THREAD_EVENT_TYPES.has(obj.eventType)) {
|
||||
return err(new Error(`Worker 'thread-event' message has invalid 'eventType': "${obj.eventType}"`));
|
||||
}
|
||||
if (!("payload" in obj)) {
|
||||
return err(new Error("Worker 'thread-event' message missing 'payload' field"));
|
||||
}
|
||||
return ok(raw as ThreadEventMessage);
|
||||
}
|
||||
|
||||
function parseWorkflowErrorMsg(
|
||||
obj: Record<string, unknown>,
|
||||
raw: unknown,
|
||||
): Result<WorkerToParentMessage> {
|
||||
if (typeof obj.runId !== "string") {
|
||||
return err(new Error("Worker 'workflow-error' message missing string 'runId' field"));
|
||||
}
|
||||
if (typeof obj.error !== "string") {
|
||||
return err(new Error("Worker 'workflow-error' message missing string 'error' field"));
|
||||
}
|
||||
return ok(raw as WorkflowErrorMessage);
|
||||
}
|
||||
|
||||
const WORKER_MSG_TYPES = new Set([
|
||||
"signal",
|
||||
"error",
|
||||
"ready",
|
||||
"health-response",
|
||||
"thread-event",
|
||||
"workflow-error",
|
||||
]);
|
||||
|
||||
/** Validate and parse an unknown IPC message received from a worker process. */
|
||||
export function parseWorkerMessage(raw: unknown): Result<WorkerToParentMessage> {
|
||||
@@ -125,5 +237,7 @@ export function parseWorkerMessage(raw: unknown): Result<WorkerToParentMessage>
|
||||
if (obj.type === "signal") return parseSignalMsg(obj, raw);
|
||||
if (obj.type === "error") return parseErrorMsg(obj, raw);
|
||||
if (obj.type === "health-response") return parseHealthResponseMsg(obj, raw);
|
||||
if (obj.type === "thread-event") return parseThreadEventMsg(obj, raw);
|
||||
if (obj.type === "workflow-error") return parseWorkflowErrorMsg(obj, raw);
|
||||
return ok({ type: "ready" });
|
||||
}
|
||||
|
||||
@@ -31,12 +31,15 @@ import { createReflexScheduler } from "./reflex-scheduler.js";
|
||||
import type { ReflexScheduler } from "./reflex-scheduler.js";
|
||||
import { createSignalBus } from "./signal-bus.js";
|
||||
import type { SignalBus } from "./signal-bus.js";
|
||||
import { createWorkflowManager } from "./workflow-manager.js";
|
||||
import type { WorkflowManager } from "./workflow-manager.js";
|
||||
|
||||
export type KernelHealth = {
|
||||
uptime: number;
|
||||
activeSenses: number;
|
||||
activeGroups: number;
|
||||
pendingComputes: number;
|
||||
activeWorkflows: number;
|
||||
memoryUsage: NodeJS.MemoryUsage;
|
||||
};
|
||||
|
||||
@@ -46,6 +49,7 @@ export type Kernel = {
|
||||
senseCount: number;
|
||||
bus: SignalBus;
|
||||
logStore: LogStore;
|
||||
workflowManager: WorkflowManager;
|
||||
/** Resolves when all workers have sent their initial "ready" message. */
|
||||
ready: Promise<void>;
|
||||
/** Returns the PID of the worker process for a given group, or null if not found. */
|
||||
@@ -143,6 +147,8 @@ export function createKernel(
|
||||
return _signalIdCounter;
|
||||
}
|
||||
|
||||
const workflowManager = createWorkflowManager(nerveRoot, config, logStore);
|
||||
|
||||
const groups = new Set<string>();
|
||||
for (const senseConfig of Object.values(config.senses)) {
|
||||
groups.add(senseConfig.group);
|
||||
@@ -267,7 +273,12 @@ export function createKernel(
|
||||
sendCompute(entry.process, senseName);
|
||||
}
|
||||
|
||||
scheduler = createReflexScheduler(config, bus, triggerFn, { logStore });
|
||||
scheduler = createReflexScheduler(config, bus, triggerFn, {
|
||||
logStore,
|
||||
workflowTriggerFn: (workflowName, payload) => {
|
||||
workflowManager.startWorkflow(workflowName, payload);
|
||||
},
|
||||
});
|
||||
|
||||
if (groups.size === 0) {
|
||||
readyResolve?.();
|
||||
@@ -350,7 +361,13 @@ export function createKernel(
|
||||
// Note: pending/throttled computes in the old scheduler are silently dropped here.
|
||||
// In-flight state is not preserved across reloadConfig.
|
||||
scheduler.stop();
|
||||
scheduler = createReflexScheduler(config, bus, triggerFn, { logStore });
|
||||
scheduler = createReflexScheduler(config, bus, triggerFn, {
|
||||
logStore,
|
||||
workflowTriggerFn: (workflowName, payload) => {
|
||||
workflowManager.startWorkflow(workflowName, payload);
|
||||
},
|
||||
});
|
||||
workflowManager.updateConfig(newConfig);
|
||||
const newGroups = collectGroups(newConfig);
|
||||
removeStaleGroups(oldGroups, newGroups);
|
||||
addNewGroups(oldGroups, newGroups);
|
||||
@@ -378,6 +395,7 @@ export function createKernel(
|
||||
activeSenses: Object.keys(config.senses).length,
|
||||
activeGroups: workers.size,
|
||||
pendingComputes: 0,
|
||||
activeWorkflows: workflowManager.totalActiveCount(),
|
||||
memoryUsage: process.memoryUsage(),
|
||||
};
|
||||
}
|
||||
@@ -441,6 +459,7 @@ export function createKernel(
|
||||
fileWatcher = null;
|
||||
}
|
||||
scheduler.stop();
|
||||
await workflowManager.stop();
|
||||
const exitPromises: Promise<void>[] = [];
|
||||
for (const entry of workers.values()) {
|
||||
sendShutdown(entry.process);
|
||||
@@ -469,6 +488,7 @@ export function createKernel(
|
||||
senseCount,
|
||||
bus,
|
||||
logStore,
|
||||
workflowManager,
|
||||
ready,
|
||||
getWorkerPid,
|
||||
triggerCompute: triggerFn,
|
||||
|
||||
@@ -30,11 +30,70 @@ export type LogQuery = {
|
||||
limit?: number;
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Workflow runs materialized view (RFC-002 §6.2)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export type WorkflowRunStatus =
|
||||
| "queued"
|
||||
| "started"
|
||||
| "completed"
|
||||
| "failed"
|
||||
| "crashed"
|
||||
| "dropped";
|
||||
|
||||
const VALID_WORKFLOW_STATUSES = new Set<string>([
|
||||
"queued",
|
||||
"started",
|
||||
"completed",
|
||||
"failed",
|
||||
"crashed",
|
||||
"dropped",
|
||||
]);
|
||||
|
||||
function validateWorkflowRunStatus(status: string): WorkflowRunStatus {
|
||||
if (!VALID_WORKFLOW_STATUSES.has(status)) {
|
||||
throw new Error(`Invalid workflow run status from DB: "${status}"`);
|
||||
}
|
||||
return status as WorkflowRunStatus;
|
||||
}
|
||||
|
||||
/** One row in the workflow_runs materialized table. */
|
||||
export type WorkflowRun = {
|
||||
runId: string;
|
||||
workflow: string;
|
||||
status: WorkflowRunStatus;
|
||||
ts: number;
|
||||
};
|
||||
|
||||
export type LogStore = {
|
||||
append: (entry: Omit<LogEntry, "id">) => LogEntry;
|
||||
query: (filter?: LogQuery) => LogEntry[];
|
||||
getMeta: (key: string) => string | null;
|
||||
setMeta: (key: string, value: string) => void;
|
||||
/**
|
||||
* Append a workflow log event and atomically upsert the workflow_runs
|
||||
* materialized table — both in a single SQLite transaction (RFC-002 §6.2).
|
||||
*/
|
||||
upsertWorkflowRun: (
|
||||
entry: Omit<LogEntry, "id">,
|
||||
run: WorkflowRun,
|
||||
) => LogEntry;
|
||||
/**
|
||||
* Alias for upsertWorkflowRun — append a log entry and update workflow_runs
|
||||
* in one atomic transaction.
|
||||
*/
|
||||
appendWithWorkflowUpdate: (
|
||||
entry: Omit<LogEntry, "id">,
|
||||
run: WorkflowRun,
|
||||
) => LogEntry;
|
||||
/** Get the current materialized state of a specific workflow run. */
|
||||
getWorkflowRun: (runId: string) => WorkflowRun | null;
|
||||
/**
|
||||
* Get all workflow runs with status 'queued' or 'started'.
|
||||
* Optionally filter by workflow name.
|
||||
*/
|
||||
getActiveWorkflowRuns: (workflowName?: string) => WorkflowRun[];
|
||||
close: () => void;
|
||||
};
|
||||
|
||||
@@ -56,6 +115,16 @@ CREATE TABLE IF NOT EXISTS meta (
|
||||
key TEXT PRIMARY KEY,
|
||||
value TEXT NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS workflow_runs (
|
||||
run_id TEXT PRIMARY KEY,
|
||||
workflow TEXT NOT NULL,
|
||||
status TEXT NOT NULL,
|
||||
ts INTEGER NOT NULL
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_workflow_runs_status ON workflow_runs(status);
|
||||
CREATE INDEX IF NOT EXISTS idx_workflow_runs_workflow ON workflow_runs(workflow);
|
||||
`;
|
||||
|
||||
export function createLogStore(dbPath: string): LogStore {
|
||||
@@ -74,6 +143,41 @@ export function createLogStore(dbPath: string): LogStore {
|
||||
"INSERT INTO meta (key, value) VALUES (@key, @value) ON CONFLICT(key) DO UPDATE SET value = @value",
|
||||
);
|
||||
|
||||
const upsertWorkflowRunStmt = sqlite.prepare(
|
||||
"INSERT OR REPLACE INTO workflow_runs (run_id, workflow, status, ts) VALUES (@runId, @workflow, @status, @ts)",
|
||||
);
|
||||
|
||||
const getWorkflowRunStmt = sqlite.prepare(
|
||||
"SELECT run_id, workflow, status, ts FROM workflow_runs WHERE run_id = ?",
|
||||
);
|
||||
|
||||
const getActiveWorkflowRunsStmt = sqlite.prepare(
|
||||
"SELECT run_id, workflow, status, ts FROM workflow_runs WHERE status IN ('queued', 'started') ORDER BY ts ASC",
|
||||
);
|
||||
|
||||
const getActiveWorkflowRunsByNameStmt = sqlite.prepare(
|
||||
"SELECT run_id, workflow, status, ts FROM workflow_runs WHERE status IN ('queued', 'started') AND workflow = ? ORDER BY ts ASC",
|
||||
);
|
||||
|
||||
const upsertWorkflowRunTx = sqlite.transaction(
|
||||
(entry: Omit<LogEntry, "id">, run: WorkflowRun) => {
|
||||
const info = insertStmt.run({
|
||||
source: entry.source,
|
||||
type: entry.type,
|
||||
refId: entry.refId,
|
||||
payload: entry.payload,
|
||||
ts: entry.ts,
|
||||
});
|
||||
upsertWorkflowRunStmt.run({
|
||||
runId: run.runId,
|
||||
workflow: run.workflow,
|
||||
status: run.status,
|
||||
ts: run.ts,
|
||||
});
|
||||
return { ...entry, id: Number(info.lastInsertRowid) };
|
||||
},
|
||||
);
|
||||
|
||||
function append(entry: Omit<LogEntry, "id">): LogEntry {
|
||||
const info = insertStmt.run({
|
||||
source: entry.source,
|
||||
@@ -142,9 +246,44 @@ export function createLogStore(dbPath: string): LogStore {
|
||||
setMetaStmt.run({ key, value });
|
||||
}
|
||||
|
||||
function upsertWorkflowRun(entry: Omit<LogEntry, "id">, run: WorkflowRun): LogEntry {
|
||||
return upsertWorkflowRunTx(entry, run) as LogEntry;
|
||||
}
|
||||
|
||||
function appendWithWorkflowUpdate(entry: Omit<LogEntry, "id">, run: WorkflowRun): LogEntry {
|
||||
return upsertWorkflowRunTx(entry, run) as LogEntry;
|
||||
}
|
||||
|
||||
function getWorkflowRun(runId: string): WorkflowRun | null {
|
||||
const row = getWorkflowRunStmt.get(runId) as
|
||||
| { run_id: string; workflow: string; status: string; ts: number }
|
||||
| undefined;
|
||||
if (row === undefined) return null;
|
||||
return {
|
||||
runId: row.run_id,
|
||||
workflow: row.workflow,
|
||||
status: validateWorkflowRunStatus(row.status),
|
||||
ts: row.ts,
|
||||
};
|
||||
}
|
||||
|
||||
function getActiveWorkflowRuns(workflowName?: string): WorkflowRun[] {
|
||||
const rows = (
|
||||
workflowName !== undefined
|
||||
? getActiveWorkflowRunsByNameStmt.all(workflowName)
|
||||
: getActiveWorkflowRunsStmt.all()
|
||||
) as Array<{ run_id: string; workflow: string; status: string; ts: number }>;
|
||||
return rows.map((r) => ({
|
||||
runId: r.run_id,
|
||||
workflow: r.workflow,
|
||||
status: validateWorkflowRunStatus(r.status),
|
||||
ts: r.ts,
|
||||
}));
|
||||
}
|
||||
|
||||
function close(): void {
|
||||
sqlite.close();
|
||||
}
|
||||
|
||||
return { append, query, getMeta, setMeta, close };
|
||||
return { append, query, getMeta, setMeta, upsertWorkflowRun, appendWithWorkflowUpdate, getWorkflowRun, getActiveWorkflowRuns, close };
|
||||
}
|
||||
|
||||
@@ -16,6 +16,9 @@ import type { SignalBus, Unsubscribe } from "./signal-bus.js";
|
||||
/** Sends a compute message to the worker responsible for the given sense. */
|
||||
export type TriggerFn = (senseName: string) => void;
|
||||
|
||||
/** Triggers a workflow run in response to a signal. */
|
||||
export type WorkflowTriggerFn = (workflowName: string, payload: unknown) => void;
|
||||
|
||||
/** Per-sense mutable state tracked by the scheduler. */
|
||||
type SenseState = {
|
||||
lastComputeAt: number;
|
||||
@@ -37,6 +40,7 @@ function makeSenseState(): SenseState {
|
||||
|
||||
export type ReflexSchedulerOptions = {
|
||||
logStore?: LogStore;
|
||||
workflowTriggerFn?: WorkflowTriggerFn;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -153,6 +157,21 @@ export function createReflexScheduler(
|
||||
}
|
||||
|
||||
for (const reflex of config.reflexes) {
|
||||
if (reflex.kind === "workflow") {
|
||||
if (opts?.workflowTriggerFn !== undefined && reflex.on !== null && reflex.on.length > 0) {
|
||||
const workflowTriggerFn = opts.workflowTriggerFn;
|
||||
const workflowName = reflex.workflow;
|
||||
const watchedSenses = new Set(reflex.on);
|
||||
const unsub = bus.subscribe((signal) => {
|
||||
if (watchedSenses.has(signal.senseId)) {
|
||||
workflowTriggerFn(workflowName, signal.payload);
|
||||
}
|
||||
});
|
||||
unsubscribers.push(unsub);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (reflex.kind !== "sense") continue;
|
||||
const senseReflex = reflex;
|
||||
const senseName = senseReflex.sense;
|
||||
|
||||
@@ -0,0 +1,380 @@
|
||||
/**
|
||||
* WorkflowManager — manages per-workflow worker processes, concurrency control,
|
||||
* and thread lifecycle tracking.
|
||||
*
|
||||
* RFC-002 §7.1: one worker process per workflow name, reused while alive.
|
||||
* Concurrency and overflow (drop/queue) are enforced here in the parent process.
|
||||
*/
|
||||
|
||||
import { fork } from "node:child_process";
|
||||
import type { ChildProcess } from "node:child_process";
|
||||
import { dirname, join } from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
import type { NerveConfig, WorkflowConfig } from "@uncaged/nerve-core";
|
||||
|
||||
import type { ShutdownMessage, StartThreadMessage, ThreadEventMessage } from "./ipc.js";
|
||||
import { parseWorkerMessage } from "./ipc.js";
|
||||
import type { LogStore } from "./log-store.js";
|
||||
|
||||
export type WorkflowManager = {
|
||||
/** Trigger a new workflow thread (called by Reflex scheduler). */
|
||||
startWorkflow: (workflowName: string, payload: unknown) => void;
|
||||
/** Number of currently active (running) threads for a workflow. */
|
||||
activeCount: (workflowName: string) => number;
|
||||
/** Number of pending queued threads waiting to run for a workflow. */
|
||||
queueLength: (workflowName: string) => number;
|
||||
/** Total active workflow threads across all workflows. */
|
||||
totalActiveCount: () => number;
|
||||
/** Update the config reference (e.g. after hot reload). Active workers are unaffected. */
|
||||
updateConfig: (newConfig: NerveConfig) => void;
|
||||
/** Gracefully shut down all workflow workers. */
|
||||
stop: () => Promise<void>;
|
||||
};
|
||||
|
||||
type PendingThread = {
|
||||
runId: string;
|
||||
payload: unknown;
|
||||
};
|
||||
|
||||
type WorkflowState = {
|
||||
active: Set<string>;
|
||||
queue: PendingThread[];
|
||||
};
|
||||
|
||||
type WorkerEntry = {
|
||||
workflowName: string;
|
||||
process: ChildProcess;
|
||||
stopping: boolean;
|
||||
};
|
||||
|
||||
const DEFAULT_MAX_QUEUE = 100;
|
||||
|
||||
function resolveWorkerScript(): string {
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
const __dir = dirname(__filename);
|
||||
return join(__dir, "workflow-worker.js");
|
||||
}
|
||||
|
||||
function spawnWorkflowWorker(
|
||||
nerveRoot: string,
|
||||
workflowName: string,
|
||||
workerScript: string,
|
||||
): ChildProcess {
|
||||
return fork(workerScript, ["--workflow", workflowName, "--root", nerveRoot], {
|
||||
stdio: ["ignore", "inherit", "inherit", "ipc"],
|
||||
});
|
||||
}
|
||||
|
||||
function sendStartThread(worker: ChildProcess, msg: StartThreadMessage): void {
|
||||
if (worker.connected === false) return;
|
||||
try {
|
||||
worker.send(msg);
|
||||
} catch {
|
||||
// IPC channel closed between connected check and send
|
||||
}
|
||||
}
|
||||
|
||||
function sendShutdown(worker: ChildProcess, entry: WorkerEntry): void {
|
||||
entry.stopping = true;
|
||||
if (worker.connected === false) return;
|
||||
const msg: ShutdownMessage = { type: "shutdown" };
|
||||
try {
|
||||
worker.send(msg);
|
||||
} catch {
|
||||
// IPC channel closed between connected check and send
|
||||
}
|
||||
}
|
||||
|
||||
function waitForExit(child: ChildProcess, timeoutMs: number): Promise<void> {
|
||||
return new Promise((resolve) => {
|
||||
const timer = setTimeout(() => {
|
||||
child.kill("SIGKILL");
|
||||
resolve();
|
||||
}, timeoutMs);
|
||||
child.once("exit", () => {
|
||||
clearTimeout(timer);
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
export function createWorkflowManager(
|
||||
nerveRoot: string,
|
||||
initialConfig: NerveConfig,
|
||||
logStore: LogStore,
|
||||
): WorkflowManager {
|
||||
const workerScript = resolveWorkerScript();
|
||||
|
||||
const states = new Map<string, WorkflowState>();
|
||||
const workers = new Map<string, WorkerEntry>();
|
||||
let stopped = false;
|
||||
let config = initialConfig;
|
||||
|
||||
function getOrCreateState(workflowName: string): WorkflowState {
|
||||
let state = states.get(workflowName);
|
||||
if (state === undefined) {
|
||||
state = { active: new Set<string>(), queue: [] };
|
||||
states.set(workflowName, state);
|
||||
}
|
||||
return state;
|
||||
}
|
||||
|
||||
function workflowConfig(workflowName: string): WorkflowConfig | null {
|
||||
return config.workflows?.[workflowName] ?? null;
|
||||
}
|
||||
|
||||
function logWorkflowEvent(
|
||||
workflowName: string,
|
||||
runId: string,
|
||||
eventType: string,
|
||||
payload?: unknown,
|
||||
): void {
|
||||
const ts = Date.now();
|
||||
if (
|
||||
eventType === "started" ||
|
||||
eventType === "queued" ||
|
||||
eventType === "completed" ||
|
||||
eventType === "failed" ||
|
||||
eventType === "crashed" ||
|
||||
eventType === "dropped"
|
||||
) {
|
||||
const status =
|
||||
eventType === "dropped"
|
||||
? ("dropped" as const)
|
||||
: eventType === "queued"
|
||||
? ("queued" as const)
|
||||
: eventType === "started"
|
||||
? ("started" as const)
|
||||
: eventType === "completed"
|
||||
? ("completed" as const)
|
||||
: eventType === "failed"
|
||||
? ("failed" as const)
|
||||
: ("crashed" as const);
|
||||
logStore.upsertWorkflowRun(
|
||||
{
|
||||
source: "workflow",
|
||||
type: eventType,
|
||||
refId: runId,
|
||||
payload: payload !== undefined ? JSON.stringify(payload) : null,
|
||||
ts,
|
||||
},
|
||||
{ runId, workflow: workflowName, status, ts },
|
||||
);
|
||||
} else {
|
||||
logStore.append({
|
||||
source: "workflow",
|
||||
type: eventType,
|
||||
refId: runId,
|
||||
payload: payload !== undefined ? JSON.stringify(payload) : null,
|
||||
ts,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
function dispatchThread(workflowName: string, runId: string, payload: unknown): void {
|
||||
const state = getOrCreateState(workflowName);
|
||||
state.active.add(runId);
|
||||
|
||||
const worker = getOrSpawnWorker(workflowName);
|
||||
const msg: StartThreadMessage = {
|
||||
type: "start-thread",
|
||||
runId,
|
||||
workflow: workflowName,
|
||||
triggerPayload: payload,
|
||||
};
|
||||
sendStartThread(worker.process, msg);
|
||||
logWorkflowEvent(workflowName, runId, "started");
|
||||
}
|
||||
|
||||
function dequeueNext(workflowName: string): void {
|
||||
const state = states.get(workflowName);
|
||||
if (state === undefined || state.queue.length === 0) return;
|
||||
|
||||
const wfConfig = workflowConfig(workflowName);
|
||||
const concurrency = wfConfig?.concurrency ?? 1;
|
||||
|
||||
if (state.active.size < concurrency) {
|
||||
const next = state.queue.shift();
|
||||
if (next !== undefined) {
|
||||
dispatchThread(workflowName, next.runId, next.payload);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function handleThreadEvent(workflowName: string, msg: ThreadEventMessage): void {
|
||||
const state = states.get(workflowName);
|
||||
if (state === undefined) return;
|
||||
|
||||
if (msg.eventType === "completed" || msg.eventType === "failed") {
|
||||
state.active.delete(msg.runId);
|
||||
dequeueNext(workflowName);
|
||||
}
|
||||
|
||||
if (msg.eventType === "completed" || msg.eventType === "failed") {
|
||||
logWorkflowEvent(workflowName, msg.runId, msg.eventType, msg.payload);
|
||||
}
|
||||
}
|
||||
|
||||
function handleWorkerCrash(workflowName: string): void {
|
||||
const state = states.get(workflowName);
|
||||
if (state === undefined) return;
|
||||
|
||||
const crashedCount = state.active.size;
|
||||
if (crashedCount > 0) {
|
||||
process.stderr.write(
|
||||
`[workflow-manager] worker for "${workflowName}" crashed with ${crashedCount} active thread(s)\n`,
|
||||
);
|
||||
}
|
||||
|
||||
// All active threads are now crashed — we can't recover runIds from this
|
||||
// in-memory state alone (Phase 3 crash recovery will use the DB).
|
||||
// Reset active set so the manager stays consistent.
|
||||
state.active.clear();
|
||||
workers.delete(workflowName);
|
||||
}
|
||||
|
||||
function getOrSpawnWorker(workflowName: string): WorkerEntry {
|
||||
const existing = workers.get(workflowName);
|
||||
if (existing !== undefined && existing.process.exitCode === null) {
|
||||
return existing;
|
||||
}
|
||||
|
||||
const child = spawnWorkflowWorker(nerveRoot, workflowName, workerScript);
|
||||
|
||||
child.on("message", (raw: unknown) => {
|
||||
const result = parseWorkerMessage(raw);
|
||||
if (!result.ok) {
|
||||
process.stderr.write(
|
||||
`[workflow-manager] invalid message from "${workflowName}" worker: ${result.error.message}\n`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
const msg = result.value;
|
||||
|
||||
if (msg.type === "thread-event") {
|
||||
handleThreadEvent(workflowName, msg);
|
||||
return;
|
||||
}
|
||||
|
||||
if (msg.type === "workflow-error") {
|
||||
process.stderr.write(
|
||||
`[workflow-manager] workflow-error for runId "${msg.runId}" in "${workflowName}": ${msg.error}\n`,
|
||||
);
|
||||
const state = states.get(workflowName);
|
||||
if (state !== undefined) {
|
||||
state.active.delete(msg.runId);
|
||||
dequeueNext(workflowName);
|
||||
}
|
||||
logWorkflowEvent(workflowName, msg.runId, "failed", { error: msg.error });
|
||||
return;
|
||||
}
|
||||
|
||||
if (msg.type === "error") {
|
||||
process.stderr.write(
|
||||
`[workflow-manager] error from "${workflowName}" worker: ${msg.error}\n`,
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
child.on("exit", (code) => {
|
||||
const entry = workers.get(workflowName);
|
||||
if (entry?.stopping) {
|
||||
workers.delete(workflowName);
|
||||
const state = states.get(workflowName);
|
||||
if (state !== undefined) {
|
||||
state.active.clear();
|
||||
}
|
||||
return;
|
||||
}
|
||||
process.stderr.write(
|
||||
`[workflow-manager] worker for "${workflowName}" exited with code ${code ?? "null"}\n`,
|
||||
);
|
||||
handleWorkerCrash(workflowName);
|
||||
});
|
||||
|
||||
const entry: WorkerEntry = { workflowName, process: child, stopping: false };
|
||||
workers.set(workflowName, entry);
|
||||
return entry;
|
||||
}
|
||||
|
||||
function startWorkflow(workflowName: string, payload: unknown): void {
|
||||
if (stopped) return;
|
||||
|
||||
const wfConfig = workflowConfig(workflowName);
|
||||
if (wfConfig === null) {
|
||||
process.stderr.write(
|
||||
`[workflow-manager] startWorkflow: unknown workflow "${workflowName}"\n`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
const state = getOrCreateState(workflowName);
|
||||
const runId = crypto.randomUUID();
|
||||
|
||||
if (state.active.size < wfConfig.concurrency) {
|
||||
dispatchThread(workflowName, runId, payload);
|
||||
return;
|
||||
}
|
||||
|
||||
// At concurrency limit — apply overflow policy
|
||||
if (wfConfig.overflow === "drop") {
|
||||
logWorkflowEvent(workflowName, runId, "dropped");
|
||||
process.stderr.write(
|
||||
`[workflow-manager] dropped thread for "${workflowName}" (overflow=drop)\n`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// overflow === "queue"
|
||||
const maxQueue = (wfConfig as { maxQueue?: number }).maxQueue ?? DEFAULT_MAX_QUEUE;
|
||||
if (state.queue.length >= maxQueue) {
|
||||
const oldest = state.queue.shift();
|
||||
if (oldest !== undefined) {
|
||||
process.stderr.write(
|
||||
`[workflow-manager] queue overflow for "${workflowName}", dropping oldest runId "${oldest.runId}"\n`,
|
||||
);
|
||||
logWorkflowEvent(workflowName, oldest.runId, "dropped");
|
||||
}
|
||||
}
|
||||
|
||||
state.queue.push({ runId, payload });
|
||||
logWorkflowEvent(workflowName, runId, "queued");
|
||||
process.stderr.write(
|
||||
`[workflow-manager] queued thread for "${workflowName}" runId "${runId}" (queue length: ${state.queue.length})\n`,
|
||||
);
|
||||
}
|
||||
|
||||
function activeCount(workflowName: string): number {
|
||||
return states.get(workflowName)?.active.size ?? 0;
|
||||
}
|
||||
|
||||
function queueLength(workflowName: string): number {
|
||||
return states.get(workflowName)?.queue.length ?? 0;
|
||||
}
|
||||
|
||||
function totalActiveCount(): number {
|
||||
let total = 0;
|
||||
for (const state of states.values()) {
|
||||
total += state.active.size;
|
||||
}
|
||||
return total;
|
||||
}
|
||||
|
||||
function updateConfig(newConfig: NerveConfig): void {
|
||||
config = newConfig;
|
||||
}
|
||||
|
||||
async function stop(): Promise<void> {
|
||||
stopped = true;
|
||||
const exitPromises: Promise<void>[] = [];
|
||||
for (const entry of workers.values()) {
|
||||
sendShutdown(entry.process, entry);
|
||||
exitPromises.push(waitForExit(entry.process, 5000));
|
||||
}
|
||||
await Promise.all(exitPromises);
|
||||
workers.clear();
|
||||
}
|
||||
|
||||
return { startWorkflow, activeCount, queueLength, totalActiveCount, updateConfig, stop };
|
||||
}
|
||||
@@ -0,0 +1,244 @@
|
||||
/**
|
||||
* Workflow Worker runtime bootstrap.
|
||||
*
|
||||
* Entry point for a forked workflow worker process.
|
||||
* Receives the workflow name and nerve root via CLI args, dynamically imports
|
||||
* the user's WorkflowDefinition, then signals ready and enters the IPC event loop.
|
||||
*
|
||||
* Layout assumptions (nerve user config at `<nerveRoot>/`):
|
||||
* workflows/<name>/index.ts (or .js) ← user workflow definition
|
||||
*/
|
||||
|
||||
import { resolve, join } from "node:path";
|
||||
import { existsSync } from "node:fs";
|
||||
|
||||
import type {
|
||||
CommandEvent,
|
||||
ThreadState,
|
||||
WorkflowContext,
|
||||
WorkflowDefinition,
|
||||
} from "@uncaged/nerve-core";
|
||||
|
||||
import type { WorkerToParentMessage, ThreadEventType } from "./ipc.js";
|
||||
import { parseParentMessage } from "./ipc.js";
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// IPC helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function send(msg: WorkerToParentMessage): void {
|
||||
if (process.send) {
|
||||
process.send(msg);
|
||||
}
|
||||
}
|
||||
|
||||
function sendReady(): void {
|
||||
send({ type: "ready" });
|
||||
}
|
||||
|
||||
function sendThreadEvent(runId: string, eventType: ThreadEventType, payload: unknown): void {
|
||||
send({ type: "thread-event", runId, eventType, payload });
|
||||
}
|
||||
|
||||
function sendWorkflowError(runId: string, error: string): void {
|
||||
send({ type: "workflow-error", runId, error });
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Thread loop (RFC-002 §5.4)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
async function runThread(
|
||||
def: WorkflowDefinition,
|
||||
workflowName: string,
|
||||
runId: string,
|
||||
triggerPayload: unknown,
|
||||
): Promise<void> {
|
||||
const state: ThreadState = { runId, events: [] };
|
||||
const ctx: WorkflowContext = {
|
||||
runId,
|
||||
workflowName,
|
||||
log: (msg) =>
|
||||
sendThreadEvent(runId, "step_complete", { message: msg }),
|
||||
};
|
||||
|
||||
let event: CommandEvent = {
|
||||
type: "thread_start",
|
||||
triggerPayload: triggerPayload != null && typeof triggerPayload === "object" ? triggerPayload : {},
|
||||
};
|
||||
|
||||
const MAX_STEPS = 1000;
|
||||
let step = 0;
|
||||
while (step < MAX_STEPS) {
|
||||
step++;
|
||||
state.events.push(event);
|
||||
const next = def.moderate(state, event);
|
||||
|
||||
if (next === null) {
|
||||
sendThreadEvent(runId, "completed", null);
|
||||
return;
|
||||
}
|
||||
|
||||
const role = def.roles[next.role];
|
||||
if (!role) {
|
||||
sendWorkflowError(runId, `Unknown role: ${next.role}`);
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
event = await role.execute(next.prompt, ctx);
|
||||
} catch (e: unknown) {
|
||||
const errMsg = e instanceof Error ? e.message : String(e);
|
||||
sendThreadEvent(runId, "failed", { error: errMsg });
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (step >= MAX_STEPS) {
|
||||
sendWorkflowError(runId, `Thread exceeded maximum steps (${MAX_STEPS})`);
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Workflow definition loader
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
async function loadWorkflowDefinition(
|
||||
nerveRoot: string,
|
||||
workflowName: string,
|
||||
): Promise<WorkflowDefinition> {
|
||||
const candidates = [
|
||||
resolve(join(nerveRoot, "workflows", workflowName, "index.ts")),
|
||||
resolve(join(nerveRoot, "workflows", workflowName, "index.js")),
|
||||
];
|
||||
|
||||
const indexPath = candidates.find((p) => existsSync(p));
|
||||
if (!indexPath) {
|
||||
throw new Error(
|
||||
`Workflow definition not found for "${workflowName}". Tried:\n` +
|
||||
candidates.map((p) => ` ${p}`).join("\n"),
|
||||
);
|
||||
}
|
||||
|
||||
const mod = await import(indexPath);
|
||||
const def: unknown = mod.default ?? mod;
|
||||
|
||||
if (
|
||||
def === null ||
|
||||
typeof def !== "object" ||
|
||||
typeof (def as WorkflowDefinition).moderate !== "function" ||
|
||||
typeof (def as WorkflowDefinition).roles !== "object"
|
||||
) {
|
||||
throw new Error(
|
||||
`Workflow "${workflowName}" must export a WorkflowDefinition with "roles" and "moderate".`,
|
||||
);
|
||||
}
|
||||
|
||||
return def as WorkflowDefinition;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// IPC message dispatch
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function handleMessage(
|
||||
raw: unknown,
|
||||
def: WorkflowDefinition,
|
||||
workflowName: string,
|
||||
inFlight: Map<string, Promise<void>>,
|
||||
shuttingDown: { value: boolean },
|
||||
): void {
|
||||
const parseResult = parseParentMessage(raw);
|
||||
if (!parseResult.ok) {
|
||||
process.stderr.write(`[workflow-worker] Invalid IPC message: ${parseResult.error.message}\n`);
|
||||
return;
|
||||
}
|
||||
|
||||
const msg = parseResult.value;
|
||||
|
||||
if (msg.type === "shutdown") {
|
||||
shuttingDown.value = true;
|
||||
const timeout = new Promise<void>(r => setTimeout(r, 10_000));
|
||||
Promise.race([Promise.all(inFlight.values()), timeout])
|
||||
.then(() => process.exit(0))
|
||||
.catch(() => process.exit(1));
|
||||
return;
|
||||
}
|
||||
|
||||
if (msg.type === "start-thread") {
|
||||
if (shuttingDown.value) return;
|
||||
const { runId, triggerPayload } = msg;
|
||||
|
||||
const previous = inFlight.get(runId) ?? Promise.resolve();
|
||||
const next = previous
|
||||
.then(() => runThread(def, workflowName, runId, triggerPayload))
|
||||
.catch((e: unknown) => {
|
||||
const errMsg = e instanceof Error ? e.message : String(e);
|
||||
sendWorkflowError(runId, errMsg);
|
||||
})
|
||||
.finally(() => {
|
||||
inFlight.delete(runId);
|
||||
});
|
||||
|
||||
inFlight.set(runId, next);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Bootstrap
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
async function bootstrap(nerveRoot: string, workflowName: string): Promise<void> {
|
||||
let def: WorkflowDefinition;
|
||||
try {
|
||||
def = await loadWorkflowDefinition(nerveRoot, workflowName);
|
||||
} catch (e: unknown) {
|
||||
const msg = e instanceof Error ? e.message : String(e);
|
||||
process.stderr.write(`[workflow-worker] Failed to load workflow "${workflowName}": ${msg}\n`);
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
const inFlight = new Map<string, Promise<void>>();
|
||||
const shuttingDown = { value: false };
|
||||
|
||||
sendReady();
|
||||
|
||||
process.on("message", (raw: unknown) => {
|
||||
handleMessage(raw, def, workflowName, inFlight, shuttingDown);
|
||||
});
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// CLI entrypoint
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
function parseArgs(): { nerveRoot: string; workflow: string } | null {
|
||||
const args = process.argv.slice(2);
|
||||
let workflow: string | null = null;
|
||||
let nerveRoot: string | null = null;
|
||||
|
||||
for (let i = 0; i < args.length; i++) {
|
||||
if (args[i] === "--workflow" && i + 1 < args.length) {
|
||||
workflow = args[i + 1];
|
||||
i++;
|
||||
} else if (args[i] === "--root" && i + 1 < args.length) {
|
||||
nerveRoot = args[i + 1];
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
if (!workflow || !nerveRoot) return null;
|
||||
return { nerveRoot, workflow };
|
||||
}
|
||||
|
||||
const parsed = parseArgs();
|
||||
if (!parsed) {
|
||||
process.stderr.write("Usage: workflow-worker --workflow <name> --root <nerve-root-dir>\n");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
bootstrap(parsed.nerveRoot, parsed.workflow).catch((e) => {
|
||||
const msg = e instanceof Error ? e.message : String(e);
|
||||
process.stderr.write(`[workflow-worker] Unhandled bootstrap error: ${msg}\n`);
|
||||
process.exit(1);
|
||||
});
|
||||
Reference in New Issue
Block a user