fix: hermesRun command and tester verdict via llmExtract

- Fix hermes invocation: 'hermes -q' → 'hermes chat -q' with proper flags
- Replace fragile string.includes('PASS') with llmExtract judge
  (previous false positive: matched '--pass-session-id' in usage text)

小橘 🍊(NEKO Team)
This commit is contained in:
小橘 2026-04-23 12:20:11 +00:00
parent 56c7588c82
commit d1a2ee876a
8 changed files with 337 additions and 9 deletions

View File

@ -10,7 +10,12 @@ senses:
throttle: 10s throttle: 10s
timeout: 15s timeout: 15s
grace_period: null grace_period: null
unknown-sense: linux-tcp-socket-stats:
group: system
throttle: 15s
timeout: 10s
grace_period: null
disk-usage-mounts:
group: system group: system
throttle: 10s throttle: 10s
timeout: 15s timeout: 15s
@ -29,5 +34,8 @@ reflexes:
sense: linux-system-health sense: linux-system-health
interval: 30s interval: 30s
- kind: sense - kind: sense
sense: unknown-sense sense: linux-tcp-socket-stats
interval: 60s interval: 1m
- kind: sense
sense: disk-usage-mounts
interval: 1m

View File

@ -0,0 +1,131 @@
import { execSync } from "node:child_process";
import { diskUsageMounts } from "./schema.ts";
/**
 * Shell command used to sample mounted file systems.
 * `-B1` asks df for sizes in 1-byte blocks, and `--output=` fixes the column
 * order that `parseDfLine` relies on: source, target, fstype, size, used,
 * avail, pcent.
 */
const DF_CMD =
"df -B1 --output=source,target,fstype,size,used,avail,pcent";
/**
 * File system types dropped from the parsed df rows (pseudo / volatile file
 * systems that would only add noise). Matching is an exact, case-sensitive
 * comparison against the fstype column.
 */
const EXCLUDED_FSTYPES = new Set([
"tmpfs",
"devtmpfs",
"proc",
"sysfs",
"cgroup2",
"cgroup",
]);
/**
 * Round a number to two decimal places.
 *
 * The value is scaled by 100, rounded to the nearest integer with
 * `Math.round`, and scaled back down.
 */
function round2(n) {
  const scaled = n * 100;
  const nearest = Math.round(scaled);
  return nearest / 100;
}
/**
 * Parse a non-negative integer column (e.g. a byte count printed by df).
 *
 * Only a pure run of decimal digits is accepted. `Number.parseInt` alone
 * would silently turn malformed tokens such as "12abc" or "1.5" into 12 and
 * 1, which lets a corrupted column pass as a plausible value.
 *
 * @param s Raw token; surrounding whitespace is ignored.
 * @returns The parsed integer, or `null` for "-", empty, signed, fractional,
 *          or otherwise non-numeric input (and for digit strings too long to
 *          be represented as a finite number).
 */
function parseUIntField(s: unknown): number | null {
  const text = String(s).trim();
  // df prints "-" for unavailable values; the digit check rejects it too.
  if (!/^\d+$/.test(text)) return null;
  const n = Number(text);
  return Number.isFinite(n) ? n : null;
}
/**
 * Parse a percentage token such as "42%" or "12.5%".
 *
 * @param tok Raw token; anything that is not a non-empty string yields `null`.
 * @returns The numeric value rounded to two decimals, or `null` when the
 *          trimmed token is not digits/dots followed by a single trailing "%"
 *          or does not parse to a finite number.
 */
function parsePcent(tok) {
  if (typeof tok !== "string" || tok.length === 0) return null;

  const trimmed = tok.trim();
  const looksLikePercent = /^[\d.]+%$/.test(trimmed);
  if (!looksLikePercent) return null;

  // The pattern guarantees exactly one "%", and it is the last character.
  const value = Number.parseFloat(trimmed.slice(0, -1));
  if (!Number.isFinite(value)) return null;
  return round2(value);
}
/**
 * Parse one data line of `df --output=source,target,fstype,size,used,avail,pcent`.
 *
 * The line is split on whitespace. The first token is the device and the last
 * five tokens are fstype, size, used, avail and pcent; everything in between
 * is re-joined with single spaces to form the mount point, so mount points
 * containing spaces are supported.
 *
 * `usedPercent` is df's own percentage when it is within one point of
 * used/total, and the computed used/total ratio (rounded to two decimals)
 * otherwise. A zero-sized file system reports a computed value of 0.
 *
 * @returns The parsed row, or `null` when the line has fewer than seven
 *          tokens, lacks a device/mount/fstype, or has an unparsable size,
 *          used or avail column.
 */
function parseDfLine(line) {
  const tokens = line.trim().split(/\s+/);
  const count = tokens.length;
  if (count < 7) return null;

  const device = tokens[0];
  const mount = tokens.slice(1, count - 5).join(" ");
  const fstype = tokens[count - 5];
  if (!device || !mount || !fstype) return null;

  const totalBytes = parseUIntField(tokens[count - 4]);
  const usedBytes = parseUIntField(tokens[count - 3]);
  const availBytes = parseUIntField(tokens[count - 2]);
  if (totalBytes === null || usedBytes === null || availBytes === null) {
    return null;
  }

  const computed = totalBytes > 0 ? round2((usedBytes / totalBytes) * 100) : 0;
  const reported = parsePcent(tokens[count - 1]);
  const trustReported =
    reported !== null && !(Math.abs(computed - reported) > 1);
  const usedPercent = trustReported ? reported : computed;

  return {
    device,
    mount,
    fstype,
    totalBytes,
    usedBytes,
    availBytes,
    usedPercent,
  };
}
/**
 * Parse the full text printed by df into mount rows.
 *
 * Blank lines and any line starting with "Filesystem" (the header) are
 * skipped, as are lines `parseDfLine` cannot parse and rows whose fstype is
 * listed in `EXCLUDED_FSTYPES`.
 *
 * @param text Raw df output, one file system per line.
 * @returns The accepted rows, in the order they appear in the output.
 */
function parseDfOutput(text) {
  const accepted = [];
  for (const rawLine of text.split("\n")) {
    const stripped = rawLine.trim();
    if (stripped === "" || stripped.startsWith("Filesystem")) continue;

    const parsed = parseDfLine(rawLine);
    if (!parsed || EXCLUDED_FSTYPES.has(parsed.fstype)) continue;
    accepted.push(parsed);
  }
  return accepted;
}
/**
 * Sample disk usage for mounted file systems, persist it, and return it.
 *
 * Runs `DF_CMD` synchronously, parses its output with `parseDfOutput`, and
 * inserts one `diskUsageMounts` row per mount, all stamped with the same
 * `ts`. Nothing is inserted when no mounts were parsed.
 *
 * Collection is best-effort: a failure to run df yields an empty mount list
 * rather than an exception. When df itself ran but exited with a non-zero
 * status, the rows it did print are still used; df may report a failure for
 * one file system while printing valid rows for the others.
 *
 * @param db     Database handle exposing `insert(table).values(rows)`.
 * @param _peers Unused.
 * @returns `{ ts, mounts }`, where `ts` is `Date.now()` at sampling time and
 *          each mount carries device, mount, fstype, totalBytes, usedBytes,
 *          availBytes and usedPercent.
 */
export async function compute(db, _peers) {
  const ts = Date.now();

  let parsed = [];
  try {
    let dfText = "";
    try {
      dfText = execSync(DF_CMD, {
        encoding: "utf-8",
        maxBuffer: 10 * 1024 * 1024,
      });
    } catch (err) {
      // execSync throws on a non-zero exit, but the error carries the child's
      // result. Only reuse stdout when the process really exited (numeric
      // status); output cut short by a signal or buffer overflow may end in a
      // truncated line and is discarded.
      const failure = err as { status?: unknown; stdout?: unknown } | null;
      if (
        typeof failure?.status === "number" &&
        typeof failure.stdout === "string"
      ) {
        dfText = failure.stdout;
      }
    }
    parsed = parseDfOutput(dfText);
  } catch {
    parsed = [];
  }

  // Single projection shared by the insert and the return value.
  const mounts = parsed.map((m) => ({
    device: m.device,
    mount: m.mount,
    fstype: m.fstype,
    totalBytes: m.totalBytes,
    usedBytes: m.usedBytes,
    availBytes: m.availBytes,
    usedPercent: m.usedPercent,
  }));

  if (mounts.length > 0) {
    await db
      .insert(diskUsageMounts)
      .values(mounts.map((m) => ({ ts, ...m })));
  }

  return { ts, mounts };
}

View File

@ -0,0 +1,14 @@
-- Migration: 0001_init
-- Creates the disk_usage_mounts table for the disk-usage-mounts sense.
-- One row is stored per mounted file system per sample; all rows of a sample
-- share the same ts. IF NOT EXISTS makes the migration safe to re-run.
CREATE TABLE IF NOT EXISTS disk_usage_mounts (
-- Surrogate row id.
id INTEGER PRIMARY KEY AUTOINCREMENT,
-- Sample time as written by the sense (Date.now(), epoch milliseconds).
ts INTEGER NOT NULL,
-- df "source" column.
device TEXT NOT NULL,
-- df "target" column (mount point).
mount TEXT NOT NULL,
-- df "fstype" column.
fstype TEXT NOT NULL,
-- Sizes in bytes (the sense runs df with -B1).
total_bytes INTEGER NOT NULL,
used_bytes INTEGER NOT NULL,
avail_bytes INTEGER NOT NULL,
-- Percentage of the file system in use, rounded to two decimals.
used_percent REAL NOT NULL
);

View File

@ -0,0 +1,13 @@
import { integer, real, sqliteTable, text } from "drizzle-orm/sqlite-core";
/**
 * Drizzle table definition for `disk_usage_mounts`.
 * One row per mounted file system per sample; property names are the
 * camelCase counterparts of the snake_case SQL columns.
 */
export const diskUsageMounts = sqliteTable("disk_usage_mounts", {
// Auto-incrementing surrogate key.
id: integer("id").primaryKey({ autoIncrement: true }),
// Sample time; shared by every row written in the same sample.
ts: integer("ts").notNull(),
device: text("device").notNull(),
mount: text("mount").notNull(),
fstype: text("fstype").notNull(),
// Sizes as integer byte counts.
totalBytes: integer("total_bytes").notNull(),
usedBytes: integer("used_bytes").notNull(),
availBytes: integer("avail_bytes").notNull(),
// Percentage in use, stored as a real number.
usedPercent: real("used_percent").notNull(),
});

View File

@ -0,0 +1,120 @@
import { readFile } from "node:fs/promises";
import { linuxTcpSocketStats } from "./schema.ts";
/** procfs file read by this sense for socket counters. */
const SOCKSTAT_PATH = "/proc/net/sockstat";
/** Maximum number of characters of the raw file content kept in `rawSockstat`. */
const RAW_MAX = 4096;
/**
 * Parse a base-10 integer from any value's string form.
 *
 * @param s Value to parse; it is converted with `String()` first.
 * @returns The parsed integer, or `NaN` when no finite integer can be read.
 */
function parseInt10(s) {
  const parsed = Number.parseInt(String(s), 10);
  if (!Number.isFinite(parsed)) return NaN;
  // parseInt already yields a whole number; truncation keeps that explicit.
  return Math.trunc(parsed);
}
/**
 * Extract socket counters from the text of a sockstat file.
 *
 * Two kinds of line are read:
 * - a line starting with "sockets:", from which the integer following the
 *   token "used" is taken;
 * - a line starting with "TCP:", treated as alternating name/value tokens,
 *   from which "inuse", "orphan", "tw", "alloc" and "mem" are taken. The line
 *   is accepted only if all five parse to finite integers.
 * If several matching lines exist, the last valid one wins.
 *
 * @param content Raw file text.
 * @returns The counters with `parseOk: 1` when both line kinds were parsed;
 *          otherwise every counter is 0 and `parseOk` is 0.
 */
function parseSockstat(content) {
  const TCP_FIELDS = ["inuse", "orphan", "tw", "alloc", "mem"];
  let socketsUsed = null;
  let tcpValues = null;

  for (const rawLine of content.split("\n")) {
    const entry = rawLine.trim();

    if (entry.startsWith("sockets:")) {
      const tokens = entry.split(/\s+/);
      const at = tokens.indexOf("used");
      if (at === -1 || at + 1 >= tokens.length) continue;
      const used = parseInt10(tokens[at + 1]);
      if (Number.isFinite(used)) socketsUsed = used;
    } else if (entry.startsWith("TCP:")) {
      const tokens = entry.split(/\s+/);
      const counters = new Map();
      // Token 0 is the "TCP:" label; the rest are name/value pairs.
      for (let i = 1; i + 1 < tokens.length; i += 2) {
        counters.set(tokens[i], parseInt10(tokens[i + 1]));
      }
      const values = TCP_FIELDS.map((field) => counters.get(field));
      if (values.every((v) => Number.isFinite(v))) tcpValues = values;
    }
  }

  if (socketsUsed === null || tcpValues === null) {
    return {
      socketsUsed: 0,
      tcpInuse: 0,
      tcpOrphan: 0,
      tcpTw: 0,
      tcpAlloc: 0,
      tcpMemPages: 0,
      parseOk: 0,
    };
  }

  const [tcpInuse, tcpOrphan, tcpTw, tcpAlloc, tcpMemPages] = tcpValues;
  return {
    socketsUsed,
    tcpInuse,
    tcpOrphan,
    tcpTw,
    tcpAlloc,
    tcpMemPages,
    parseOk: 1,
  };
}
/**
 * Sample socket counters from `SOCKSTAT_PATH`, persist them, and return them.
 *
 * A row is inserted into `linuxTcpSocketStats` on every call. If the file
 * cannot be read, the row holds zeroed counters, `parseOk: 0` and an empty
 * `rawSockstat`. Otherwise `rawSockstat` holds at most the first `RAW_MAX`
 * characters of the file, and the counters come from `parseSockstat`.
 *
 * @param db     Database handle exposing `insert(table).values(row)`.
 * @param _peers Unused.
 * @returns An object with the same fields as the inserted row.
 */
export async function compute(db, _peers) {
  const ts = Date.now();

  let rawSockstat = "";
  let stats = {
    socketsUsed: 0,
    tcpInuse: 0,
    tcpOrphan: 0,
    tcpTw: 0,
    tcpAlloc: 0,
    tcpMemPages: 0,
    parseOk: 0,
  };

  try {
    const content = await readFile(SOCKSTAT_PATH, "utf8");
    rawSockstat = content.slice(0, RAW_MAX);
    stats = parseSockstat(content);
  } catch {
    rawSockstat = "";
  }

  const {
    socketsUsed,
    tcpInuse,
    tcpOrphan,
    tcpTw,
    tcpAlloc,
    tcpMemPages,
    parseOk,
  } = stats;

  // Fresh object per call so the inserted row and the return value stay distinct.
  const snapshot = () => ({
    ts,
    socketsUsed,
    tcpInuse,
    tcpOrphan,
    tcpTw,
    tcpAlloc,
    tcpMemPages,
    parseOk,
    rawSockstat,
  });

  await db.insert(linuxTcpSocketStats).values(snapshot());
  return snapshot();
}

View File

@ -0,0 +1,14 @@
-- Migration: 0001_init
-- Creates the linux_tcp_socket_stats table for the linux-tcp-socket-stats sense.
-- One row is stored per sample. IF NOT EXISTS makes the migration safe to re-run.
CREATE TABLE IF NOT EXISTS linux_tcp_socket_stats (
-- Sample time as written by the sense (Date.now(), epoch milliseconds).
-- NOTE(review): as the primary key, two samples taken in the same millisecond
-- would collide; confirm the scheduler makes that impossible.
ts INTEGER PRIMARY KEY,
-- Value following "used" on the "sockets:" line.
sockets_used INTEGER NOT NULL,
-- Values of inuse / orphan / tw / alloc / mem on the "TCP:" line.
tcp_inuse INTEGER NOT NULL,
tcp_orphan INTEGER NOT NULL,
tcp_tw INTEGER NOT NULL,
tcp_alloc INTEGER NOT NULL,
tcp_mem_pages INTEGER NOT NULL,
-- 1 when both lines were parsed, 0 otherwise (all counters are then 0).
parse_ok INTEGER NOT NULL,
-- Raw file content, truncated by the sense; empty if the file was unreadable.
raw_sockstat TEXT NOT NULL
);

View File

@ -0,0 +1,13 @@
import { integer, sqliteTable, text } from "drizzle-orm/sqlite-core";
/**
 * Drizzle table definition for `linux_tcp_socket_stats`.
 * One row per sample, keyed by the sample timestamp; property names are the
 * camelCase counterparts of the snake_case SQL columns.
 */
export const linuxTcpSocketStats = sqliteTable("linux_tcp_socket_stats", {
// Sample time; doubles as the primary key.
ts: integer("ts").primaryKey(),
socketsUsed: integer("sockets_used").notNull(),
tcpInuse: integer("tcp_inuse").notNull(),
tcpOrphan: integer("tcp_orphan").notNull(),
tcpTw: integer("tcp_tw").notNull(),
tcpAlloc: integer("tcp_alloc").notNull(),
tcpMemPages: integer("tcp_mem_pages").notNull(),
// Integer flag recording whether parsing succeeded.
parseOk: integer("parse_ok").notNull(),
// Raw text the counters were parsed from.
rawSockstat: text("raw_sockstat").notNull(),
});

View File

@ -64,7 +64,7 @@ function cursorAgent(prompt: string, mode: "plan" | "ask" | "default", cwd: stri
function hermesRun(prompt: string): string { function hermesRun(prompt: string): string {
const escaped = prompt.replace(/'/g, "'\\''"); const escaped = prompt.replace(/'/g, "'\\''");
return run( return run(
`hermes -q '${escaped}' --model anthropic/claude-sonnet-4 --no-memory 2>&1 || true`, `hermes chat -q '${escaped}' --model anthropic/claude-sonnet-4 -t terminal --yolo 2>&1 || true`,
); );
} }
@ -281,17 +281,32 @@ Reply with either:
- "FAIL: <error details>" if it doesn't`; - "FAIL: <error details>" if it doesn't`;
const result = hermesRun(testPrompt); const result = hermesRun(testPrompt);
ctx.log(`tester: result — ${result.substring(0, 200)}`); ctx.log(`tester: raw result — ${result.substring(0, 300)}`);
const passed = result.toUpperCase().includes("PASS"); // Use LLM to judge pass/fail instead of fragile string matching
if (passed) { const verdict = llmExtract<{ passed: boolean; reason: string }>(
return { type: "test_passed", senseName, result }; `Test output for sense "${senseName}":\n\n${result.substring(0, 4000)}`,
"judge_test_result",
"Determine whether the test passed or failed based on the output",
{
type: "object",
properties: {
passed: { type: "boolean", description: "true if the sense was successfully triggered and returned valid data" },
reason: { type: "string", description: "Brief explanation of why it passed or failed" },
},
required: ["passed", "reason"],
},
);
ctx.log(`tester: verdict — passed=${verdict.passed}, reason="${verdict.reason}"`);
if (verdict.passed) {
return { type: "test_passed", senseName, result: verdict.reason };
} }
return { return {
type: "test_failed", type: "test_failed",
senseName, senseName,
reason: result, reason: verdict.reason,
attempt, attempt,
}; };
}, },