fix: hermesRun command and tester verdict via llmExtract

- Fix hermes invocation: 'hermes -q' → 'hermes chat -q' with proper flags
- Replace fragile string.includes('PASS') with llmExtract judge (previous false positive: matched '--pass-session-id' in usage text)

小橘 🍊（NEKO Team）
2026-04-23 12:20:11 +00:00 · 2026-04-23 12:20:11 +00:00 · d1a2ee876a
commit d1a2ee876a
parent 56c7588c82
8 changed files with 337 additions and 9 deletions
--- a/nerve.yaml
+++ b/nerve.yaml
@ -10,7 +10,12 @@ senses:
    throttle: 10s
    timeout: 15s
    grace_period: null
-  unknown-sense:
+  linux-tcp-socket-stats:
    group: system
    throttle: 15s
    timeout: 10s
    grace_period: null
  disk-usage-mounts:
    group: system
    throttle: 10s
    timeout: 15s
@ -29,5 +34,8 @@ reflexes:
    sense: linux-system-health
    interval: 30s
  - kind: sense
-    sense: unknown-sense
+    sense: linux-tcp-socket-stats
-    interval: 60s
+    interval: 1m
  - kind: sense
    sense: disk-usage-mounts
    interval: 1m
--- a/senses/disk-usage-mounts/index.js
+++ b/senses/disk-usage-mounts/index.js
@ -0,0 +1,131 @@
 import { execSync } from "node:child_process";
 import { diskUsageMounts } from "./schema.ts";
 /**
 * df command line executed by compute(). `-B1` selects a 1-byte block size so
 * the size/used/avail columns are byte counts, and `--output` pins the column
 * order that parseDfLine relies on (source, target, fstype, size, used, avail,
 * pcent).
 */
 const DF_CMD =
  "df -B1 --output=source,target,fstype,size,used,avail,pcent";
 /** fstype-based exclusions to avoid pseudo / volatile filesystem noise */
 const EXCLUDED_FSTYPES = new Set([
  "tmpfs",
  "devtmpfs",
  "proc",
  "sysfs",
  "cgroup2",
  "cgroup",
 ]);
 /** Round a number to two decimal places (via Math.round on the value times 100). */
 function round2(n) {
  const hundredths = Math.round(n * 100);
  return hundredths / 100;
 }
 /**
 * Convert one numeric df column into a non-negative integer.
 * Returns null for the "-" placeholder and for any value that parseInt cannot
 * turn into a finite number >= 0.
 */
 function parseUIntField(s) {
  if (s === "-") return null;
  const value = Number.parseInt(String(s), 10);
  const usable = Number.isFinite(value) && value >= 0;
  return usable ? value : null;
 }
 /**
 * Parse a df percentage token such as "42%" into a number rounded to two
 * decimals. Returns null for non-string or empty input, for tokens that are
 * not digits/dots followed by "%", and for values parseFloat cannot read.
 */
 function parsePcent(tok) {
  if (typeof tok !== "string" || !tok) return null;
  const token = tok.trim();
  const looksLikePercent = /^[\d.]+%$/.test(token);
  if (!looksLikePercent) return null;
  const value = Number.parseFloat(token.replace("%", ""));
  if (!Number.isFinite(value)) return null;
  return round2(value);
 }
 /**
 * Turn one data row of `df --output=source,target,fstype,size,used,avail,pcent`
 * into a mount record, or null when the row cannot be parsed.
 * The mount point may contain whitespace, so the five trailing columns are
 * read from the right-hand end and everything between the device and the
 * fstype is re-joined with single spaces to form the mount.
 */
 function parseDfLine(line) {
  const cols = line.trim().split(/\s+/);
  const count = cols.length;
  if (count < 7) return null;

  const device = cols[0];
  const mount = cols.slice(1, count - 5).join(" ");
  const fstype = cols[count - 5];
  if (!device || !mount || !fstype) return null;

  // size, used, avail sit immediately before the trailing pcent column.
  const [totalBytes, usedBytes, availBytes] = cols
    .slice(count - 4, count - 1)
    .map((field) => parseUIntField(field));
  if (totalBytes === null || usedBytes === null || availBytes === null) {
    return null;
  }

  const ratioPercent =
    totalBytes > 0 ? round2((usedBytes / totalBytes) * 100) : 0;
  const dfPercent = parsePcent(cols[count - 1]);
  // Keep df's own figure unless it is missing or differs from the byte ratio
  // by more than one percentage point.
  const usedPercent =
    dfPercent === null || Math.abs(ratioPercent - dfPercent) > 1
      ? ratioPercent
      : dfPercent;

  return {
    device,
    mount,
    fstype,
    totalBytes,
    usedBytes,
    availBytes,
    usedPercent,
  };
 }
 /**
 * Parse complete df output into mount records. Blank lines and the
 * "Filesystem" header row are skipped, rows parseDfLine rejects are dropped,
 * and rows whose fstype is listed in EXCLUDED_FSTYPES are filtered out.
 */
 function parseDfOutput(text) {
  const isHeader = (trimmed) =>
    /^Filesystem\s+/.test(trimmed) || trimmed.startsWith("Filesystem");
  return text
    .split("\n")
    .filter((line) => {
      const trimmed = line.trim();
      return trimmed !== "" && !isHeader(trimmed);
    })
    .map((line) => parseDfLine(line))
    .filter((row) => row && !EXCLUDED_FSTYPES.has(row.fstype));
 }
 /**
 * Sense entry point: run df once, persist one row per remaining mount and
 * return the same snapshot to the caller.
 *
 * Best effort: if running or parsing df throws, the error is swallowed and the
 * result is an empty mount list. Nothing is inserted when the list is empty.
 *
 * @param db database handle; only `db.insert(table).values(rows)` is used here
 * @param _peers unused
 * @returns `{ ts, mounts }` where ts is the Date.now() value taken at the start
 */
 export async function compute(db, _peers) {
  const ts = Date.now();

  const readMounts = () => {
    try {
      const output = execSync(DF_CMD, {
        encoding: "utf-8",
        maxBuffer: 10 * 1024 * 1024,
      });
      return parseDfOutput(output);
    } catch {
      return [];
    }
  };
  const mounts = readMounts();

  // Fresh plain object per mount, with the fields in a fixed order.
  const toMountRecord = (m) => ({
    device: m.device,
    mount: m.mount,
    fstype: m.fstype,
    totalBytes: m.totalBytes,
    usedBytes: m.usedBytes,
    availBytes: m.availBytes,
    usedPercent: m.usedPercent,
  });

  if (mounts.length > 0) {
    const rows = mounts.map((m) => ({ ts, ...toMountRecord(m) }));
    await db.insert(diskUsageMounts).values(rows);
  }

  return { ts, mounts: mounts.map(toMountRecord) };
 }
--- a/senses/disk-usage-mounts/migrations/0001_init.sql
+++ b/senses/disk-usage-mounts/migrations/0001_init.sql
@ -0,0 +1,14 @@
 -- Migration: 0001_init
 -- Creates the disk_usage_mounts table for disk-usage-mounts sense.
 -- The sense's compute() inserts one row per mount per run; all rows written by
 -- the same run share the same ts.
 CREATE TABLE IF NOT EXISTS disk_usage_mounts (
  -- surrogate row id
  id            INTEGER PRIMARY KEY AUTOINCREMENT,
  -- sample time: Date.now() (milliseconds since the Unix epoch) taken in compute()
  ts            INTEGER NOT NULL,
  -- df "source" column
  device        TEXT    NOT NULL,
  -- df "target" column (mount point; may contain spaces)
  mount         TEXT    NOT NULL,
  -- df "fstype" column
  fstype        TEXT    NOT NULL,
  -- df size / used / avail columns, requested with -B1 (bytes)
  total_bytes   INTEGER NOT NULL,
  used_bytes    INTEGER NOT NULL,
  avail_bytes   INTEGER NOT NULL,
  -- df "pcent" value, or used/total*100 rounded to 2 decimals when df's value
  -- is missing or differs from that ratio by more than 1
  used_percent  REAL    NOT NULL
 );
--- a/senses/disk-usage-mounts/schema.ts
+++ b/senses/disk-usage-mounts/schema.ts
@ -0,0 +1,13 @@
 import { integer, real, sqliteTable, text } from "drizzle-orm/sqlite-core";
 /**
 * Drizzle table definition for `disk_usage_mounts`. Column names and types
 * mirror migrations/0001_init.sql. The sense's compute() inserts one row per
 * mount per run, with `ts` set to the Date.now() value of that run.
 */
 export const diskUsageMounts = sqliteTable("disk_usage_mounts", {
  id: integer("id").primaryKey({ autoIncrement: true }),
  ts: integer("ts").notNull(),
  device: text("device").notNull(),
  mount: text("mount").notNull(),
  fstype: text("fstype").notNull(),
  totalBytes: integer("total_bytes").notNull(),
  usedBytes: integer("used_bytes").notNull(),
  availBytes: integer("avail_bytes").notNull(),
  usedPercent: real("used_percent").notNull(),
 });
--- a/senses/linux-tcp-socket-stats/index.js
+++ b/senses/linux-tcp-socket-stats/index.js
@ -0,0 +1,120 @@
 import { readFile } from "node:fs/promises";
 import { linuxTcpSocketStats } from "./schema.ts";
 const SOCKSTAT_PATH = "/proc/net/sockstat";
 const RAW_MAX = 4096;
 /** Parse a value as a base-10 integer; yields NaN when the result is not finite. */
 function parseInt10(s) {
  const parsed = Number.parseInt(String(s), 10);
  if (!Number.isFinite(parsed)) return NaN;
  return Math.trunc(parsed);
 }
 /**
 * Extract socket counters from the text of /proc/net/sockstat.
 *
 * Reads the value following the `used` token on lines starting with
 * "sockets:" and the inuse/orphan/tw/alloc/mem key-value pairs on lines
 * starting with "TCP:". If several matching lines are valid, the last one wins.
 * When either kind of line could not be parsed, every counter is reported as 0
 * and parseOk is 0; otherwise parseOk is 1.
 */
 function parseSockstat(content) {
  const tcpKeys = ["inuse", "orphan", "tw", "alloc", "mem"];
  let socketsUsed = null; // last valid "sockets: used N" value
  let tcp = null; // last TCP: line on which all five keys were finite

  for (const rawLine of content.split("\n")) {
    const text = rawLine.trim();

    if (text.startsWith("sockets:")) {
      const tokens = text.split(/\s+/);
      const at = tokens.indexOf("used");
      if (at === -1 || at + 1 >= tokens.length) continue;
      const used = parseInt10(tokens[at + 1]);
      if (Number.isFinite(used)) socketsUsed = used;
      continue;
    }

    if (!text.startsWith("TCP:")) continue;

    const tokens = text.split(/\s+/);
    const counters = new Map();
    // Tokens after the "TCP:" label come as name/value pairs.
    for (let i = 1; i + 1 < tokens.length; i += 2) {
      counters.set(tokens[i], parseInt10(tokens[i + 1]));
    }
    if (tcpKeys.every((key) => Number.isFinite(counters.get(key)))) {
      tcp = {
        inuse: counters.get("inuse"),
        orphan: counters.get("orphan"),
        tw: counters.get("tw"),
        alloc: counters.get("alloc"),
        mem: counters.get("mem"),
      };
    }
  }

  if (socketsUsed === null || tcp === null) {
    return {
      socketsUsed: 0,
      tcpInuse: 0,
      tcpOrphan: 0,
      tcpTw: 0,
      tcpAlloc: 0,
      tcpMemPages: 0,
      parseOk: 0,
    };
  }

  return {
    socketsUsed,
    tcpInuse: tcp.inuse,
    tcpOrphan: tcp.orphan,
    tcpTw: tcp.tw,
    tcpAlloc: tcp.alloc,
    tcpMemPages: tcp.mem,
    parseOk: 1,
  };
 }
 /**
 * Sense entry point: read /proc/net/sockstat, store one row in
 * linux_tcp_socket_stats and return the same values.
 *
 * Best effort: if reading or parsing throws, the error is swallowed and a row
 * with all counters 0, parseOk 0 and an empty rawSockstat is still written.
 * rawSockstat holds at most the first RAW_MAX characters of the file.
 *
 * @param db database handle; only `db.insert(table).values(row)` is used here
 * @param _peers unused
 * @returns the inserted values, including `ts` (Date.now() at the start)
 */
 export async function compute(db, _peers) {
  const ts = Date.now();

  let stats = {
    socketsUsed: 0,
    tcpInuse: 0,
    tcpOrphan: 0,
    tcpTw: 0,
    tcpAlloc: 0,
    tcpMemPages: 0,
    parseOk: 0,
  };
  let rawSockstat = "";

  try {
    const content = await readFile(SOCKSTAT_PATH, "utf8");
    const parsed = parseSockstat(content);
    // Only commit both values once reading and parsing have succeeded.
    stats = parsed;
    rawSockstat =
      content.length > RAW_MAX ? content.slice(0, RAW_MAX) : content;
  } catch {
    // keep the zeroed defaults and the empty raw text
  }

  const record = {
    ts,
    socketsUsed: stats.socketsUsed,
    tcpInuse: stats.tcpInuse,
    tcpOrphan: stats.tcpOrphan,
    tcpTw: stats.tcpTw,
    tcpAlloc: stats.tcpAlloc,
    tcpMemPages: stats.tcpMemPages,
    parseOk: stats.parseOk,
    rawSockstat,
  };

  await db.insert(linuxTcpSocketStats).values(record);

  // Hand back a separate object from the one passed to the insert.
  return { ...record };
 }
--- a/senses/linux-tcp-socket-stats/migrations/0001_init.sql
+++ b/senses/linux-tcp-socket-stats/migrations/0001_init.sql
@ -0,0 +1,14 @@
 -- Migration: 0001_init
 -- Creates the linux_tcp_socket_stats table for linux-tcp-socket-stats sense.
 -- The sense's compute() inserts exactly one row per run, also when
 -- /proc/net/sockstat could not be read or parsed (see parse_ok).
 CREATE TABLE IF NOT EXISTS linux_tcp_socket_stats (
  -- sample time: Date.now() (milliseconds since the Unix epoch) taken in compute()
  -- NOTE(review): as the primary key, two samples in the same millisecond would collide - confirm acceptable
  ts               INTEGER PRIMARY KEY,
  -- value following "used" on the "sockets:" line
  sockets_used     INTEGER NOT NULL,
  -- inuse / orphan / tw / alloc / mem values from the "TCP:" line
  tcp_inuse        INTEGER NOT NULL,
  tcp_orphan       INTEGER NOT NULL,
  tcp_tw           INTEGER NOT NULL,
  tcp_alloc        INTEGER NOT NULL,
  tcp_mem_pages    INTEGER NOT NULL,
  -- 1 when both the "sockets:" and "TCP:" lines were parsed; 0 otherwise, in
  -- which case all counters above are stored as 0
  parse_ok         INTEGER NOT NULL,
  -- file content, cut to the first 4096 characters; empty string on failure
  raw_sockstat     TEXT    NOT NULL
 );
--- a/senses/linux-tcp-socket-stats/schema.ts
+++ b/senses/linux-tcp-socket-stats/schema.ts
@ -0,0 +1,13 @@
 import { integer, sqliteTable, text } from "drizzle-orm/sqlite-core";
 /**
 * Drizzle table definition for `linux_tcp_socket_stats`. Column names and
 * types mirror migrations/0001_init.sql. The sense's compute() inserts one row
 * per run, keyed by `ts` (the Date.now() value of that run).
 */
 export const linuxTcpSocketStats = sqliteTable("linux_tcp_socket_stats", {
  ts: integer("ts").primaryKey(),
  socketsUsed: integer("sockets_used").notNull(),
  tcpInuse: integer("tcp_inuse").notNull(),
  tcpOrphan: integer("tcp_orphan").notNull(),
  tcpTw: integer("tcp_tw").notNull(),
  tcpAlloc: integer("tcp_alloc").notNull(),
  tcpMemPages: integer("tcp_mem_pages").notNull(),
  parseOk: integer("parse_ok").notNull(),
  rawSockstat: text("raw_sockstat").notNull(),
 });
--- a/workflows/sense-generator/index.ts
+++ b/workflows/sense-generator/index.ts
@ -64,7 +64,7 @@ function cursorAgent(prompt: string, mode: "plan" | "ask" | "default", cwd: stri
 /**
 * Run the hermes CLI with `prompt` as its `-q` argument and return whatever
 * `run` returns for that command line.
 *
 * The prompt is wrapped in single quotes for the shell, so every embedded `'`
 * is rewritten as `'\''`. `2>&1 || true` merges stderr into stdout and makes
 * the command line exit successfully even when hermes itself fails.
 */
 function hermesRun(prompt: string): string {
  const escaped = prompt.replace(/'/g, "'\\''");
  return run(
-    `hermes -q '${escaped}' --model anthropic/claude-sonnet-4 --no-memory 2>&1 || true`,
+    `hermes chat -q '${escaped}' --model anthropic/claude-sonnet-4 -t terminal --yolo 2>&1 || true`,
  );
 }
@ -281,17 +281,32 @@ Reply with either:
 - "FAIL: <error details>" if it doesn't`;
        const result = hermesRun(testPrompt);
-        ctx.log(`tester: result — ${result.substring(0, 200)}`);
+        ctx.log(`tester: raw result — ${result.substring(0, 300)}`);
-        const passed = result.toUpperCase().includes("PASS");
+        // Use LLM to judge pass/fail instead of fragile string matching
-        if (passed) {
+        const verdict = llmExtract<{ passed: boolean; reason: string }>(
-          return { type: "test_passed", senseName, result };
+          `Test output for sense "${senseName}":\n\n${result.substring(0, 4000)}`,
          "judge_test_result",
          "Determine whether the test passed or failed based on the output",
          {
            type: "object",
            properties: {
              passed: { type: "boolean", description: "true if the sense was successfully triggered and returned valid data" },
              reason: { type: "string", description: "Brief explanation of why it passed or failed" },
            },
            required: ["passed", "reason"],
          },
        );
        ctx.log(`tester: verdict — passed=${verdict.passed}, reason="${verdict.reason}"`);
        if (verdict.passed) {
          return { type: "test_passed", senseName, result: verdict.reason };
        }
        return {
          type: "test_failed",
          senseName,
-          reason: result,
+          reason: verdict.reason,
          attempt,
        };
      },