feat(cli,core): RFC-003 Phase 6 — Knowledge Layer + review fixes

Knowledge Layer:
- knowledge.yaml parser in core (include/exclude globs)
- Chunking: markdown (by heading), TypeScript/JS (by function/block)
- knowledge.db: SQLite storage for chunks + embeddings (node:sqlite)
- CLI: nerve knowledge sync, nerve knowledge query
- Scoping: -r (specific repo), -g (global search), mutually exclusive
- Repo registry (data/knowledge-repos.json under the Nerve root) for global search
- Placeholder embedding (content hash) until remote service ready
- Word-overlap similarity for query ranking

Review fixes (from PR #241 feedback):
- KNOWN_AGENT_ADAPTER_IDS: add cursor/hermes/codex + sync docs
- collectWorkflowSpecAgentReferences: document regex comment false-positive
- assertZodMetaSchemas: one-time compile-time validation utility

Closes #240
Ref: #234
2026-04-29 05:39:00 +00:00
parent 3d89fc4a7a
commit 62434847c4
27 changed files with 956 additions and 3 deletions
@@ -23,6 +23,7 @@
    "@uncaged/nerve-core": "workspace:*",
    "@uncaged/nerve-store": "workspace:*",
    "citty": "^0.1.6",
+    "picomatch": "^4.0.2",
    "yaml": "^2.8.3"
  },
  "devDependencies": {
@@ -7,6 +7,7 @@ import { createCommand } from "./commands/create.js";
 import { daemonCommand } from "./commands/daemon.js";
 import { devCommand } from "./commands/dev.js";
 import { initCommand } from "./commands/init.js";
+import { knowledgeCommand } from "./commands/knowledge.js";
 import { remoteCommand } from "./commands/remote.js";
 import { senseCommand } from "./commands/sense.js";
 import { storeCommand } from "./commands/store.js";
@@ -46,6 +47,7 @@ const main = defineCommand({
    daemon: daemonCommand,
    dev: devCommand,
    validate: validateCommand,
+    knowledge: knowledgeCommand,
    sense: senseCommand,
    store: storeCommand,
    remote: remoteCommand,
@@ -78,6 +78,7 @@ const GITIGNORE = `data/
 logs/
 nerve.pid
 node_modules/
+knowledge.db
 `;

 const NERVE_SKILLS_MDC = `---
@@ -0,0 +1,79 @@
+import { existsSync } from "node:fs";
+import { resolve } from "node:path";
+
+import { KNOWLEDGE_DB } from "../knowledge/paths.js";
+import { queryKnowledgeGlobal, queryKnowledgeRepo } from "../knowledge/query.js";
+import { listRegisteredKnowledgeRoots } from "../knowledge/registry.js";
+import { findKnowledgeRepoRoot } from "../knowledge/repo-root.js";
+
+/** Hit count used when `--limit` is missing or unusable. */
+const DEFAULT_LIMIT = 10;
+
+/**
+ * Turn the raw `--limit` argument into a positive hit count.
+ *
+ * @param raw - The flag value as typed by the user, or `undefined` when absent.
+ * @returns The parsed base-10 integer when it is finite and greater than zero;
+ *   otherwise `DEFAULT_LIMIT` (blank input, non-numeric input, zero, negatives).
+ */
+export function parseKnowledgeQueryLimit(raw: string | undefined): number {
+  const trimmed = raw?.trim() ?? "";
+  if (trimmed === "") {
+    return DEFAULT_LIMIT;
+  }
+  const parsed = Number.parseInt(trimmed, 10);
+  if (!Number.isFinite(parsed) || parsed <= 0) {
+    return DEFAULT_LIMIT;
+  }
+  return parsed;
+}
+
+/**
+ * Query every registered repo and print the ranked hits to stdout.
+ *
+ * Writes an error to stderr and exits with status 1 when no repo has been
+ * registered; prints "No results." when the query yields no hits.
+ *
+ * @param queryText - Free-text search string.
+ * @param limit - Maximum number of hits to print.
+ */
+export function runKnowledgeQueryGlobal(queryText: string, limit: number): void {
+  const registeredRoots = listRegisteredKnowledgeRoots();
+  if (registeredRoots.length === 0) {
+    process.stderr.write(
+      "❌ No registered repos — run `nerve knowledge sync` in each repo first.\n",
+    );
+    process.exit(1);
+  }
+
+  const results = queryKnowledgeGlobal(registeredRoots, KNOWLEDGE_DB, queryText, limit);
+  if (results.length === 0) {
+    process.stdout.write("No results.\n");
+    return;
+  }
+
+  results.forEach((hit, index) => {
+    // Hits carry the repo they came from so cross-repo output stays unambiguous.
+    const origin = hit.repoRoot === null ? "" : `[${hit.repoRoot}] `;
+    const header = `${String(index + 1)}. score=${hit.score.toFixed(4)} ${origin}${hit.path} (${hit.slug})`;
+    process.stdout.write(`${header}\n${hit.text}\n---\n`);
+  });
+}
+
+/**
+ * Query a single repo's knowledge.db and print the ranked hits to stdout.
+ *
+ * The repo is the `-r` path when one is given, otherwise the nearest ancestor of
+ * the current working directory that contains knowledge.yaml. Exits with status 1
+ * when no repo can be determined or the repo has no knowledge.db yet.
+ *
+ * @param repoFlag - Value of `-r`, or `undefined` when the flag was not passed.
+ * @param queryText - Free-text search string.
+ * @param limit - Maximum number of hits to print.
+ */
+export function runKnowledgeQueryScoped(
+  repoFlag: string | undefined,
+  queryText: string,
+  limit: number,
+): void {
+  // String() keeps this safe even if the CLI parser hands over a non-string value.
+  const explicitRoot = repoFlag === undefined ? "" : String(repoFlag).trim();
+  const repoRoot =
+    explicitRoot.length > 0 ? resolve(explicitRoot) : findKnowledgeRepoRoot(process.cwd());
+
+  if (repoRoot === null) {
+    process.stderr.write("❌ No knowledge.yaml found — use -r <path> or run from a repo root.\n");
+    process.exit(1);
+  }
+
+  // Build the path with the path API (as sync/global query do) instead of
+  // hand-concatenating a "/" separator.
+  const dbPath = resolve(repoRoot, KNOWLEDGE_DB);
+  if (!existsSync(dbPath)) {
+    process.stderr.write(
+      `❌ No ${KNOWLEDGE_DB} in ${repoRoot} — run \`nerve knowledge sync\` first.\n`,
+    );
+    process.exit(1);
+  }
+
+  const hits = queryKnowledgeRepo(repoRoot, dbPath, queryText, limit);
+  if (hits.length === 0) {
+    process.stdout.write("No results.\n");
+    return;
+  }
+  hits.forEach((hit, index) => {
+    process.stdout.write(
+      `${String(index + 1)}. score=${hit.score.toFixed(4)} ${hit.path} (${hit.slug})\n${hit.text}\n---\n`,
+    );
+  });
+}
@@ -0,0 +1,93 @@
+import { defineCommand } from "citty";
+
+import { knowledgeQueryScopeConflictMessage } from "../knowledge/query-scope.js";
+import { findKnowledgeRepoRoot } from "../knowledge/repo-root.js";
+import { runKnowledgeSync } from "../knowledge/sync.js";
+import {
+  parseKnowledgeQueryLimit,
+  runKnowledgeQueryGlobal,
+  runKnowledgeQueryScoped,
+} from "./knowledge-query-run.js";
+
+/**
+ * `nerve knowledge sync` — locate the enclosing knowledge repo from the current
+ * working directory and rebuild its knowledge.db. Exits with status 1 when no
+ * knowledge.yaml is found or the sync throws.
+ */
+const syncCommand = defineCommand({
+  meta: {
+    name: "sync",
+    description: "Chunk matching files from knowledge.yaml and rebuild knowledge.db",
+  },
+  async run() {
+    const repoRoot = findKnowledgeRepoRoot(process.cwd());
+    if (repoRoot === null) {
+      process.stderr.write(
+        "❌ No knowledge.yaml found — run from a repo that contains knowledge.yaml.\n",
+      );
+      process.exit(1);
+    }
+
+    try {
+      const { filesIndexed, chunksWritten, dbPath } = runKnowledgeSync(repoRoot);
+      process.stdout.write(
+        `✅ Indexed ${String(filesIndexed)} file(s), ${String(chunksWritten)} chunk(s) → ${dbPath}\n`,
+      );
+    } catch (err) {
+      // Non-Error throwables are stringified so the user always sees a reason.
+      const reason = err instanceof Error ? err.message : String(err);
+      process.stderr.write(`❌ knowledge sync failed: ${reason}\n`);
+      process.exit(1);
+    }
+  },
+});
+
+/**
+ * `nerve knowledge query <text>` — search one repo (`-r`, or the repo enclosing
+ * the current directory) or all registered repos (`-g`). `-r` and `-g` cannot be
+ * combined; the command exits with status 1 when both are given.
+ */
+const queryCommand = defineCommand({
+  meta: {
+    name: "query",
+    description: "Search indexed knowledge (word overlap placeholder until embeddings)",
+  },
+  args: {
+    query: {
+      type: "positional",
+      required: true,
+      description: "Search text",
+    },
+    r: {
+      type: "string",
+      description: "Use knowledge.db from another repo root",
+      required: false,
+    },
+    g: {
+      type: "boolean",
+      description: "Search across all repos registered via prior sync",
+      default: false,
+    },
+    limit: {
+      type: "string",
+      description: "Max hits (default 10)",
+      required: false,
+    },
+  },
+  async run({ args }) {
+    const scopeError = knowledgeQueryScopeConflictMessage(args.r, args.g);
+    if (scopeError !== null) {
+      process.stderr.write(`${scopeError}\n`);
+      process.exit(1);
+    }
+
+    const maxHits = parseKnowledgeQueryLimit(args.limit);
+    const searchText = args.query;
+
+    if (args.g) {
+      runKnowledgeQueryGlobal(searchText, maxHits);
+    } else {
+      runKnowledgeQueryScoped(args.r, searchText, maxHits);
+    }
+  },
+});
+
+/** Parent `knowledge` command; it only groups the `sync` and `query` subcommands. */
+export const knowledgeCommand = defineCommand({
+  meta: {
+    name: "knowledge",
+    description: "Project knowledge index (knowledge.yaml + knowledge.db, RFC-003)",
+  },
+  subCommands: {
+    sync: syncCommand,
+    query: queryCommand,
+  },
+});
@@ -0,0 +1,88 @@
+/** ATX heading line: one to six `#`, whitespace, then the title (capture group 2). */
+const HEADING_RE = /^(#{1,6})\s+(.+)$/;
+
+/**
+ * Fence line of a fenced code block: up to three spaces of indent, then a run of
+ * three or more backticks or tildes (group 1), then the rest of the line (group 2).
+ */
+const FENCE_RE = /^ {0,3}(`{3,}|~{3,})(.*)$/;
+
+/** One slice of a Markdown file together with the slug that identifies it. */
+export type MarkdownChunk = {
+  slug: string;
+  text: string;
+};
+
+/**
+ * Reduce a heading title to a slug fragment: lower-cased, whitespace runs turned
+ * into `-`, everything outside `[a-z0-9_-]` removed. Falls back to "section" when
+ * nothing survives (e.g. a title with no ASCII letters or digits).
+ */
+function slugPart(title: string): string {
+  const t = title.trim().toLowerCase().replace(/\s+/g, "-");
+  const safe = t.replace(/[^a-z0-9_-]+/g, "");
+  return safe.length > 0 ? safe : "section";
+}
+
+/**
+ * Keep a section as one chunk unless it has more than 24 blank-line-separated
+ * paragraphs; longer sections are cut into `-part0`, `-part1`, … of up to 24
+ * paragraphs each.
+ */
+function splitLargeMarkdownChunk(slugBase: string, text: string): MarkdownChunk[] {
+  const maxParagraphs = 24;
+  const paragraphs = text.split(/\n\s*\n/).filter((p) => p.trim().length > 0);
+  if (paragraphs.length <= maxParagraphs) {
+    return [{ slug: slugBase, text }];
+  }
+  const chunks: MarkdownChunk[] = [];
+  let part = 0;
+  for (let i = 0; i < paragraphs.length; i += maxParagraphs) {
+    const slice = paragraphs.slice(i, i + maxParagraphs).join("\n\n");
+    chunks.push({ slug: `${slugBase}-part${String(part)}`, text: slice });
+    part += 1;
+  }
+  return chunks;
+}
+
+/**
+ * Indices of the lines that are real headings.
+ *
+ * Lines inside fenced code blocks are ignored, so a `# comment` in a shell or
+ * Python snippet no longer splits the surrounding section in the middle of the
+ * code block. An unclosed fence runs to the end of the document.
+ */
+function headingLineIndices(lines: string[]): number[] {
+  const headingIdx: number[] = [];
+  // Marker of the fence that is currently open, or null when outside any fence.
+  let openFence: string | null = null;
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i] ?? "";
+    const fence = FENCE_RE.exec(line);
+    if (fence !== null) {
+      const marker = fence[1] ?? "";
+      const rest = fence[2] ?? "";
+      if (openFence === null) {
+        openFence = marker;
+      } else if (
+        marker[0] === openFence[0] &&
+        marker.length >= openFence.length &&
+        rest.trim().length === 0
+      ) {
+        // A closing fence uses the same character, is at least as long as the
+        // opener, and carries no info string.
+        openFence = null;
+      }
+      continue;
+    }
+    if (openFence === null && HEADING_RE.test(line)) {
+      headingIdx.push(i);
+    }
+  }
+  return headingIdx;
+}
+
+/**
+ * Build one chunk per heading section (heading line through the line before the
+ * next heading), preceded by a `#preamble` chunk for any non-blank text above the
+ * first heading. Section slugs end in the heading's ordinal so repeated titles
+ * stay distinct.
+ */
+function chunksFromHeadings(
+  lines: string[],
+  headingIdx: number[],
+  baseSlug: string,
+): MarkdownChunk[] {
+  const chunks: MarkdownChunk[] = [];
+  const firstHead = headingIdx[0] ?? 0;
+  if (firstHead > 0) {
+    const preamble = lines.slice(0, firstHead).join("\n").trim();
+    if (preamble.length > 0) {
+      chunks.push(...splitLargeMarkdownChunk(`${baseSlug}#preamble`, preamble));
+    }
+  }
+
+  for (let h = 0; h < headingIdx.length; h++) {
+    const start = headingIdx[h] ?? 0;
+    const end = h + 1 < headingIdx.length ? (headingIdx[h + 1] ?? lines.length) : lines.length;
+    const block = lines.slice(start, end).join("\n").trim();
+    if (block.length === 0) {
+      continue;
+    }
+    const titleLine = lines[start] ?? "";
+    const ht = HEADING_RE.exec(titleLine);
+    const suffix = ht !== null ? slugPart(ht[2] ?? "h") : `h${String(h)}`;
+    chunks.push(...splitLargeMarkdownChunk(`${baseSlug}#${suffix}-${String(h)}`, block));
+  }
+  return chunks;
+}
+
+/**
+ * Split Markdown by headings; long sections are split further by blank-line paragraphs.
+ *
+ * @param relativePath - Repo-relative POSIX path; `/` becomes `-` in the slug prefix.
+ * @param source - Full file contents (LF or CRLF line endings).
+ * @returns Chunks in document order; empty for a blank file. A file without
+ *   headings yields a single `#doc` chunk (itself split when very long).
+ */
+export function chunkMarkdown(relativePath: string, source: string): MarkdownChunk[] {
+  const lines = source.split(/\r?\n/);
+  const headingIdx = headingLineIndices(lines);
+  const baseSlug = relativePath.replace(/\//g, "-");
+
+  if (headingIdx.length === 0) {
+    const text = source.trim();
+    if (text.length === 0) {
+      return [];
+    }
+    return splitLargeMarkdownChunk(`${baseSlug}#doc`, text);
+  }
+
+  return chunksFromHeadings(lines, headingIdx, baseSlug);
+}
@@ -0,0 +1,87 @@
+/** One slice of a TypeScript/JavaScript file together with the slug that identifies it. */
+export type TsJsChunk = {
+  slug: string;
+  text: string;
+};
+
+/**
+ * Line starts a function-like declaration (heuristic, no full TS parse).
+ *
+ * Recognises `function name(`, `const name = (` and `const name = async function`,
+ * each optionally prefixed by `export` / `async`. `declare` statements never count.
+ * Leading indentation is ignored, so nested declarations match as well.
+ */
+function isFunctionStartLine(line: string): boolean {
+  const t = line.trimStart();
+  if (/^(export\s+)?declare\s+/.test(t)) {
+    return false;
+  }
+  if (/^(export\s+)?(async\s+)?function\s+[A-Za-z_$][\w$]*\s*\(/.test(t)) {
+    return true;
+  }
+  if (/^(export\s+)?const\s+[A-Za-z_$][\w$]*\s*=\s*(async\s*)?\(/.test(t)) {
+    return true;
+  }
+  if (/^(export\s+)?const\s+[A-Za-z_$][\w$]*\s*=\s*async\s+function/.test(t)) {
+    return true;
+  }
+  return false;
+}
+
+/** Lower-case an identifier into a slug fragment; "block" when nothing is left. */
+function slugPart(name: string): string {
+  const safe = name.replace(/[^\w$-]+/g, "-").toLowerCase();
+  return safe.length > 0 ? safe : "block";
+}
+
+/** Best-effort declaration name from its first line; "fn" when none is found. */
+function extractRoughName(firstLine: string): string {
+  const m =
+    /function\s+([A-Za-z_$][\w$]*)/.exec(firstLine) ?? /const\s+([A-Za-z_$][\w$]*)/.exec(firstLine);
+  return m !== null && m[1] !== undefined ? m[1] : "fn";
+}
+
+/**
+ * Split `.ts` / `.js` by function-like lines; falls back to paragraph chunks.
+ *
+ * Each chunk runs from one function-like line up to the next. Text above the
+ * first function-like line (imports, constants, type declarations) is kept as a
+ * leading `#preamble` chunk instead of being dropped from the index.
+ *
+ * @param relativePath - Repo-relative POSIX path; `.` and `/` become `-` in slugs.
+ * @param source - Full file contents (LF or CRLF line endings).
+ * @returns Chunks in source order; empty for a blank file.
+ */
+export function chunkTypeScriptOrJavaScript(relativePath: string, source: string): TsJsChunk[] {
+  const baseSlug = relativePath.replace(/\./g, "-").replace(/\//g, "-");
+  const lines = source.split(/\r?\n/);
+  const starts: number[] = [];
+  lines.forEach((line, i) => {
+    if (isFunctionStartLine(line)) {
+      starts.push(i);
+    }
+  });
+
+  const firstStart = starts[0];
+  if (firstStart === undefined) {
+    return paragraphFallbackChunks(baseSlug, source);
+  }
+
+  const chunks: TsJsChunk[] = [];
+  const preamble = lines.slice(0, firstStart).join("\n").trim();
+  if (preamble.length > 0) {
+    chunks.push({ slug: `${baseSlug}#preamble`, text: preamble });
+  }
+
+  for (let s = 0; s < starts.length; s++) {
+    const start = starts[s] ?? 0;
+    const end = starts[s + 1] ?? lines.length;
+    // Never empty: the block begins with a line that matched a declaration pattern.
+    const block = lines.slice(start, end).join("\n").trim();
+    const name = extractRoughName(lines[start] ?? "");
+    chunks.push({
+      // The ordinal keeps slugs unique when two declarations share a name.
+      slug: `${baseSlug}#${slugPart(name)}-${String(s)}`,
+      text: block,
+    });
+  }
+
+  return chunks;
+}
+
+/** Chunk a file without function-like lines: one chunk per blank-line-separated paragraph. */
+function paragraphFallbackChunks(baseSlug: string, source: string): TsJsChunk[] {
+  const text = source.trim();
+  if (text.length === 0) {
+    return [];
+  }
+  // Non-blank trimmed text always yields at least one paragraph.
+  return text
+    .split(/\n\s*\n/)
+    .filter((p) => p.trim().length > 0)
+    .map((p, i) => ({
+      slug: `${baseSlug}#para-${String(i)}`,
+      text: p.trim(),
+    }));
+}
@@ -0,0 +1,23 @@
+import { chunkMarkdown } from "./chunk-markdown.js";
+import { chunkTypeScriptOrJavaScript } from "./chunk-typescript.js";
+
+/** One slice of an indexed file together with the slug that identifies it. */
+export type KnowledgeChunk = {
+  slug: string;
+  text: string;
+};
+
+/**
+ * Chunk a file with the strategy that fits its extension (case-insensitive):
+ * `.md` by heading, `.ts`/`.tsx`/`.js`/`.jsx` by function-like line, anything
+ * else as one whole-file chunk.
+ *
+ * @param relativePath - Repo-relative POSIX path of the file.
+ * @param source - Full file contents.
+ * @returns Chunks in source order; empty when the file holds only whitespace.
+ */
+export function chunkKnowledgeFile(relativePath: string, source: string): KnowledgeChunk[] {
+  const lower = relativePath.toLowerCase();
+  if (lower.endsWith(".md")) {
+    return chunkMarkdown(relativePath, source);
+  }
+  const isScript = [".ts", ".tsx", ".js", ".jsx"].some((ext) => lower.endsWith(ext));
+  if (isScript) {
+    return chunkTypeScriptOrJavaScript(relativePath, source);
+  }
+  const text = source.trim();
+  if (text.length === 0) {
+    // Match the Markdown and TS/JS chunkers: a blank file contributes no chunk
+    // rather than an empty-text row in the index.
+    return [];
+  }
+  return [{ slug: `${relativePath.replace(/\//g, "-")}#0`, text }];
+}
@@ -0,0 +1,19 @@
+import picomatch from "picomatch";
+
+/** `dot: true` lets exclude globs match dot-files and dot-directories too. */
+const PICOMATCH_OPTS = { dot: true } as const;
+
+/**
+ * Report whether a path is excluded from indexing.
+ *
+ * @param relativePosixPath - Repo-relative path using `/` separators.
+ * @param excludePatterns - Exclude globs; an empty list excludes nothing.
+ * @returns `true` as soon as one glob matches the path.
+ */
+export function matchesKnowledgeExclude(
+  relativePosixPath: string,
+  excludePatterns: ReadonlyArray<string>,
+): boolean {
+  return excludePatterns.some((glob) => picomatch(glob, PICOMATCH_OPTS)(relativePosixPath));
+}
@@ -0,0 +1,7 @@
+import { createHash } from "node:crypto";
+
+/**
+ * Placeholder embedding used until a remote embedding service exists (RFC-003).
+ *
+ * @param text - Chunk text to derive the bytes from.
+ * @returns 128 bytes: the SHA-256 digest of the UTF-8 text repeated four times.
+ *   Identical text always yields identical bytes.
+ */
+export function fakeEmbeddingBytes(text: string): Buffer {
+  const digest = createHash("sha256").update(text, "utf8").digest();
+  const copies = Array.from({ length: 4 }, () => digest);
+  return Buffer.concat(copies);
+}
@@ -0,0 +1,42 @@
+import { globSync, statSync } from "node:fs";
+import { join } from "node:path";
+
+import type { KnowledgeConfig } from "@uncaged/nerve-core";
+
+import { matchesKnowledgeExclude } from "./exclude-match.js";
+
+/** Rewrite every backslash separator as a forward slash. */
+function toPosix(rel: string): string {
+  return rel.replace(/\\/g, "/");
+}
+
+/**
+ * Whether `rel`, resolved against `repoRoot`, is a regular file. Directories and
+ * paths that cannot be stat'ed (missing, unreadable, …) count as "not a file".
+ */
+function isFileUnderRoot(repoRoot: string, rel: string): boolean {
+  let regularFile = false;
+  try {
+    const stats = statSync(join(repoRoot, rel));
+    regularFile = stats.isFile();
+  } catch {
+    regularFile = false;
+  }
+  return regularFile;
+}
+
+/**
+ * Collect the files selected by a knowledge config.
+ *
+ * @param repoRoot - Directory the `include` globs are evaluated against.
+ * @param config - Parsed knowledge.yaml (`include` and `exclude` glob lists).
+ * @returns De-duplicated, sorted, repo-relative POSIX paths of regular files that
+ *   match at least one `include` glob and no `exclude` glob.
+ */
+export function listKnowledgeFiles(repoRoot: string, config: KnowledgeConfig): string[] {
+  const kept = new Set<string>();
+  for (const includeGlob of config.include) {
+    // NOTE(review): `windowsPathsNoEscape` is an option of the `glob` npm package;
+    // confirm that node:fs `globSync` honours it rather than ignoring it.
+    const candidates = globSync(includeGlob, {
+      cwd: repoRoot,
+      windowsPathsNoEscape: true,
+    });
+    for (const candidate of candidates) {
+      const relPosix = toPosix(candidate);
+      const wanted =
+        isFileUnderRoot(repoRoot, relPosix) && !matchesKnowledgeExclude(relPosix, config.exclude);
+      if (wanted) {
+        kept.add(relPosix);
+      }
+    }
+  }
+  return Array.from(kept).sort();
+}
@@ -0,0 +1,96 @@
+import { createHash } from "node:crypto";
+import { DatabaseSync } from "node:sqlite";
+
+import { fakeEmbeddingBytes } from "./fake-embedding.js";
+
+/** A chunk as read back from the `chunks` table, including its stored embedding bytes. */
+export type KnowledgeChunkRow = {
+  path: string;
+  slug: string;
+  chunkIndex: number;
+  text: string;
+  embedding: Buffer;
+  contentHash: string;
+};
+
+/** A chunk to be written; it carries no embedding because that is derived from `text` at insert time. */
+export type KnowledgeChunkInsert = {
+  path: string;
+  slug: string;
+  chunkIndex: number;
+  text: string;
+  contentHash: string;
+};
+
+/**
+ * DDL executed each time a database is opened. Both statements use
+ * `IF NOT EXISTS`, so re-running them on an existing database is harmless.
+ * Rows are unique per (path, chunk_index); a secondary index covers `path`.
+ */
+const SCHEMA = `
+CREATE TABLE IF NOT EXISTS chunks (
+  id INTEGER PRIMARY KEY AUTOINCREMENT,
+  path TEXT NOT NULL,
+  chunk_index INTEGER NOT NULL,
+  slug TEXT NOT NULL,
+  text TEXT NOT NULL,
+  embedding BLOB NOT NULL,
+  content_hash TEXT NOT NULL,
+  UNIQUE(path, chunk_index)
+);
+
+CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(path);
+`;
+
+/**
+ * Open the SQLite database at `dbPath` and make sure the `chunks` schema exists.
+ * The caller owns the returned handle and is responsible for closing it.
+ */
+export function openKnowledgeDb(dbPath: string): DatabaseSync {
+  const db = new DatabaseSync(dbPath);
+  db.exec(SCHEMA);
+  return db;
+}
+
+/**
+ * Fingerprint of a chunk's text.
+ *
+ * @param text - Chunk text, hashed as UTF-8.
+ * @returns Lower-case hexadecimal SHA-256 digest (64 characters).
+ */
+export function contentHash(text: string): string {
+  const hasher = createHash("sha256");
+  hasher.update(text, "utf8");
+  return hasher.digest("hex");
+}
+
+/**
+ * Atomically replace the whole `chunks` table with `rows`.
+ *
+ * Runs inside a `BEGIN IMMEDIATE` transaction: either every old row is removed
+ * and every new row inserted, or the table is left as it was. The embedding of
+ * each row is derived from its text at insert time.
+ *
+ * @param db - Open database whose schema has already been created.
+ * @param rows - New table contents; (path, chunkIndex) must be unique.
+ * @throws Whatever error made the delete, an insert, or the commit fail.
+ */
+export function replaceAllChunks(db: DatabaseSync, rows: KnowledgeChunkInsert[]): void {
+  db.exec("BEGIN IMMEDIATE");
+  try {
+    db.prepare("DELETE FROM chunks").run();
+    const insert = db.prepare(
+      `INSERT INTO chunks (path, chunk_index, slug, text, embedding, content_hash)
+       VALUES (@path, @chunk_index, @slug, @text, @embedding, @content_hash)`,
+    );
+    for (const row of rows) {
+      insert.run({
+        path: row.path,
+        chunk_index: row.chunkIndex,
+        slug: row.slug,
+        text: row.text,
+        embedding: fakeEmbeddingBytes(row.text),
+        content_hash: row.contentHash,
+      });
+    }
+    db.exec("COMMIT");
+  } catch (e) {
+    try {
+      db.exec("ROLLBACK");
+    } catch {
+      // SQLite rolls back by itself on some failures (e.g. disk full, I/O error);
+      // ROLLBACK then fails with "no transaction is active". Ignore that so the
+      // caller sees the error that actually caused the failure.
+    }
+    throw e;
+  }
+}
+
+/**
+ * Read every stored chunk.
+ *
+ * @param db - Open database whose schema has already been created.
+ * @returns All rows ordered by path, then chunk index, with column names mapped
+ *   from snake_case to the camelCase fields of `KnowledgeChunkRow`.
+ */
+export function loadAllChunks(db: DatabaseSync): KnowledgeChunkRow[] {
+  type StoredRow = {
+    path: string;
+    chunk_index: number;
+    slug: string;
+    text: string;
+    embedding: Buffer;
+    content_hash: string;
+  };
+  const stored = db
+    .prepare(
+      "SELECT path, chunk_index, slug, text, embedding, content_hash FROM chunks ORDER BY path, chunk_index",
+    )
+    .all() as Array<StoredRow>;
+  return stored.map(({ path, slug, chunk_index, text, embedding, content_hash }) => ({
+    path,
+    slug,
+    chunkIndex: chunk_index,
+    text,
+    embedding,
+    contentHash: content_hash,
+  }));
+}
@@ -0,0 +1,2 @@
+/** File name of the per-repo knowledge configuration. */
+export const KNOWLEDGE_YAML = "knowledge.yaml";
+/** File name of the per-repo SQLite chunk index. */
+export const KNOWLEDGE_DB = "knowledge.db";
@@ -0,0 +1,13 @@
+/**
+ * Check the `-r` / `-g` flags of `nerve knowledge query`, which may not be combined.
+ *
+ * @param repoFlag - Value of `-r`; `undefined`, `null` and blank strings count as "not given".
+ * @param globalFlag - Whether `-g` was passed.
+ * @returns The error message to show when both scopes were requested, otherwise `null`.
+ */
+export function knowledgeQueryScopeConflictMessage(
+  repoFlag: string | null | undefined,
+  globalFlag: boolean,
+): string | null {
+  if (!globalFlag) {
+    return null;
+  }
+  const repoGiven = repoFlag != null && String(repoFlag).trim() !== "";
+  return repoGiven ? "❌ Use either -r <path> or -g, not both." : null;
+}
@@ -0,0 +1,82 @@
+import { existsSync } from "node:fs";
+import { join } from "node:path";
+
+import type { KnowledgeChunkRow } from "./knowledge-db.js";
+import { loadAllChunks, openKnowledgeDb } from "./knowledge-db.js";
+import { wordOverlapScore } from "./word-overlap.js";
+
+/** One ranked search result: the matching chunk, the repo it came from, and its score. */
+export type KnowledgeQueryHit = {
+  repoRoot: string | null;
+  path: string;
+  slug: string;
+  text: string;
+  score: number;
+};
+
+/**
+ * Rank chunks against a query by word overlap.
+ *
+ * Each chunk is scored on its text plus its path, so words from the file name
+ * count too. Chunks that share no word with the query (score 0) are dropped, so
+ * an unrelated query yields an empty list instead of `limit` arbitrary chunks.
+ *
+ * @param query - Free-text search string.
+ * @param chunks - Candidate chunks; not modified.
+ * @param limit - Maximum number of results.
+ * @returns At most `limit` matching chunks, best score first.
+ */
+export function rankChunksByWordOverlap(
+  query: string,
+  chunks: KnowledgeChunkRow[],
+  limit: number,
+): Array<{ chunk: KnowledgeChunkRow; score: number }> {
+  return chunks
+    .map((chunk) => ({
+      chunk,
+      score: wordOverlapScore(query, `${chunk.text}\n${chunk.path}`),
+    }))
+    .filter((entry) => entry.score > 0)
+    .sort((a, b) => b.score - a.score)
+    .slice(0, limit);
+}
+
+/**
+ * Search a single repo's knowledge database.
+ *
+ * @param repoRoot - Repo the database belongs to; copied onto every hit.
+ * @param dbPath - Path of the knowledge.db file to open.
+ * @param queryText - Free-text search string.
+ * @param limit - Maximum number of hits.
+ * @returns Ranked hits, best first. The database is closed before returning,
+ *   also when loading or ranking throws.
+ */
+export function queryKnowledgeRepo(
+  repoRoot: string,
+  dbPath: string,
+  queryText: string,
+  limit: number,
+): KnowledgeQueryHit[] {
+  const db = openKnowledgeDb(dbPath);
+  try {
+    const hits: KnowledgeQueryHit[] = [];
+    for (const { chunk, score } of rankChunksByWordOverlap(queryText, loadAllChunks(db), limit)) {
+      hits.push({ repoRoot, path: chunk.path, slug: chunk.slug, text: chunk.text, score });
+    }
+    return hits;
+  } finally {
+    db.close();
+  }
+}
+
+/**
+ * Search several repos and merge their results.
+ *
+ * Each repo contributes up to `limit` of its own best hits; the merged list is
+ * re-sorted by score and cut to `limit`. Repos without a database file are skipped.
+ *
+ * @param repoRoots - Repo root directories to search.
+ * @param dbFileName - Database file name expected directly under each root.
+ * @param queryText - Free-text search string.
+ * @param limit - Maximum number of hits overall.
+ * @returns Ranked hits across all repos, best first, each tagged with its repo root.
+ */
+export function queryKnowledgeGlobal(
+  repoRoots: ReadonlyArray<string>,
+  dbFileName: string,
+  queryText: string,
+  limit: number,
+): KnowledgeQueryHit[] {
+  const merged = repoRoots.flatMap((root) => {
+    const dbPath = join(root, dbFileName);
+    return existsSync(dbPath) ? queryKnowledgeRepo(root, dbPath, queryText, limit) : [];
+  });
+  merged.sort((a, b) => b.score - a.score);
+  return merged.slice(0, limit);
+}
@@ -0,0 +1,55 @@
+import { mkdirSync, readFileSync, writeFileSync } from "node:fs";
+import { dirname } from "node:path";
+
+import { getNerveRoot } from "../workspace.js";
+
+/** On-disk shape of the registry: the repo roots recorded by `registerKnowledgeRepoRoot`. */
+export type KnowledgeRepoRegistry = {
+  roots: ReadonlyArray<string>;
+};
+
+/** Registry file name inside the Nerve root's `data/` directory. */
+const FILE_NAME = "knowledge-repos.json";
+
+/** Path of the registry file: `<nerve root>/data/knowledge-repos.json`. */
+export function getKnowledgeRegistryPath(): string {
+  return `${getNerveRoot()}/data/${FILE_NAME}`;
+}
+
+/** Fresh empty registry, used when the file is missing or unreadable. */
+function defaultRegistry(): KnowledgeRepoRegistry {
+  return { roots: [] };
+}
+
+/**
+ * Load the registry of knowledge repos.
+ *
+ * Best-effort by design: a missing file, invalid JSON, or JSON without a `roots`
+ * array all yield an empty registry instead of an error.
+ *
+ * @returns The registered roots, de-duplicated and sorted; non-string entries
+ *   in the file are ignored.
+ */
+export function readKnowledgeRegistry(): KnowledgeRepoRegistry {
+  const registryPath = getKnowledgeRegistryPath();
+  try {
+    const decoded: unknown = JSON.parse(readFileSync(registryPath, "utf8"));
+    if (
+      typeof decoded !== "object" ||
+      decoded === null ||
+      !("roots" in decoded) ||
+      !Array.isArray(decoded.roots)
+    ) {
+      return defaultRegistry();
+    }
+    const unique = new Set<string>();
+    for (const entry of decoded.roots) {
+      if (typeof entry === "string") {
+        unique.add(entry);
+      }
+    }
+    return { roots: Array.from(unique).sort() };
+  } catch {
+    // missing or invalid — treat as empty
+    return defaultRegistry();
+  }
+}
+
+/**
+ * Add a repo root to the registry file, creating the file and its directory
+ * when needed. Blank input is ignored; an already-registered root is a no-op
+ * apart from rewriting the file.
+ *
+ * @param repoRootAbsolute - Repo root to record. It is trimmed but not otherwise
+ *   normalised, so callers are expected to pass an absolute path.
+ */
+export function registerKnowledgeRepoRoot(repoRootAbsolute: string): void {
+  const root = repoRootAbsolute.trim();
+  if (root === "") {
+    return;
+  }
+
+  const known = new Set<string>(readKnowledgeRegistry().roots);
+  known.add(root);
+  const updated: KnowledgeRepoRegistry = { roots: Array.from(known).sort() };
+
+  const registryPath = getKnowledgeRegistryPath();
+  mkdirSync(dirname(registryPath), { recursive: true });
+  const serialized = JSON.stringify(updated, null, 2);
+  writeFileSync(registryPath, `${serialized}\n`, "utf8");
+}
+
+/** Registered repo roots as a fresh mutable array (a copy of the registry's list). */
+export function listRegisteredKnowledgeRoots(): string[] {
+  return [...readKnowledgeRegistry().roots];
+}
@@ -0,0 +1,21 @@
+import { existsSync } from "node:fs";
+import { dirname, join, resolve } from "node:path";
+
+import { KNOWLEDGE_YAML } from "./paths.js";
+
+/**
+ * Find the knowledge repo that contains `startDir`.
+ *
+ * @param startDir - Directory to start from; resolved to an absolute path first.
+ * @returns The nearest directory, starting at `startDir` and moving upward, that
+ *   contains `knowledge.yaml`, or `null` when the filesystem root is reached
+ *   without finding one.
+ */
+export function findKnowledgeRepoRoot(startDir: string): string | null {
+  let current = resolve(startDir);
+  let previous: string;
+  do {
+    if (existsSync(join(current, KNOWLEDGE_YAML))) {
+      return current;
+    }
+    previous = current;
+    current = dirname(current);
+    // dirname() of the filesystem root is the root itself, which ends the walk.
+  } while (current !== previous);
+  return null;
+}
@@ -0,0 +1,73 @@
+import { readFileSync } from "node:fs";
+import { join } from "node:path";
+
+import { type KnowledgeConfig, parseKnowledgeYaml } from "@uncaged/nerve-core";
+
+import { chunkKnowledgeFile } from "./chunk.js";
+import { listKnowledgeFiles } from "./glob-files.js";
+import { contentHash, openKnowledgeDb, replaceAllChunks } from "./knowledge-db.js";
+import { KNOWLEDGE_DB, KNOWLEDGE_YAML } from "./paths.js";
+import { registerKnowledgeRepoRoot } from "./registry.js";
+
+/** Summary of one sync run: where the index was written and how much went into it. */
+export type KnowledgeSyncResult = {
+  repoRoot: string;
+  dbPath: string;
+  filesIndexed: number;
+  chunksWritten: number;
+};
+
+/**
+ * Read and parse `<repoRoot>/knowledge.yaml`.
+ *
+ * @throws The parser's error when the YAML is rejected, or the filesystem error
+ *   when the file cannot be read.
+ */
+function loadConfig(repoRoot: string): KnowledgeConfig {
+  const yamlText = readFileSync(join(repoRoot, KNOWLEDGE_YAML), "utf8");
+  const outcome = parseKnowledgeYaml(yamlText);
+  if (outcome.ok) {
+    return outcome.value;
+  }
+  throw outcome.error;
+}
+
+/**
+ * Rebuild a repo's knowledge index from scratch.
+ *
+ * Loads knowledge.yaml, chunks every selected file, replaces the full contents
+ * of `<repoRoot>/knowledge.db` with the new chunks, and records the repo in the
+ * registry used by global queries.
+ *
+ * @param repoRoot - Directory containing knowledge.yaml.
+ * @returns Counts of indexed files and written chunks plus the database path.
+ * @throws When the config is invalid, a file cannot be read, or the database
+ *   write fails.
+ */
+export function runKnowledgeSync(repoRoot: string): KnowledgeSyncResult {
+  const config = loadConfig(repoRoot);
+  const relFiles = listKnowledgeFiles(repoRoot, config);
+
+  const inserts = relFiles.flatMap((rel) => {
+    const source = readFileSync(join(repoRoot, rel), "utf8");
+    // chunkIndex is the chunk's position within its own file.
+    return chunkKnowledgeFile(rel, source).map((chunk, chunkIndex) => ({
+      path: rel,
+      slug: chunk.slug,
+      chunkIndex,
+      text: chunk.text,
+      contentHash: contentHash(chunk.text),
+    }));
+  });
+
+  const dbPath = join(repoRoot, KNOWLEDGE_DB);
+  const db = openKnowledgeDb(dbPath);
+  try {
+    replaceAllChunks(db, inserts);
+  } finally {
+    db.close();
+  }
+
+  // Registered only after a successful write, so global search never points at
+  // a repo whose sync failed.
+  registerKnowledgeRepoRoot(repoRoot);
+
+  return {
+    repoRoot,
+    dbPath,
+    filesIndexed: relFiles.length,
+    chunksWritten: inserts.length,
+  };
+}
@@ -0,0 +1,26 @@
+/**
+ * Lower-case `s` and split it into its set of distinct words.
+ *
+ * A word is a run of Unicode letters, digits or `_`, so accented and non-Latin
+ * text is tokenized too (the ASCII-only `\w` class would discard it entirely).
+ */
+function tokenize(s: string): Set<string> {
+  const parts = s
+    .toLowerCase()
+    .split(/[^\p{L}\p{N}_]+/u)
+    .filter((x) => x.length > 0);
+  return new Set(parts);
+}
+
+/**
+ * Jaccard similarity of the word sets of `query` and `document`
+ * (placeholder until real embeddings; RFC-003).
+ *
+ * @param query - Search text.
+ * @param document - Text being scored.
+ * @returns |Q ∩ D| / |Q ∪ D| in the range [0, 1]; 0 when the query has no words.
+ */
+export function wordOverlapScore(query: string, document: string): number {
+  const q = tokenize(query);
+  if (q.size === 0) {
+    return 0;
+  }
+  const d = tokenize(document);
+  let inter = 0;
+  for (const w of q) {
+    if (d.has(w)) {
+      inter += 1;
+    }
+  }
+  // q is non-empty here, so the union is never zero.
+  return inter / (q.size + d.size - inter);
+}
@@ -8,7 +8,12 @@ import { join } from "node:path";
 import type { NerveConfig } from "@uncaged/nerve-core";
 import { KNOWN_AGENT_ADAPTER_IDS } from "@uncaged/nerve-core";

-/** Matches RoleSpec `agent: "name"` / `agent: 'name'` in workflow TypeScript sources. */
+/**
+ * Matches RoleSpec `agent: "name"` / `agent: 'name'` in workflow TypeScript sources.
+ * NOTE: This regex can match occurrences inside comments. For current usage (validation
+ * hint) this is acceptable — false positives just trigger a "missing agent" warning that
+ * the user can ignore. If precision becomes important, switch to AST-based extraction.
+ */
 const WORKFLOW_SPEC_AGENT_PATTERN = /agent:\s*["']([^"']+)["']/g;

 function collectTsSourceFiles(dir: string, acc: string[]): void {