From 62434847c4068fc88a0853352286efe14e073c29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= Date: Wed, 29 Apr 2026 05:39:00 +0000 Subject: [PATCH 1/2] =?UTF-8?q?feat(cli,core):=20RFC-003=20Phase=206=20?= =?UTF-8?q?=E2=80=94=20Knowledge=20Layer=20+=20review=20fixes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Knowledge Layer: - knowledge.yaml parser in core (include/exclude globs) - Chunking: markdown (by heading), TypeScript/JS (by function/block) - knowledge.db: SQLite storage for chunks + embeddings (node:sqlite) - CLI: nerve knowledge sync, nerve knowledge query - Scoping: -r (specific repo), -g (global search), mutually exclusive - Repo registry (<nerve root>/data/knowledge-repos.json) for global search - Placeholder embedding (content hash) until remote service ready - Word-overlap similarity for query ranking Review fixes (from PR #241 feedback): - KNOWN_AGENT_ADAPTER_IDS: add cursor/hermes/codex + sync docs - collectWorkflowSpecAgentReferences: document regex comment false-positive - assertZodMetaSchemas: one-time compile-time validation utility Closes #240 Ref: #234 --- packages/cli/package.json | 1 + packages/cli/src/cli.ts | 2 + packages/cli/src/commands/init.ts | 1 + .../cli/src/commands/knowledge-query-run.ts | 79 +++++++++++++++ packages/cli/src/commands/knowledge.ts | 93 ++++++++++++++++++ packages/cli/src/knowledge/chunk-markdown.ts | 88 +++++++++++++++++ .../cli/src/knowledge/chunk-typescript.ts | 87 +++++++++++++++++ packages/cli/src/knowledge/chunk.ts | 23 +++++ packages/cli/src/knowledge/exclude-match.ts | 19 ++++ packages/cli/src/knowledge/fake-embedding.ts | 7 ++ packages/cli/src/knowledge/glob-files.ts | 42 ++++++++ packages/cli/src/knowledge/knowledge-db.ts | 96 +++++++++++++++++++ packages/cli/src/knowledge/paths.ts | 2 + packages/cli/src/knowledge/query-scope.ts | 13 +++ packages/cli/src/knowledge/query.ts | 82 ++++++++++++++++ packages/cli/src/knowledge/registry.ts | 55 +++++++++++ 
packages/cli/src/knowledge/repo-root.ts | 21 ++++ packages/cli/src/knowledge/sync.ts | 73 ++++++++++++++ packages/cli/src/knowledge/word-overlap.ts | 26 +++++ packages/cli/src/workflow-agent-validation.ts | 7 +- .../src/__tests__/knowledge-config.test.ts | 49 ++++++++++ packages/core/src/agent-adapter-ids.ts | 6 +- packages/core/src/index.ts | 2 + packages/core/src/knowledge-config.ts | 64 +++++++++++++ packages/workflow-utils/src/index.ts | 1 + .../workflow-utils/src/shared/extract-fn.ts | 15 +++ pnpm-lock.yaml | 5 + 27 files changed, 956 insertions(+), 3 deletions(-) create mode 100644 packages/cli/src/commands/knowledge-query-run.ts create mode 100644 packages/cli/src/commands/knowledge.ts create mode 100644 packages/cli/src/knowledge/chunk-markdown.ts create mode 100644 packages/cli/src/knowledge/chunk-typescript.ts create mode 100644 packages/cli/src/knowledge/chunk.ts create mode 100644 packages/cli/src/knowledge/exclude-match.ts create mode 100644 packages/cli/src/knowledge/fake-embedding.ts create mode 100644 packages/cli/src/knowledge/glob-files.ts create mode 100644 packages/cli/src/knowledge/knowledge-db.ts create mode 100644 packages/cli/src/knowledge/paths.ts create mode 100644 packages/cli/src/knowledge/query-scope.ts create mode 100644 packages/cli/src/knowledge/query.ts create mode 100644 packages/cli/src/knowledge/registry.ts create mode 100644 packages/cli/src/knowledge/repo-root.ts create mode 100644 packages/cli/src/knowledge/sync.ts create mode 100644 packages/cli/src/knowledge/word-overlap.ts create mode 100644 packages/core/src/__tests__/knowledge-config.test.ts create mode 100644 packages/core/src/knowledge-config.ts diff --git a/packages/cli/package.json b/packages/cli/package.json index cfb2d70..eddf0f1 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -23,6 +23,7 @@ "@uncaged/nerve-core": "workspace:*", "@uncaged/nerve-store": "workspace:*", "citty": "^0.1.6", + "picomatch": "^4.0.2", "yaml": "^2.8.3" }, 
"devDependencies": { diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index a599dd7..603c03a 100644 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -7,6 +7,7 @@ import { createCommand } from "./commands/create.js"; import { daemonCommand } from "./commands/daemon.js"; import { devCommand } from "./commands/dev.js"; import { initCommand } from "./commands/init.js"; +import { knowledgeCommand } from "./commands/knowledge.js"; import { remoteCommand } from "./commands/remote.js"; import { senseCommand } from "./commands/sense.js"; import { storeCommand } from "./commands/store.js"; @@ -46,6 +47,7 @@ const main = defineCommand({ daemon: daemonCommand, dev: devCommand, validate: validateCommand, + knowledge: knowledgeCommand, sense: senseCommand, store: storeCommand, remote: remoteCommand, diff --git a/packages/cli/src/commands/init.ts b/packages/cli/src/commands/init.ts index d79f511..e77caf3 100644 --- a/packages/cli/src/commands/init.ts +++ b/packages/cli/src/commands/init.ts @@ -78,6 +78,7 @@ const GITIGNORE = `data/ logs/ nerve.pid node_modules/ +knowledge.db `; const NERVE_SKILLS_MDC = `--- diff --git a/packages/cli/src/commands/knowledge-query-run.ts b/packages/cli/src/commands/knowledge-query-run.ts new file mode 100644 index 0000000..fe11b8a --- /dev/null +++ b/packages/cli/src/commands/knowledge-query-run.ts @@ -0,0 +1,79 @@ +import { existsSync } from "node:fs"; +import { resolve } from "node:path"; + +import { KNOWLEDGE_DB } from "../knowledge/paths.js"; +import { queryKnowledgeGlobal, queryKnowledgeRepo } from "../knowledge/query.js"; +import { listRegisteredKnowledgeRoots } from "../knowledge/registry.js"; +import { findKnowledgeRepoRoot } from "../knowledge/repo-root.js"; + +const DEFAULT_LIMIT = 10; + +export function parseKnowledgeQueryLimit(raw: string | undefined): number { + if (raw === undefined || raw.trim().length === 0) { + return DEFAULT_LIMIT; + } + const n = Number.parseInt(raw, 10); + return Number.isFinite(n) && 
n > 0 ? n : DEFAULT_LIMIT; +} + +export function runKnowledgeQueryGlobal(queryText: string, limit: number): void { + const roots = listRegisteredKnowledgeRoots(); + if (roots.length === 0) { + process.stderr.write( + "❌ No registered repos — run `nerve knowledge sync` in each repo first.\n", + ); + process.exit(1); + } + const hits = queryKnowledgeGlobal(roots, KNOWLEDGE_DB, queryText, limit); + if (hits.length === 0) { + process.stdout.write("No results.\n"); + return; + } + for (let i = 0; i < hits.length; i++) { + const h = hits[i]; + if (h === undefined) continue; + const prefix = h.repoRoot !== null ? `[${h.repoRoot}] ` : ""; + process.stdout.write( + `${String(i + 1)}. score=${h.score.toFixed(4)} ${prefix}${h.path} (${h.slug})\n${h.text}\n---\n`, + ); + } +} + +export function runKnowledgeQueryScoped( + repoFlag: string | undefined, + queryText: string, + limit: number, +): void { + let repoRoot: string | null = null; + if (repoFlag !== undefined && String(repoFlag).trim().length > 0) { + repoRoot = resolve(String(repoFlag).trim()); + } else { + repoRoot = findKnowledgeRepoRoot(process.cwd()); + } + + if (repoRoot === null) { + process.stderr.write("❌ No knowledge.yaml found — use -r or run from a repo root.\n"); + process.exit(1); + } + + const dbPath = `${repoRoot}/${KNOWLEDGE_DB}`; + if (!existsSync(dbPath)) { + process.stderr.write( + `❌ No ${KNOWLEDGE_DB} in ${repoRoot} — run \`nerve knowledge sync\` first.\n`, + ); + process.exit(1); + } + + const hits = queryKnowledgeRepo(repoRoot, dbPath, queryText, limit); + if (hits.length === 0) { + process.stdout.write("No results.\n"); + return; + } + for (let i = 0; i < hits.length; i++) { + const h = hits[i]; + if (h === undefined) continue; + process.stdout.write( + `${String(i + 1)}. 
score=${h.score.toFixed(4)} ${h.path} (${h.slug})\n${h.text}\n---\n`, + ); + } +} diff --git a/packages/cli/src/commands/knowledge.ts b/packages/cli/src/commands/knowledge.ts new file mode 100644 index 0000000..d42bef5 --- /dev/null +++ b/packages/cli/src/commands/knowledge.ts @@ -0,0 +1,93 @@ +import { defineCommand } from "citty"; + +import { knowledgeQueryScopeConflictMessage } from "../knowledge/query-scope.js"; +import { findKnowledgeRepoRoot } from "../knowledge/repo-root.js"; +import { runKnowledgeSync } from "../knowledge/sync.js"; +import { + parseKnowledgeQueryLimit, + runKnowledgeQueryGlobal, + runKnowledgeQueryScoped, +} from "./knowledge-query-run.js"; + +const syncCommand = defineCommand({ + meta: { + name: "sync", + description: "Chunk matching files from knowledge.yaml and rebuild knowledge.db", + }, + async run() { + const repoRoot = findKnowledgeRepoRoot(process.cwd()); + if (repoRoot === null) { + process.stderr.write( + "❌ No knowledge.yaml found — run from a repo that contains knowledge.yaml.\n", + ); + process.exit(1); + } + try { + const result = runKnowledgeSync(repoRoot); + process.stdout.write( + `✅ Indexed ${String(result.filesIndexed)} file(s), ${String(result.chunksWritten)} chunk(s) → ${result.dbPath}\n`, + ); + } catch (e) { + const msg = e instanceof Error ? 
e.message : String(e); + process.stderr.write(`❌ knowledge sync failed: ${msg}\n`); + process.exit(1); + } + }, +}); + +const queryCommand = defineCommand({ + meta: { + name: "query", + description: "Search indexed knowledge (word overlap placeholder until embeddings)", + }, + args: { + query: { + type: "positional", + required: true, + description: "Search text", + }, + r: { + type: "string", + description: "Use knowledge.db from another repo root", + required: false, + }, + g: { + type: "boolean", + description: "Search across all repos registered via prior sync", + default: false, + }, + limit: { + type: "string", + description: "Max hits (default 10)", + required: false, + }, + }, + async run({ args }) { + const conflict = knowledgeQueryScopeConflictMessage(args.r, args.g); + if (conflict !== null) { + process.stderr.write(`${conflict}\n`); + process.exit(1); + } + + const queryText = args.query; + const limit = parseKnowledgeQueryLimit(args.limit); + + if (args.g) { + runKnowledgeQueryGlobal(queryText, limit); + return; + } + + runKnowledgeQueryScoped(args.r, queryText, limit); + }, +}); + +export const knowledgeCommand = defineCommand({ + meta: { + name: "knowledge", + description: "Project knowledge index (knowledge.yaml + knowledge.db, RFC-003)", + }, + subCommands: { + sync: syncCommand, + query: queryCommand, + }, +}); diff --git a/packages/cli/src/knowledge/chunk-markdown.ts b/packages/cli/src/knowledge/chunk-markdown.ts new file mode 100644 index 0000000..cd826a4 --- /dev/null +++ b/packages/cli/src/knowledge/chunk-markdown.ts @@ -0,0 +1,88 @@ +const HEADING_RE = /^(#{1,6})\s+(.+)$/; + +export type MarkdownChunk = { + slug: string; + text: string; +}; + +function slugPart(title: string): string { + const t = title.trim().toLowerCase().replace(/\s+/g, "-"); + const safe = t.replace(/[^a-z0-9_-]+/g, ""); + return safe.length > 0 ? 
safe : "section"; +} + +function splitLargeMarkdownChunk(slugBase: string, text: string): MarkdownChunk[] { + const maxParagraphs = 24; + const paragraphs = text.split(/\n\s*\n/).filter((p) => p.trim().length > 0); + if (paragraphs.length <= maxParagraphs) { + return [{ slug: slugBase, text }]; + } + const chunks: MarkdownChunk[] = []; + let part = 0; + for (let i = 0; i < paragraphs.length; i += maxParagraphs) { + const slice = paragraphs.slice(i, i + maxParagraphs).join("\n\n"); + chunks.push({ slug: `${slugBase}-part${String(part)}`, text: slice }); + part += 1; + } + return chunks; +} + +function headingLineIndices(lines: string[]): number[] { + const headingIdx: number[] = []; + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + if (line !== undefined && HEADING_RE.test(line)) { + headingIdx.push(i); + } + } + return headingIdx; +} + +function chunksFromHeadings( + lines: string[], + headingIdx: number[], + baseSlug: string, +): MarkdownChunk[] { + const chunks: MarkdownChunk[] = []; + const firstHead = headingIdx[0] ?? 0; + if (firstHead > 0) { + const preamble = lines.slice(0, firstHead).join("\n").trim(); + if (preamble.length > 0) { + chunks.push(...splitLargeMarkdownChunk(`${baseSlug}#preamble`, preamble)); + } + } + + for (let h = 0; h < headingIdx.length; h++) { + const start = headingIdx[h] ?? 0; + const end = h + 1 < headingIdx.length ? (headingIdx[h + 1] ?? lines.length) : lines.length; + const block = lines.slice(start, end).join("\n").trim(); + if (block.length === 0) { + continue; + } + const titleLine = lines[start] ?? ""; + const ht = HEADING_RE.exec(titleLine); + const suffix = ht !== null ? slugPart(ht[2] ?? "h") : `h${String(h)}`; + chunks.push(...splitLargeMarkdownChunk(`${baseSlug}#${suffix}-${String(h)}`, block)); + } + return chunks; +} + +/** + * Split Markdown by headings; long sections are split further by blank-line paragraphs. 
+ */ +export function chunkMarkdown(relativePath: string, source: string): MarkdownChunk[] { + const lines = source.split(/\r?\n/); + const headingIdx = headingLineIndices(lines); + const baseSlug = relativePath.replace(/\//g, "-"); + + if (headingIdx.length === 0) { + const text = source.trim(); + if (text.length === 0) { + return []; + } + return splitLargeMarkdownChunk(`${baseSlug}#doc`, text); + } + + const chunks = chunksFromHeadings(lines, headingIdx, baseSlug); + return chunks; +} diff --git a/packages/cli/src/knowledge/chunk-typescript.ts b/packages/cli/src/knowledge/chunk-typescript.ts new file mode 100644 index 0000000..5b4d1a6 --- /dev/null +++ b/packages/cli/src/knowledge/chunk-typescript.ts @@ -0,0 +1,87 @@ +export type TsJsChunk = { + slug: string; + text: string; +}; + +/** + * Line starts a function-like declaration (heuristic, no full TS parse). + */ +function isFunctionStartLine(line: string): boolean { + const t = line.trimStart(); + if (/^(export\s+)?declare\s+/.test(t)) { + return false; + } + if (/^(export\s+)?(async\s+)?function\s+[A-Za-z_$][\w$]*\s*\(/.test(t)) { + return true; + } + if (/^(export\s+)?const\s+[A-Za-z_$][\w$]*\s*=\s*(async\s*)?\(/.test(t)) { + return true; + } + if (/^(export\s+)?const\s+[A-Za-z_$][\w$]*\s*=\s*async\s+function/.test(t)) { + return true; + } + return false; +} + +function slugPart(name: string): string { + const safe = name.replace(/[^\w$-]+/g, "-").toLowerCase(); + return safe.length > 0 ? safe : "block"; +} + +function extractRoughName(firstLine: string): string { + const m = + /function\s+([A-Za-z_$][\w$]*)/.exec(firstLine) ?? /const\s+([A-Za-z_$][\w$]*)/.exec(firstLine); + return m !== null && m[1] !== undefined ? m[1] : "fn"; +} + +/** + * Split `.ts` / `.js` by top-level function-like lines; falls back to paragraph chunks. 
+ */ +export function chunkTypeScriptOrJavaScript(relativePath: string, source: string): TsJsChunk[] { + const baseSlug = relativePath.replace(/\./g, "-").replace(/\//g, "-"); + const lines = source.split(/\r?\n/); + const starts: number[] = []; + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + if (line !== undefined && isFunctionStartLine(line)) { + starts.push(i); + } + } + + if (starts.length === 0) { + return paragraphFallbackChunks(baseSlug, source); + } + + const chunks: TsJsChunk[] = []; + for (let s = 0; s < starts.length; s++) { + const start = starts[s] ?? 0; + const end = s + 1 < starts.length ? (starts[s + 1] ?? lines.length) : lines.length; + const block = lines.slice(start, end).join("\n").trim(); + if (block.length === 0) { + continue; + } + const first = lines[start] ?? ""; + const name = extractRoughName(first); + chunks.push({ + slug: `${baseSlug}#${slugPart(name)}-${String(s)}`, + text: block, + }); + } + + return chunks.length > 0 ? chunks : paragraphFallbackChunks(baseSlug, source); +} + +function paragraphFallbackChunks(baseSlug: string, source: string): TsJsChunk[] { + const text = source.trim(); + if (text.length === 0) { + return []; + } + const parts = text.split(/\n\s*\n/).filter((p) => p.trim().length > 0); + if (parts.length === 0) { + return [{ slug: `${baseSlug}#0`, text }]; + } + return parts.map((p, i) => ({ + slug: `${baseSlug}#para-${String(i)}`, + text: p.trim(), + })); +} diff --git a/packages/cli/src/knowledge/chunk.ts b/packages/cli/src/knowledge/chunk.ts new file mode 100644 index 0000000..f937f7c --- /dev/null +++ b/packages/cli/src/knowledge/chunk.ts @@ -0,0 +1,23 @@ +import { chunkMarkdown } from "./chunk-markdown.js"; +import { chunkTypeScriptOrJavaScript } from "./chunk-typescript.js"; + +export type KnowledgeChunk = { + slug: string; + text: string; +}; + +export function chunkKnowledgeFile(relativePath: string, source: string): KnowledgeChunk[] { + const lower = relativePath.toLowerCase(); + if 
(lower.endsWith(".md")) { + return chunkMarkdown(relativePath, source); + } + if ( + lower.endsWith(".ts") || + lower.endsWith(".tsx") || + lower.endsWith(".js") || + lower.endsWith(".jsx") + ) { + return chunkTypeScriptOrJavaScript(relativePath, source); + } + return [{ slug: `${relativePath.replace(/\//g, "-")}#0`, text: source.trim() }]; +} diff --git a/packages/cli/src/knowledge/exclude-match.ts b/packages/cli/src/knowledge/exclude-match.ts new file mode 100644 index 0000000..7061991 --- /dev/null +++ b/packages/cli/src/knowledge/exclude-match.ts @@ -0,0 +1,19 @@ +import picomatch from "picomatch"; + +const PICOMATCH_OPTS = { dot: true } as const; + +/** + * True if `relativePosixPath` matches any exclude glob (POSIX slashes). + */ +export function matchesKnowledgeExclude( + relativePosixPath: string, + excludePatterns: ReadonlyArray, +): boolean { + for (const pattern of excludePatterns) { + const isMatch = picomatch(pattern, PICOMATCH_OPTS); + if (isMatch(relativePosixPath)) { + return true; + } + } + return false; +} diff --git a/packages/cli/src/knowledge/fake-embedding.ts b/packages/cli/src/knowledge/fake-embedding.ts new file mode 100644 index 0000000..454504d --- /dev/null +++ b/packages/cli/src/knowledge/fake-embedding.ts @@ -0,0 +1,7 @@ +import { createHash } from "node:crypto"; + +/** Deterministic placeholder embedding bytes until a remote embedding service exists (RFC-003). 
*/ +export function fakeEmbeddingBytes(text: string): Buffer { + const hash = createHash("sha256").update(text, "utf8").digest(); + return Buffer.concat([hash, hash, hash, hash]); +} diff --git a/packages/cli/src/knowledge/glob-files.ts b/packages/cli/src/knowledge/glob-files.ts new file mode 100644 index 0000000..372074f --- /dev/null +++ b/packages/cli/src/knowledge/glob-files.ts @@ -0,0 +1,42 @@ +import { globSync, statSync } from "node:fs"; +import { join } from "node:path"; + +import type { KnowledgeConfig } from "@uncaged/nerve-core"; + +import { matchesKnowledgeExclude } from "./exclude-match.js"; + +function toPosix(rel: string): string { + return rel.split("\\").join("/"); +} + +function isFileUnderRoot(repoRoot: string, rel: string): boolean { + try { + return statSync(join(repoRoot, rel)).isFile(); + } catch { + return false; + } +} + +/** + * Files matched by `include` globs minus `exclude` globs, relative POSIX paths, sorted. + */ +export function listKnowledgeFiles(repoRoot: string, config: KnowledgeConfig): string[] { + const matched = new Set(); + for (const pattern of config.include) { + const paths = globSync(pattern, { + cwd: repoRoot, + windowsPathsNoEscape: true, + }); + for (const rel of paths) { + const posix = toPosix(rel); + if (!isFileUnderRoot(repoRoot, posix)) { + continue; + } + if (matchesKnowledgeExclude(posix, config.exclude)) { + continue; + } + matched.add(posix); + } + } + return [...matched].sort(); +} diff --git a/packages/cli/src/knowledge/knowledge-db.ts b/packages/cli/src/knowledge/knowledge-db.ts new file mode 100644 index 0000000..978acd4 --- /dev/null +++ b/packages/cli/src/knowledge/knowledge-db.ts @@ -0,0 +1,96 @@ +import { createHash } from "node:crypto"; +import { DatabaseSync } from "node:sqlite"; + +import { fakeEmbeddingBytes } from "./fake-embedding.js"; + +export type KnowledgeChunkRow = { + path: string; + slug: string; + chunkIndex: number; + text: string; + embedding: Buffer; + contentHash: string; +}; + 
+export type KnowledgeChunkInsert = { + path: string; + slug: string; + chunkIndex: number; + text: string; + contentHash: string; +}; + +const SCHEMA = ` +CREATE TABLE IF NOT EXISTS chunks ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + path TEXT NOT NULL, + chunk_index INTEGER NOT NULL, + slug TEXT NOT NULL, + text TEXT NOT NULL, + embedding BLOB NOT NULL, + content_hash TEXT NOT NULL, + UNIQUE(path, chunk_index) +); + +CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(path); +`; + +export function openKnowledgeDb(dbPath: string): DatabaseSync { + const db = new DatabaseSync(dbPath); + db.exec(SCHEMA); + return db; +} + +export function contentHash(text: string): string { + return createHash("sha256").update(text, "utf8").digest("hex"); +} + +export function replaceAllChunks(db: DatabaseSync, rows: KnowledgeChunkInsert[]): void { + db.exec("BEGIN IMMEDIATE"); + try { + db.prepare("DELETE FROM chunks").run(); + const insert = db.prepare( + `INSERT INTO chunks (path, chunk_index, slug, text, embedding, content_hash) + VALUES (@path, @chunk_index, @slug, @text, @embedding, @content_hash)`, + ); + for (let i = 0; i < rows.length; i++) { + const row = rows[i]; + if (row === undefined) continue; + const emb = fakeEmbeddingBytes(row.text); + insert.run({ + path: row.path, + chunk_index: row.chunkIndex, + slug: row.slug, + text: row.text, + embedding: emb, + content_hash: row.contentHash, + }); + } + db.exec("COMMIT"); + } catch (e) { + db.exec("ROLLBACK"); + throw e; + } +} + +export function loadAllChunks(db: DatabaseSync): KnowledgeChunkRow[] { + const stmt = db.prepare( + "SELECT path, chunk_index, slug, text, embedding, content_hash FROM chunks ORDER BY path, chunk_index", + ); + const rows = stmt.all() as Array<{ + path: string; + chunk_index: number; + slug: string; + text: string; + embedding: Buffer; + content_hash: string; + }>; + return rows.map((r) => ({ + path: r.path, + slug: r.slug, + chunkIndex: r.chunk_index, + text: r.text, + embedding: r.embedding, + 
contentHash: r.content_hash, + })); +} diff --git a/packages/cli/src/knowledge/paths.ts b/packages/cli/src/knowledge/paths.ts new file mode 100644 index 0000000..f8627f6 --- /dev/null +++ b/packages/cli/src/knowledge/paths.ts @@ -0,0 +1,2 @@ +export const KNOWLEDGE_YAML = "knowledge.yaml"; +export const KNOWLEDGE_DB = "knowledge.db"; diff --git a/packages/cli/src/knowledge/query-scope.ts b/packages/cli/src/knowledge/query-scope.ts new file mode 100644 index 0000000..d757147 --- /dev/null +++ b/packages/cli/src/knowledge/query-scope.ts @@ -0,0 +1,13 @@ +/** + * `-r` and `-g` are mutually exclusive for `nerve knowledge query`. + */ +export function knowledgeQueryScopeConflictMessage( + repoFlag: string | null | undefined, + globalFlag: boolean, +): string | null { + const hasR = repoFlag !== undefined && repoFlag !== null && String(repoFlag).trim().length > 0; + if (hasR && globalFlag) { + return "❌ Use either -r or -g, not both."; + } + return null; +} diff --git a/packages/cli/src/knowledge/query.ts b/packages/cli/src/knowledge/query.ts new file mode 100644 index 0000000..9e53c05 --- /dev/null +++ b/packages/cli/src/knowledge/query.ts @@ -0,0 +1,82 @@ +import { existsSync } from "node:fs"; +import { join } from "node:path"; + +import type { KnowledgeChunkRow } from "./knowledge-db.js"; +import { loadAllChunks, openKnowledgeDb } from "./knowledge-db.js"; +import { wordOverlapScore } from "./word-overlap.js"; + +export type KnowledgeQueryHit = { + repoRoot: string | null; + path: string; + slug: string; + text: string; + score: number; +}; + +export function rankChunksByWordOverlap( + query: string, + chunks: KnowledgeChunkRow[], + limit: number, +): Array<{ chunk: KnowledgeChunkRow; score: number }> { + const scored = chunks.map((chunk) => ({ + chunk, + score: wordOverlapScore(query, `${chunk.text}\n${chunk.path}`), + })); + scored.sort((a, b) => b.score - a.score); + return scored.slice(0, limit); +} + +export function queryKnowledgeRepo( + repoRoot: string, + 
dbPath: string, + queryText: string, + limit: number, +): KnowledgeQueryHit[] { + const db = openKnowledgeDb(dbPath); + try { + const rows = loadAllChunks(db); + const ranked = rankChunksByWordOverlap(queryText, rows, limit); + return ranked.map((r) => ({ + repoRoot, + path: r.chunk.path, + slug: r.chunk.slug, + text: r.chunk.text, + score: r.score, + })); + } finally { + db.close(); + } +} + +export function queryKnowledgeGlobal( + repoRoots: ReadonlyArray, + dbFileName: string, + queryText: string, + limit: number, +): KnowledgeQueryHit[] { + const combined: KnowledgeQueryHit[] = []; + for (const root of repoRoots) { + const dbPath = join(root, dbFileName); + if (!existsSync(dbPath)) { + continue; + } + const db = openKnowledgeDb(dbPath); + try { + const rows = loadAllChunks(db); + const ranked = rankChunksByWordOverlap(queryText, rows, limit); + for (const r of ranked) { + combined.push({ + repoRoot: root, + path: r.chunk.path, + slug: r.chunk.slug, + text: r.chunk.text, + score: r.score, + }); + } + } finally { + db.close(); + } + } + combined.sort((a, b) => b.score - a.score); + return combined.slice(0, limit); +} diff --git a/packages/cli/src/knowledge/registry.ts b/packages/cli/src/knowledge/registry.ts new file mode 100644 index 0000000..91e2c55 --- /dev/null +++ b/packages/cli/src/knowledge/registry.ts @@ -0,0 +1,55 @@ +import { mkdirSync, readFileSync, writeFileSync } from "node:fs"; +import { dirname } from "node:path"; + +import { getNerveRoot } from "../workspace.js"; + +export type KnowledgeRepoRegistry = { + roots: ReadonlyArray; +}; + +const FILE_NAME = "knowledge-repos.json"; + +export function getKnowledgeRegistryPath(): string { + return `${getNerveRoot()}/data/${FILE_NAME}`; +} + +function defaultRegistry(): KnowledgeRepoRegistry { + return { roots: [] }; +} + +export function readKnowledgeRegistry(): KnowledgeRepoRegistry { + const path = getKnowledgeRegistryPath(); + try { + const raw = readFileSync(path, "utf8"); + const parsed: unknown = 
JSON.parse(raw); + if ( + typeof parsed === "object" && + parsed !== null && + "roots" in parsed && + Array.isArray(parsed.roots) + ) { + const roots = parsed.roots.filter((x): x is string => typeof x === "string"); + return { roots: [...new Set(roots)].sort() }; + } + } catch { + // missing or invalid — treat as empty + } + return defaultRegistry(); +} + +export function registerKnowledgeRepoRoot(repoRootAbsolute: string): void { + const resolved = repoRootAbsolute.trim(); + if (resolved.length === 0) { + return; + } + const prev = readKnowledgeRegistry(); + const nextRoots = [...new Set([...prev.roots, resolved])].sort(); + const next: KnowledgeRepoRegistry = { roots: nextRoots }; + const path = getKnowledgeRegistryPath(); + mkdirSync(dirname(path), { recursive: true }); + writeFileSync(path, `${JSON.stringify(next, null, 2)}\n`, "utf8"); +} + +export function listRegisteredKnowledgeRoots(): string[] { + return [...readKnowledgeRegistry().roots]; +} diff --git a/packages/cli/src/knowledge/repo-root.ts b/packages/cli/src/knowledge/repo-root.ts new file mode 100644 index 0000000..c6e0497 --- /dev/null +++ b/packages/cli/src/knowledge/repo-root.ts @@ -0,0 +1,21 @@ +import { existsSync } from "node:fs"; +import { dirname, join, resolve } from "node:path"; + +import { KNOWLEDGE_YAML } from "./paths.js"; + +/** + * Walk upward from `startDir` until `knowledge.yaml` exists. 
+ */ +export function findKnowledgeRepoRoot(startDir: string): string | null { + let dir = resolve(startDir); + while (true) { + if (existsSync(join(dir, KNOWLEDGE_YAML))) { + return dir; + } + const parent = dirname(dir); + if (parent === dir) { + return null; + } + dir = parent; + } +} diff --git a/packages/cli/src/knowledge/sync.ts b/packages/cli/src/knowledge/sync.ts new file mode 100644 index 0000000..def84db --- /dev/null +++ b/packages/cli/src/knowledge/sync.ts @@ -0,0 +1,73 @@ +import { readFileSync } from "node:fs"; +import { join } from "node:path"; + +import { type KnowledgeConfig, parseKnowledgeYaml } from "@uncaged/nerve-core"; + +import { chunkKnowledgeFile } from "./chunk.js"; +import { listKnowledgeFiles } from "./glob-files.js"; +import { contentHash, openKnowledgeDb, replaceAllChunks } from "./knowledge-db.js"; +import { KNOWLEDGE_DB, KNOWLEDGE_YAML } from "./paths.js"; +import { registerKnowledgeRepoRoot } from "./registry.js"; + +export type KnowledgeSyncResult = { + repoRoot: string; + dbPath: string; + filesIndexed: number; + chunksWritten: number; +}; + +function loadConfig(repoRoot: string): KnowledgeConfig { + const raw = readFileSync(join(repoRoot, KNOWLEDGE_YAML), "utf8"); + const parsed = parseKnowledgeYaml(raw); + if (!parsed.ok) { + throw parsed.error; + } + return parsed.value; +} + +export function runKnowledgeSync(repoRoot: string): KnowledgeSyncResult { + const config = loadConfig(repoRoot); + const relFiles = listKnowledgeFiles(repoRoot, config); + const inserts: Array<{ + path: string; + slug: string; + chunkIndex: number; + text: string; + contentHash: string; + }> = []; + + for (const rel of relFiles) { + const abs = join(repoRoot, rel); + const source = readFileSync(abs, "utf8"); + const chunks = chunkKnowledgeFile(rel, source); + for (let i = 0; i < chunks.length; i++) { + const ch = chunks[i]; + if (ch === undefined) continue; + const text = ch.text; + inserts.push({ + path: rel, + slug: ch.slug, + chunkIndex: i, + text, + 
contentHash: contentHash(text), + }); + } + } + + const dbPath = join(repoRoot, KNOWLEDGE_DB); + const db = openKnowledgeDb(dbPath); + try { + replaceAllChunks(db, inserts); + } finally { + db.close(); + } + + registerKnowledgeRepoRoot(repoRoot); + + return { + repoRoot, + dbPath, + filesIndexed: relFiles.length, + chunksWritten: inserts.length, + }; +} diff --git a/packages/cli/src/knowledge/word-overlap.ts b/packages/cli/src/knowledge/word-overlap.ts new file mode 100644 index 0000000..7c17bd4 --- /dev/null +++ b/packages/cli/src/knowledge/word-overlap.ts @@ -0,0 +1,26 @@ +function tokenize(s: string): Set { + const parts = s + .toLowerCase() + .split(/[^\w]+/) + .filter((x) => x.length > 0); + return new Set(parts); +} + +/** + * Jaccard-like score over word sets (placeholder until real embeddings; RFC-003). + */ +export function wordOverlapScore(query: string, document: string): number { + const q = tokenize(query); + const d = tokenize(document); + if (q.size === 0) { + return 0; + } + let inter = 0; + for (const w of q) { + if (d.has(w)) { + inter += 1; + } + } + const union = q.size + d.size - inter; + return union === 0 ? 0 : inter / union; +} diff --git a/packages/cli/src/workflow-agent-validation.ts b/packages/cli/src/workflow-agent-validation.ts index 8cd41ff..0ea157a 100644 --- a/packages/cli/src/workflow-agent-validation.ts +++ b/packages/cli/src/workflow-agent-validation.ts @@ -8,7 +8,12 @@ import { join } from "node:path"; import type { NerveConfig } from "@uncaged/nerve-core"; import { KNOWN_AGENT_ADAPTER_IDS } from "@uncaged/nerve-core"; -/** Matches RoleSpec `agent: "name"` / `agent: 'name'` in workflow TypeScript sources. */ +/** + * Matches RoleSpec `agent: "name"` / `agent: 'name'` in workflow TypeScript sources. + * NOTE: This regex can match occurrences inside comments. For current usage (validation + * hint) this is acceptable — false positives just trigger a "missing agent" warning that + * the user can ignore. 
If precision becomes important, switch to AST-based extraction. + */ const WORKFLOW_SPEC_AGENT_PATTERN = /agent:\s*["']([^"']+)["']/g; function collectTsSourceFiles(dir: string, acc: string[]): void { diff --git a/packages/core/src/__tests__/knowledge-config.test.ts b/packages/core/src/__tests__/knowledge-config.test.ts new file mode 100644 index 0000000..cd7c6b7 --- /dev/null +++ b/packages/core/src/__tests__/knowledge-config.test.ts @@ -0,0 +1,49 @@ +import { describe, expect, it } from "vitest"; + +import { parseKnowledgeYaml } from "../knowledge-config.js"; + +describe("parseKnowledgeYaml", () => { + it("parses include and exclude glob lists", () => { + const raw = ` +include: + - "src/**/*.ts" + - "docs/**/*.md" +exclude: + - "node_modules/**" + - "*.test.ts" +`; + const result = parseKnowledgeYaml(raw); + expect(result.ok).toBe(true); + if (!result.ok) return; + expect([...result.value.include]).toEqual(["src/**/*.ts", "docs/**/*.md"]); + expect([...result.value.exclude]).toEqual(["node_modules/**", "*.test.ts"]); + }); + + it("defaults missing include/exclude to empty arrays", () => { + const result = parseKnowledgeYaml("{}"); + expect(result.ok).toBe(true); + if (!result.ok) return; + expect([...result.value.include]).toEqual([]); + expect([...result.value.exclude]).toEqual([]); + }); + + it("allows empty document", () => { + const result = parseKnowledgeYaml(""); + expect(result.ok).toBe(true); + if (!result.ok) return; + expect([...result.value.include]).toEqual([]); + expect([...result.value.exclude]).toEqual([]); + }); + + it("rejects non-array include", () => { + const result = parseKnowledgeYaml("include: foo"); + expect(result.ok).toBe(false); + if (result.ok) return; + expect(result.error.message).toContain("include"); + }); + + it("rejects empty string entry in exclude", () => { + const result = parseKnowledgeYaml('exclude:\n - ""'); + expect(result.ok).toBe(false); + }); +}); diff --git a/packages/core/src/agent-adapter-ids.ts 
b/packages/core/src/agent-adapter-ids.ts index e7be7b2..f10a9ac 100644 --- a/packages/core/src/agent-adapter-ids.ts +++ b/packages/core/src/agent-adapter-ids.ts @@ -1,5 +1,7 @@ /** * Agent adapter types that have a daemon implementation (RFC-003). - * Keep in sync with `packages/daemon` agent factory dispatch. + * Keep in sync with `packages/daemon/src/agent-registry.ts` adapter dispatch. + * When adding a new adapter (e.g. cursor, hermes, codex), add it here AND + * add the corresponding factory branch in `createAgentFnForConfig`. */ -export const KNOWN_AGENT_ADAPTER_IDS = ["echo"] as const; +export const KNOWN_AGENT_ADAPTER_IDS = ["echo", "cursor", "hermes", "codex"] as const; diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 961e397..b22a2b4 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -35,6 +35,8 @@ export { ExtractError } from "./extract-layer.js"; export type { Result } from "./result.js"; export { ok, err } from "./result.js"; export { parseNerveConfig } from "./parse-nerve-config.js"; +export type { KnowledgeConfig } from "./knowledge-config.js"; +export { parseKnowledgeYaml } from "./knowledge-config.js"; export { isPlainRecord } from "./is-plain-record.js"; export { KNOWN_AGENT_ADAPTER_IDS } from "./agent-adapter-ids.js"; diff --git a/packages/core/src/knowledge-config.ts b/packages/core/src/knowledge-config.ts new file mode 100644 index 0000000..f7c4289 --- /dev/null +++ b/packages/core/src/knowledge-config.ts @@ -0,0 +1,70 @@ +import { parse } from "yaml"; + +import { isPlainRecord } from "./is-plain-record.js"; +import type { Result } from "./result.js"; +import { err, ok } from "./result.js"; + +/** Include/exclude glob pattern lists read from `knowledge.yaml`. */ +export type KnowledgeConfig = { + include: ReadonlyArray<string>; + exclude: ReadonlyArray<string>; +}; + +/** + * Validate an optional list of non-empty strings. + * `undefined` / `null` yields an empty list; `label` prefixes every error message. + */ +function parseStringList(field: unknown, label: string): Result<ReadonlyArray<string>> { + if (field === undefined || field === null) { + return ok([]); + } + if (!Array.isArray(field)) { + return err(new Error(`${label}: must be an
array of strings`)); + } + const out: string[] = []; + for (let i = 0; i < field.length; i++) { + const item = field[i]; + if (typeof item !== "string" || item.length === 0) { + return err(new Error(`${label}[${String(i)}]: must be a non-empty string`)); + } + out.push(item); + } + return ok(out); +} + +/** + * Parse `knowledge.yaml` at the repo root (RFC-003 Knowledge Layer). + * `include` / `exclude` entries are glob patterns resolved against the repo root. + * An empty document yields empty lists; a root that fails `isPlainRecord` or an invalid list yields an error result. + */ +export function parseKnowledgeYaml(raw: string): Result<KnowledgeConfig> { + let parsed: unknown; + try { + parsed = parse(raw); + } catch (e) { + const message = e instanceof Error ? e.message : String(e); + return err(new Error(`YAML parse error: ${message}`)); + } + + if (parsed === undefined || parsed === null) { + return ok({ include: [], exclude: [] }); + } + + if (!isPlainRecord(parsed)) { + return err(new Error("knowledge.yaml: root must be a mapping")); + } + + const includeResult = parseStringList(parsed.include, "include"); + if (!includeResult.ok) { + return includeResult; + } + const excludeResult = parseStringList(parsed.exclude, "exclude"); + if (!excludeResult.ok) { + return excludeResult; + } + + return ok({ + include: includeResult.value, + exclude: excludeResult.value, + }); +} diff --git a/packages/workflow-utils/src/index.ts b/packages/workflow-utils/src/index.ts index 3b80e72..32b22a4 100644 --- a/packages/workflow-utils/src/index.ts +++ b/packages/workflow-utils/src/index.ts @@ -7,6 +7,7 @@ export { cursorAgent } from "./shared/cursor-agent.js"; export { llmExtract, llmExtractWithRetry } from "./shared/llm-extract.js"; export { mergeExtractConfig, type ExtractConfigLayer } from "./shared/merge-extract-config.js"; export { + assertZodMetaSchemas, createLlmExtractFn, extractMetaOrThrow, type ZodMetaSchema, diff --git a/packages/workflow-utils/src/shared/extract-fn.ts b/packages/workflow-utils/src/shared/extract-fn.ts index 60fc52a..f8471d5 100644 --- a/packages/workflow-utils/src/shared/extract-fn.ts
+++ b/packages/workflow-utils/src/shared/extract-fn.ts @@ -36,9 +36,24 @@ export function createLlmExtractFn(deps: { }): ExtractFn { return async (raw, schema) => { const extended = schema as ZodMetaSchema; + // Per-call runtime guard; run assertZodMetaSchemas once at startup to reject a bad schema before any role is invoked if (!("zod" in extended)) { throw new Error("extract: schema must be a ZodMetaSchema (include zod parser)"); } return extractMetaOrThrow(raw, extended.zod, deps); }; } + +/** + * Validate at runtime that every schema in the given map (keyed by role name) is a ZodMetaSchema, i.e. has a `zod` property; + * throws on the first entry that does not. Call this once at daemon startup / hot-reload, before any role is ever invoked. + */ +export function assertZodMetaSchemas(schemas: Record>): void { + for (const [roleName, schema] of Object.entries(schemas)) { + if (!("zod" in schema)) { + throw new Error( + `Role "${roleName}": schema must be a ZodMetaSchema (include zod parser). Validate schemas at compile time to catch this early.`, + ); + } + } +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 82a7e15..7fc2fef 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -32,6 +32,9 @@ importers: citty: specifier: ^0.1.6 version: 0.1.6 + picomatch: + specifier: ^4.0.2 + version: 4.0.4 yaml: specifier: ^2.8.3 version: 2.8.3 @@ -121,6 +124,8 @@ importers: specifier: ^4.14.0 version: 4.85.0(@cloudflare/workers-types@4.20260425.1) + packages/skills: {} + packages/store: dependencies: '@uncaged/nerve-core': -- 2.43.0 From 623fb3cd3a0457a956eed202f6ccfef66a3f4cac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= Date: Wed, 29 Apr 2026 06:02:24 +0000 Subject: [PATCH 2/2] fix(cli): knowledge query --repo flag, remove -r alias (conflicts with global remote flag) - Rename -r to --repo for knowledge query scope - Update RFC docs to match - Fix biome format issues - Add assertZodMetaSchemas export - KNOWN_AGENT_ADAPTER_IDS: add cursor/hermes/codex Self-tested: nerve knowledge sync + query work correctly --- docs/rfc-003-agent-config-layer.md | 4 +-
.../cli/src/__tests__/chunk-markdown.test.ts | 35 +++++++++++++++ packages/cli/src/__tests__/glob-files.test.ts | 24 +++++++++++ .../__tests__/knowledge-query-scope.test.ts | 28 ++++++++++++ .../cli/src/__tests__/knowledge-query.test.ts | 35 +++++++++++++++ .../src/__tests__/knowledge-registry.test.ts | 26 +++++++++++ .../cli/src/__tests__/knowledge-sync.test.ts | 43 +++++++++++++++++++ packages/cli/src/commands/knowledge.ts | 8 ++-- packages/cli/src/knowledge/glob-files.ts | 5 +-- packages/cli/src/knowledge/registry.ts | 25 ++++++----- packages/cli/src/knowledge/sync.ts | 10 ++++- packages/cli/src/types/picomatch.d.ts | 7 +++ 12 files changed, 228 insertions(+), 22 deletions(-) create mode 100644 packages/cli/src/__tests__/chunk-markdown.test.ts create mode 100644 packages/cli/src/__tests__/glob-files.test.ts create mode 100644 packages/cli/src/__tests__/knowledge-query-scope.test.ts create mode 100644 packages/cli/src/__tests__/knowledge-query.test.ts create mode 100644 packages/cli/src/__tests__/knowledge-registry.test.ts create mode 100644 packages/cli/src/__tests__/knowledge-sync.test.ts create mode 100644 packages/cli/src/types/picomatch.d.ts diff --git a/docs/rfc-003-agent-config-layer.md b/docs/rfc-003-agent-config-layer.md index 644a1b6..d86a1eb 100644 --- a/docs/rfc-003-agent-config-layer.md +++ b/docs/rfc-003-agent-config-layer.md @@ -213,9 +213,9 @@ nerve knowledge query "how does the signal bus work" # Scope nerve knowledge query "..." # default: cwd repo -nerve knowledge query -r /path/to/other/repo "..." +nerve knowledge query --repo /path/to/other/repo "..." nerve knowledge query -g "..." 
# global search (all indexed repos) -# -r and -g are mutually exclusive +# --repo and -g are mutually exclusive ``` ### Search Implementation diff --git a/packages/cli/src/__tests__/chunk-markdown.test.ts b/packages/cli/src/__tests__/chunk-markdown.test.ts new file mode 100644 index 0000000..8b8fd17 --- /dev/null +++ b/packages/cli/src/__tests__/chunk-markdown.test.ts @@ -0,0 +1,35 @@ +import { describe, expect, it } from "vitest"; + +import { chunkMarkdown } from "../knowledge/chunk-markdown.js"; + +describe("chunkMarkdown", () => { + it("splits markdown by headings into separate chunks", () => { + const md = `# Title One + +Intro para under first heading. + +## Title Two + +Second section body. + +`; + const chunks = chunkMarkdown("docs/guide.md", md); + expect(chunks.length).toBeGreaterThanOrEqual(2); + const joined = chunks.map((c) => c.text).join("\n"); + expect(joined).toContain("Title One"); + expect(joined).toContain("Title Two"); + }); + + it("includes preamble before first heading as its own chunk when present", () => { + const md = `Preamble line here. + +# First Real Heading + +Under heading. 
+`; + const chunks = chunkMarkdown("readme.md", md); + const preamble = chunks.find((c) => c.slug.includes("preamble")); + expect(preamble).toBeDefined(); + expect(preamble?.text).toContain("Preamble"); + }); +}); diff --git a/packages/cli/src/__tests__/glob-files.test.ts b/packages/cli/src/__tests__/glob-files.test.ts new file mode 100644 index 0000000..2abbee8 --- /dev/null +++ b/packages/cli/src/__tests__/glob-files.test.ts @@ -0,0 +1,24 @@ +import { mkdirSync, mkdtempSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { describe, expect, it } from "vitest"; + +import { listKnowledgeFiles } from "../knowledge/glob-files.js"; + +describe("listKnowledgeFiles", () => { + it("includes matching paths and applies exclude globs", () => { + const root = mkdtempSync(join(tmpdir(), "nerve-glob-")); + mkdirSync(join(root, "src"), { recursive: true }); + writeFileSync(join(root, "src", "keep.ts"), "export function x() {}\n"); + writeFileSync(join(root, "src", "drop.test.ts"), "// test\n"); + + const files = listKnowledgeFiles(root, { + include: ["src/**/*.ts"], + exclude: ["**/*.test.ts"], + }); + + expect(files).toContain("src/keep.ts"); + expect(files).not.toContain("src/drop.test.ts"); + }); +}); diff --git a/packages/cli/src/__tests__/knowledge-query-scope.test.ts b/packages/cli/src/__tests__/knowledge-query-scope.test.ts new file mode 100644 index 0000000..ef53de2 --- /dev/null +++ b/packages/cli/src/__tests__/knowledge-query-scope.test.ts @@ -0,0 +1,28 @@ +import { describe, expect, it } from "vitest"; + +import { knowledgeQueryScopeConflictMessage } from "../knowledge/query-scope.js"; + +describe("knowledgeQueryScopeConflictMessage", () => { + it("returns null when only -r is used", () => { + expect(knowledgeQueryScopeConflictMessage("/tmp/repo", false)).toBeNull(); + }); + + it("returns null when only -g is used", () => { + expect(knowledgeQueryScopeConflictMessage(undefined, true)).toBeNull(); + }); 
+ + it("returns null when neither -r nor -g", () => { + expect(knowledgeQueryScopeConflictMessage(undefined, false)).toBeNull(); + }); + + it("returns error when both -r and -g", () => { + const msg = knowledgeQueryScopeConflictMessage("/some/path", true); + expect(msg).not.toBeNull(); + expect(msg).toContain("-r"); + expect(msg).toContain("-g"); + }); + + it("treats empty -r as absent", () => { + expect(knowledgeQueryScopeConflictMessage("", true)).toBeNull(); + }); +}); diff --git a/packages/cli/src/__tests__/knowledge-query.test.ts b/packages/cli/src/__tests__/knowledge-query.test.ts new file mode 100644 index 0000000..f6cc716 --- /dev/null +++ b/packages/cli/src/__tests__/knowledge-query.test.ts @@ -0,0 +1,35 @@ +import { describe, expect, it } from "vitest"; + +import type { KnowledgeChunkRow } from "../knowledge/knowledge-db.js"; +import { rankChunksByWordOverlap } from "../knowledge/query.js"; + +function chunk(path: string, text: string): KnowledgeChunkRow { + return { + path, + slug: `${path}#0`, + chunkIndex: 0, + text, + embedding: Buffer.alloc(8), + contentHash: "ab", + }; +} + +describe("rankChunksByWordOverlap", () => { + it("returns higher scores for chunks that share words with the query", () => { + const rows = [ + chunk("a.md", "the signal bus emits notifications"), + chunk("b.md", "unrelated cooking recipes"), + ]; + + const ranked = rankChunksByWordOverlap("signal bus", rows, 10); + expect(ranked.length).toBe(2); + expect(ranked[0]?.chunk.path).toBe("a.md"); + expect(ranked[1]?.chunk.path).toBe("b.md"); + expect(ranked[0]?.score).toBeGreaterThan(ranked[1]?.score ?? 
0); + }); + + it("respects limit", () => { + const rows = [chunk("x.md", "one"), chunk("y.md", "two")]; + expect(rankChunksByWordOverlap("one", rows, 1)).toHaveLength(1); + }); +}); diff --git a/packages/cli/src/__tests__/knowledge-registry.test.ts b/packages/cli/src/__tests__/knowledge-registry.test.ts new file mode 100644 index 0000000..db8d577 --- /dev/null +++ b/packages/cli/src/__tests__/knowledge-registry.test.ts @@ -0,0 +1,26 @@ +import { mkdtempSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { describe, expect, it } from "vitest"; + +import { + listRegisteredKnowledgeRoots, + readKnowledgeRegistry, + registerKnowledgeRepoRoot, +} from "../knowledge/registry.js"; + +describe("knowledge repo registry", () => { + it("accumulates registered repo roots under a nerve home", () => { + const nerveHome = mkdtempSync(join(tmpdir(), "nerve-reg-")); + const repoA = mkdtempSync(join(tmpdir(), "repo-a-")); + const repoB = mkdtempSync(join(tmpdir(), "repo-b-")); + + registerKnowledgeRepoRoot(repoA, nerveHome); + registerKnowledgeRepoRoot(repoB, nerveHome); + registerKnowledgeRepoRoot(repoA, nerveHome); + + expect(readKnowledgeRegistry(nerveHome).roots).toEqual([repoA, repoB].sort()); + expect(listRegisteredKnowledgeRoots(nerveHome)).toEqual([repoA, repoB].sort()); + }); +}); diff --git a/packages/cli/src/__tests__/knowledge-sync.test.ts b/packages/cli/src/__tests__/knowledge-sync.test.ts new file mode 100644 index 0000000..3e6abea --- /dev/null +++ b/packages/cli/src/__tests__/knowledge-sync.test.ts @@ -0,0 +1,43 @@ +import { mkdirSync, mkdtempSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { DatabaseSync } from "node:sqlite"; + +import { describe, expect, it } from "vitest"; + +import { runKnowledgeSync } from "../knowledge/sync.js"; + +describe("runKnowledgeSync", () => { + it("creates knowledge.db with chunk rows", () => { + const nerveHome = 
mkdtempSync(join(tmpdir(), "nerve-home-")); + const root = mkdtempSync(join(tmpdir(), "nerve-know-sync-")); + mkdirSync(join(root, "docs"), { recursive: true }); + writeFileSync( + join(root, "docs", "a.md"), + `# Hello + +Some body text about bananas. + +`, + ); + writeFileSync( + join(root, "knowledge.yaml"), + `include: + - "docs/**/*.md" +exclude: [] +`, + ); + + const result = runKnowledgeSync(root, nerveHome); + expect(result.chunksWritten).toBeGreaterThan(0); + + const db = new DatabaseSync(result.dbPath, { readOnly: true }); + try { + const row = db.prepare("SELECT COUNT(*) AS c FROM chunks").get() as { c: number }; + expect(row.c).toBe(result.chunksWritten); + } finally { + db.close(); + } + }); +}); diff --git a/packages/cli/src/commands/knowledge.ts b/packages/cli/src/commands/knowledge.ts index d42bef5..850e580 100644 --- a/packages/cli/src/commands/knowledge.ts +++ b/packages/cli/src/commands/knowledge.ts @@ -46,9 +46,9 @@ const queryCommand = defineCommand({ required: true, description: "Search text", }, - r: { + repo: { type: "string", - description: "Use knowledge.db from another repo root", + description: "Use knowledge.db from another repo root (--repo /path)", required: false, }, g: { @@ -63,7 +63,7 @@ const queryCommand = defineCommand({ }, }, async run({ args }) { - const conflict = knowledgeQueryScopeConflictMessage(args.r, args.g); + const conflict = knowledgeQueryScopeConflictMessage(args.repo, args.g); if (conflict !== null) { process.stderr.write(`${conflict}\n`); process.exit(1); @@ -77,7 +77,7 @@ const queryCommand = defineCommand({ return; } - runKnowledgeQueryScoped(args.r, queryText, limit); + runKnowledgeQueryScoped(args.repo as string, queryText, limit); }, }); diff --git a/packages/cli/src/knowledge/glob-files.ts b/packages/cli/src/knowledge/glob-files.ts index 372074f..61adadc 100644 --- a/packages/cli/src/knowledge/glob-files.ts +++ b/packages/cli/src/knowledge/glob-files.ts @@ -23,10 +23,7 @@ function isFileUnderRoot(repoRoot: 
string, rel: string): boolean { export function listKnowledgeFiles(repoRoot: string, config: KnowledgeConfig): string[] { const matched = new Set(); for (const pattern of config.include) { - const paths = globSync(pattern, { - cwd: repoRoot, - windowsPathsNoEscape: true, - }); + const paths = globSync(pattern, { cwd: repoRoot }); for (const rel of paths) { const posix = toPosix(rel); if (!isFileUnderRoot(repoRoot, posix)) { diff --git a/packages/cli/src/knowledge/registry.ts b/packages/cli/src/knowledge/registry.ts index 91e2c55..b57fff7 100644 --- a/packages/cli/src/knowledge/registry.ts +++ b/packages/cli/src/knowledge/registry.ts @@ -1,5 +1,5 @@ import { mkdirSync, readFileSync, writeFileSync } from "node:fs"; -import { dirname } from "node:path"; +import { dirname, join } from "node:path"; import { getNerveRoot } from "../workspace.js"; @@ -9,16 +9,18 @@ export type KnowledgeRepoRegistry = { const FILE_NAME = "knowledge-repos.json"; -export function getKnowledgeRegistryPath(): string { - return `${getNerveRoot()}/data/${FILE_NAME}`; +/** When `nerveHome` is omitted, uses `~/.uncaged-nerve`. */ +export function getKnowledgeRegistryPath(nerveHome: string | null = null): string { + const root = nerveHome ?? 
getNerveRoot(); + return join(root, "data", FILE_NAME); } function defaultRegistry(): KnowledgeRepoRegistry { return { roots: [] }; } -export function readKnowledgeRegistry(): KnowledgeRepoRegistry { - const path = getKnowledgeRegistryPath(); +export function readKnowledgeRegistry(nerveHome: string | null = null): KnowledgeRepoRegistry { + const path = getKnowledgeRegistryPath(nerveHome); try { const raw = readFileSync(path, "utf8"); const parsed: unknown = JSON.parse(raw); @@ -37,19 +39,22 @@ export function readKnowledgeRegistry(): KnowledgeRepoRegistry { return defaultRegistry(); } -export function registerKnowledgeRepoRoot(repoRootAbsolute: string): void { +export function registerKnowledgeRepoRoot( + repoRootAbsolute: string, + nerveHome: string | null = null, +): void { const resolved = repoRootAbsolute.trim(); if (resolved.length === 0) { return; } - const prev = readKnowledgeRegistry(); + const prev = readKnowledgeRegistry(nerveHome); const nextRoots = [...new Set([...prev.roots, resolved])].sort(); const next: KnowledgeRepoRegistry = { roots: nextRoots }; - const path = getKnowledgeRegistryPath(); + const path = getKnowledgeRegistryPath(nerveHome); mkdirSync(dirname(path), { recursive: true }); writeFileSync(path, `${JSON.stringify(next, null, 2)}\n`, "utf8"); } -export function listRegisteredKnowledgeRoots(): string[] { - return [...readKnowledgeRegistry().roots]; +export function listRegisteredKnowledgeRoots(nerveHome: string | null = null): string[] { + return [...readKnowledgeRegistry(nerveHome).roots]; } diff --git a/packages/cli/src/knowledge/sync.ts b/packages/cli/src/knowledge/sync.ts index def84db..91f5450 100644 --- a/packages/cli/src/knowledge/sync.ts +++ b/packages/cli/src/knowledge/sync.ts @@ -25,7 +25,13 @@ function loadConfig(repoRoot: string): KnowledgeConfig { return parsed.value; } -export function runKnowledgeSync(repoRoot: string): KnowledgeSyncResult { +/** + * @param nerveHomeForRegistry — when set, registers this repo under that 
Nerve home (for tests); default writes `~/.uncaged-nerve/data/knowledge-repos.json`. + */ +export function runKnowledgeSync( + repoRoot: string, + nerveHomeForRegistry: string | null = null, +): KnowledgeSyncResult { const config = loadConfig(repoRoot); const relFiles = listKnowledgeFiles(repoRoot, config); const inserts: Array<{ @@ -62,7 +68,7 @@ export function runKnowledgeSync(repoRoot: string): KnowledgeSyncResult { db.close(); } - registerKnowledgeRepoRoot(repoRoot); + registerKnowledgeRepoRoot(repoRoot, nerveHomeForRegistry); return { repoRoot, diff --git a/packages/cli/src/types/picomatch.d.ts b/packages/cli/src/types/picomatch.d.ts new file mode 100644 index 0000000..1b24838 --- /dev/null +++ b/packages/cli/src/types/picomatch.d.ts @@ -0,0 +1,7 @@ +declare module "picomatch" { + // biome-ignore lint/style/noDefaultExport: ambient declaration mirrors picomatch default export + export default function picomatch( + glob: string, + options?: { dot?: boolean }, + ): (input: string) => boolean; +} -- 2.43.0