feat(cli,core): RFC-003 Phase 6 — Knowledge Layer + review fixes
Knowledge Layer: - knowledge.yaml parser in core (include/exclude globs) - Chunking: markdown (by heading), TypeScript/JS (by function/block) - knowledge.db: SQLite storage for chunks + embeddings (node:sqlite) - CLI: nerve knowledge sync, nerve knowledge query - Scoping: -r (specific repo), -g (global search), mutually exclusive - Repo registry (~/.nerve-knowledge-registry.json) for global search - Placeholder embedding (content hash) until remote service ready - Word-overlap similarity for query ranking Review fixes (from PR #241 feedback): - KNOWN_AGENT_ADAPTER_IDS: add cursor/hermes/codex + sync docs - collectWorkflowSpecAgentReferences: document regex comment false-positive - assertZodMetaSchemas: one-time compile-time validation utility Closes #240 Ref: #234
This commit is contained in:
@@ -23,6 +23,7 @@
|
||||
"@uncaged/nerve-core": "workspace:*",
|
||||
"@uncaged/nerve-store": "workspace:*",
|
||||
"citty": "^0.1.6",
|
||||
"picomatch": "^4.0.2",
|
||||
"yaml": "^2.8.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
||||
@@ -7,6 +7,7 @@ import { createCommand } from "./commands/create.js";
|
||||
import { daemonCommand } from "./commands/daemon.js";
|
||||
import { devCommand } from "./commands/dev.js";
|
||||
import { initCommand } from "./commands/init.js";
|
||||
import { knowledgeCommand } from "./commands/knowledge.js";
|
||||
import { remoteCommand } from "./commands/remote.js";
|
||||
import { senseCommand } from "./commands/sense.js";
|
||||
import { storeCommand } from "./commands/store.js";
|
||||
@@ -46,6 +47,7 @@ const main = defineCommand({
|
||||
daemon: daemonCommand,
|
||||
dev: devCommand,
|
||||
validate: validateCommand,
|
||||
knowledge: knowledgeCommand,
|
||||
sense: senseCommand,
|
||||
store: storeCommand,
|
||||
remote: remoteCommand,
|
||||
|
||||
@@ -78,6 +78,7 @@ const GITIGNORE = `data/
|
||||
logs/
|
||||
nerve.pid
|
||||
node_modules/
|
||||
knowledge.db
|
||||
`;
|
||||
|
||||
const NERVE_SKILLS_MDC = `---
|
||||
|
||||
@@ -0,0 +1,79 @@
|
||||
import { existsSync } from "node:fs";
|
||||
import { resolve } from "node:path";
|
||||
|
||||
import { KNOWLEDGE_DB } from "../knowledge/paths.js";
|
||||
import { queryKnowledgeGlobal, queryKnowledgeRepo } from "../knowledge/query.js";
|
||||
import { listRegisteredKnowledgeRoots } from "../knowledge/registry.js";
|
||||
import { findKnowledgeRepoRoot } from "../knowledge/repo-root.js";
|
||||
|
||||
/** Hit count used when `--limit` is absent or unusable. */
const DEFAULT_LIMIT = 10;

/**
 * Turn the raw `--limit` flag into a positive hit count.
 *
 * Falls back to the default when the flag is missing, blank, not numeric,
 * or not strictly positive. Parsing follows base-10 `Number.parseInt`, so
 * trailing non-digits are ignored ("7x" yields 7).
 *
 * @param raw - Flag value as typed by the user, if any.
 * @returns A finite integer greater than zero.
 */
export function parseKnowledgeQueryLimit(raw: string | undefined): number {
  if (raw === undefined) {
    return DEFAULT_LIMIT;
  }
  if (raw.trim() === "") {
    return DEFAULT_LIMIT;
  }
  const parsed = Number.parseInt(raw, 10);
  if (!Number.isFinite(parsed) || parsed <= 0) {
    return DEFAULT_LIMIT;
  }
  return parsed;
}
|
||||
|
||||
/**
 * Run a query across every repo in the knowledge registry and print ranked hits.
 *
 * Exits the process with status 1 (after writing to stderr) when no repo is
 * registered. Prints "No results." when the search yields nothing.
 *
 * @param queryText - Free-text query.
 * @param limit - Maximum number of hits to print.
 */
export function runKnowledgeQueryGlobal(queryText: string, limit: number): void {
  const registeredRoots = listRegisteredKnowledgeRoots();
  if (registeredRoots.length === 0) {
    process.stderr.write(
      "❌ No registered repos — run `nerve knowledge sync` in each repo first.\n",
    );
    process.exit(1);
  }

  const results = queryKnowledgeGlobal(registeredRoots, KNOWLEDGE_DB, queryText, limit);
  if (results.length === 0) {
    process.stdout.write("No results.\n");
    return;
  }

  results.forEach((hit, index) => {
    // Tag each hit with its repo so results from different repos can be told apart.
    const repoTag = hit.repoRoot === null ? "" : `[${hit.repoRoot}] `;
    process.stdout.write(
      `${String(index + 1)}. score=${hit.score.toFixed(4)} ${repoTag}${hit.path} (${hit.slug})\n${hit.text}\n---\n`,
    );
  });
}
|
||||
|
||||
/**
 * Run a query against a single repo's knowledge.db and print ranked hits.
 *
 * The repo is the one named by `-r` when that flag is non-blank; otherwise it is
 * found by walking up from the current working directory looking for
 * knowledge.yaml. Exits with status 1 when no repo can be determined or the
 * repo has no knowledge.db yet.
 *
 * @param repoFlag - Raw value of `-r`, if given.
 * @param queryText - Free-text query.
 * @param limit - Maximum number of hits to print.
 */
export function runKnowledgeQueryScoped(
  repoFlag: string | undefined,
  queryText: string,
  limit: number,
): void {
  // String() is kept defensively: the flag comes straight from the CLI parser.
  const explicitRepo = repoFlag === undefined ? "" : String(repoFlag).trim();
  const repoRoot =
    explicitRepo.length > 0 ? resolve(explicitRepo) : findKnowledgeRepoRoot(process.cwd());

  if (repoRoot === null) {
    process.stderr.write("❌ No knowledge.yaml found — use -r <path> or run from a repo root.\n");
    process.exit(1);
  }

  // Build the path with the path module rather than string concatenation so
  // separators and trailing slashes are normalised on every platform.
  const dbPath = resolve(repoRoot, KNOWLEDGE_DB);
  if (!existsSync(dbPath)) {
    process.stderr.write(
      `❌ No ${KNOWLEDGE_DB} in ${repoRoot} — run \`nerve knowledge sync\` first.\n`,
    );
    process.exit(1);
  }

  const hits = queryKnowledgeRepo(repoRoot, dbPath, queryText, limit);
  if (hits.length === 0) {
    process.stdout.write("No results.\n");
    return;
  }
  hits.forEach((h, i) => {
    process.stdout.write(
      `${String(i + 1)}. score=${h.score.toFixed(4)} ${h.path} (${h.slug})\n${h.text}\n---\n`,
    );
  });
}
|
||||
@@ -0,0 +1,93 @@
|
||||
import { defineCommand } from "citty";
|
||||
|
||||
import { knowledgeQueryScopeConflictMessage } from "../knowledge/query-scope.js";
|
||||
import { findKnowledgeRepoRoot } from "../knowledge/repo-root.js";
|
||||
import { runKnowledgeSync } from "../knowledge/sync.js";
|
||||
import {
|
||||
parseKnowledgeQueryLimit,
|
||||
runKnowledgeQueryGlobal,
|
||||
runKnowledgeQueryScoped,
|
||||
} from "./knowledge-query-run.js";
|
||||
|
||||
/**
 * `nerve knowledge sync`: locate the enclosing knowledge repo from the current
 * directory, rebuild its index, and report what was written. Any failure is
 * reported on stderr and ends the process with status 1.
 */
const syncCommand = defineCommand({
  meta: {
    name: "sync",
    description: "Chunk matching files from knowledge.yaml and rebuild knowledge.db",
  },
  async run() {
    const repoRoot = findKnowledgeRepoRoot(process.cwd());
    if (repoRoot === null) {
      process.stderr.write(
        "❌ No knowledge.yaml found — run from a repo that contains knowledge.yaml.\n",
      );
      process.exit(1);
    }

    try {
      const { filesIndexed, chunksWritten, dbPath } = runKnowledgeSync(repoRoot);
      process.stdout.write(
        `✅ Indexed ${String(filesIndexed)} file(s), ${String(chunksWritten)} chunk(s) → ${dbPath}\n`,
      );
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      process.stderr.write(`❌ knowledge sync failed: ${reason}\n`);
      process.exit(1);
    }
  },
});
|
||||
|
||||
/**
 * `nerve knowledge query <text>`: search the index of the current repo, of
 * another repo (`-r`), or of all registered repos (`-g`). `-r` and `-g`
 * together are rejected with exit status 1.
 */
const queryCommand = defineCommand({
  meta: {
    name: "query",
    description: "Search indexed knowledge (word overlap placeholder until embeddings)",
  },
  args: {
    query: {
      type: "positional",
      required: true,
      description: "Search text",
    },
    r: {
      type: "string",
      description: "Use knowledge.db from another repo root",
      required: false,
    },
    g: {
      type: "boolean",
      description: "Search across all repos registered via prior sync",
      default: false,
    },
    limit: {
      type: "string",
      description: "Max hits (default 10)",
      required: false,
    },
  },
  async run({ args }) {
    const scopeError = knowledgeQueryScopeConflictMessage(args.r, args.g);
    if (scopeError !== null) {
      process.stderr.write(`${scopeError}\n`);
      process.exit(1);
    }

    const maxHits = parseKnowledgeQueryLimit(args.limit);
    if (args.g) {
      runKnowledgeQueryGlobal(args.query, maxHits);
    } else {
      runKnowledgeQueryScoped(args.r, args.query, maxHits);
    }
  },
});
|
||||
|
||||
/** Parent `nerve knowledge` command; it only dispatches to `sync` and `query`. */
export const knowledgeCommand = defineCommand({
  subCommands: { sync: syncCommand, query: queryCommand },
  meta: {
    name: "knowledge",
    description: "Project knowledge index (knowledge.yaml + knowledge.db, RFC-003)",
  },
});
|
||||
@@ -0,0 +1,88 @@
|
||||
const HEADING_RE = /^(#{1,6})\s+(.+)$/;
|
||||
|
||||
export type MarkdownChunk = {
|
||||
slug: string;
|
||||
text: string;
|
||||
};
|
||||
|
||||
/**
 * Build the slug fragment for a heading title: trimmed, lower-cased, whitespace
 * runs turned into "-", and every character outside `[a-z0-9_-]` removed.
 * Returns "section" when nothing survives.
 */
function slugPart(title: string): string {
  const cleaned = title
    .trim()
    .toLowerCase()
    .replace(/\s+/g, "-")
    .replace(/[^a-z0-9_-]+/g, "");
  if (cleaned === "") {
    return "section";
  }
  return cleaned;
}
|
||||
|
||||
/**
 * Keep a section as one chunk when it has at most 24 non-blank paragraphs;
 * otherwise cut it into consecutive groups of 24 paragraphs.
 *
 * A section that is not split keeps its text untouched and uses `slugBase`
 * as-is. Split parts are re-joined with a single blank line and get the slugs
 * `<slugBase>-part0`, `<slugBase>-part1`, …
 */
function splitLargeMarkdownChunk(slugBase: string, text: string): MarkdownChunk[] {
  const paragraphsPerPart = 24;
  const paragraphs = text.split(/\n\s*\n/).filter((para) => para.trim() !== "");
  if (paragraphs.length <= paragraphsPerPart) {
    return [{ slug: slugBase, text }];
  }

  const partCount = Math.ceil(paragraphs.length / paragraphsPerPart);
  return Array.from({ length: partCount }, (_unused, part) => {
    const from = part * paragraphsPerPart;
    return {
      slug: `${slugBase}-part${String(part)}`,
      text: paragraphs.slice(from, from + paragraphsPerPart).join("\n\n"),
    };
  });
}
|
||||
|
||||
/**
 * Return the indices of lines that are ATX headings (`#` … `######`).
 *
 * Lines inside fenced code blocks (``` or ~~~) are skipped, so a `# comment`
 * in a shell snippet does not start a new chunk. A fence closes on a line made
 * only of the same marker character, at least as long as the opening run; an
 * unclosed fence runs to the end of the document.
 *
 * @param lines - Document split into lines.
 * @returns Ascending line indices of headings outside code fences.
 */
function headingLineIndices(lines: string[]): number[] {
  const fenceRe = /^ {0,3}(`{3,}|~{3,})/;
  const headingIdx: number[] = [];
  // Marker run that opened the fence we are currently inside, or null outside a fence.
  let openFence: string | null = null;

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i] ?? "";
    const fence = fenceRe.exec(line)?.[1];

    if (openFence !== null) {
      const closes =
        fence !== undefined &&
        fence[0] === openFence[0] &&
        fence.length >= openFence.length &&
        line.trim() === fence;
      if (closes) {
        openFence = null;
      }
      continue;
    }

    if (fence !== undefined) {
      openFence = fence;
      continue;
    }

    if (HEADING_RE.test(line)) {
      headingIdx.push(i);
    }
  }
  return headingIdx;
}
|
||||
|
||||
/**
 * Turn a document with at least one heading into chunks.
 *
 * Text before the first heading becomes a `#preamble` chunk. Each heading then
 * owns the lines up to the next heading (or the end of the file). Section slugs
 * are `<baseSlug>#<title-slug>-<heading ordinal>`; the ordinal keeps repeated
 * titles distinct. Oversized pieces are split further by paragraph.
 *
 * @param lines - Document split into lines.
 * @param headingIdx - Ascending indices of heading lines within `lines`.
 * @param baseSlug - Slug prefix derived from the file path.
 */
function chunksFromHeadings(
  lines: string[],
  headingIdx: number[],
  baseSlug: string,
): MarkdownChunk[] {
  const out: MarkdownChunk[] = [];

  const preambleEnd = headingIdx[0] ?? 0;
  const preamble = preambleEnd > 0 ? lines.slice(0, preambleEnd).join("\n").trim() : "";
  if (preamble !== "") {
    out.push(...splitLargeMarkdownChunk(`${baseSlug}#preamble`, preamble));
  }

  headingIdx.forEach((start, h) => {
    const end = headingIdx[h + 1] ?? lines.length;
    const section = lines.slice(start, end).join("\n").trim();
    if (section === "") {
      return;
    }
    const match = HEADING_RE.exec(lines[start] ?? "");
    const suffix = match === null ? `h${String(h)}` : slugPart(match[2] ?? "h");
    out.push(...splitLargeMarkdownChunk(`${baseSlug}#${suffix}-${String(h)}`, section));
  });

  return out;
}
|
||||
|
||||
/**
 * Chunk a Markdown file by heading, splitting long sections further at blank lines.
 *
 * A file with no headings becomes a single `#doc` chunk (itself subject to the
 * paragraph split); an empty or whitespace-only file with no headings yields
 * no chunks. Slugs are prefixed with the path, with "/" replaced by "-".
 *
 * @param relativePath - Repo-relative path, used only to build slugs.
 * @param source - Full file contents.
 */
export function chunkMarkdown(relativePath: string, source: string): MarkdownChunk[] {
  const baseSlug = relativePath.replace(/\//g, "-");
  const lines = source.split(/\r?\n/);
  const headingIdx = headingLineIndices(lines);

  if (headingIdx.length > 0) {
    return chunksFromHeadings(lines, headingIdx, baseSlug);
  }

  const whole = source.trim();
  return whole.length === 0 ? [] : splitLargeMarkdownChunk(`${baseSlug}#doc`, whole);
}
|
||||
@@ -0,0 +1,87 @@
|
||||
export type TsJsChunk = {
|
||||
slug: string;
|
||||
text: string;
|
||||
};
|
||||
|
||||
/**
 * Heuristic test for "this line starts a function-like declaration" (no real
 * TS parse; leading indentation is ignored).
 *
 * Recognised forms, each optionally prefixed by `export`:
 * - `function name(`, `async function name(`, `function* name(`
 * - generic declarations such as `function name<T>(`
 * - `export default function name(`
 * - `const name = (` and `const name = async (` (arrow functions)
 * - `const name = function` and `const name = async function`
 *
 * Ambient `declare …` lines are never treated as starts because they have no body.
 */
function isFunctionStartLine(line: string): boolean {
  const t = line.trimStart();
  if (/^(export\s+)?declare\s+/.test(t)) {
    return false;
  }
  // `function` must be followed by whitespace or a generator star so that an
  // identifier like `functionfoo(` is not mistaken for a declaration.
  const declaration =
    /^(export\s+(default\s+)?)?(async\s+)?function(\s+|\s*\*\s*)[A-Za-z_$][\w$]*\s*[<(]/;
  const arrowConst = /^(export\s+)?const\s+[A-Za-z_$][\w$]*\s*=\s*(async\s*)?\(/;
  const functionExprConst = /^(export\s+)?const\s+[A-Za-z_$][\w$]*\s*=\s*(async\s+)?function\b/;
  return declaration.test(t) || arrowConst.test(t) || functionExprConst.test(t);
}
|
||||
|
||||
/**
 * Make an identifier safe for use in a slug: each run of characters other than
 * word characters, `$` and `-` becomes a single "-", then the result is
 * lower-cased. Returns "block" for an empty result.
 */
function slugPart(name: string): string {
  const sanitized = name.replace(/[^\w$-]+/g, "-");
  const lowered = sanitized.toLowerCase();
  return lowered === "" ? "block" : lowered;
}
|
||||
|
||||
/**
 * Best-effort name for the declaration on `firstLine`: the identifier after
 * `function` if there is one, otherwise the identifier after `const`,
 * otherwise the placeholder "fn".
 */
function extractRoughName(firstLine: string): string {
  // Order matters: `const f = function g()` should report `g`, as before.
  const patterns = [/function\s+([A-Za-z_$][\w$]*)/, /const\s+([A-Za-z_$][\w$]*)/];
  for (const pattern of patterns) {
    const found = pattern.exec(firstLine);
    if (found !== null) {
      return found[1] ?? "fn";
    }
  }
  return "fn";
}
|
||||
|
||||
/**
 * Split a `.ts` / `.js` source into chunks at function-like start lines.
 *
 * - Everything before the first function start (imports, types, constants,
 *   leading comments) is kept as a `#preamble` chunk instead of being dropped.
 * - Each function start owns the lines up to the next start (or end of file);
 *   its slug is `<baseSlug>#<name>-<ordinal>`.
 * - When no function start is found, the file is chunked by blank-line paragraphs.
 *
 * @param relativePath - Repo-relative path; "." and "/" become "-" in slugs.
 * @param source - Full file contents.
 */
export function chunkTypeScriptOrJavaScript(relativePath: string, source: string): TsJsChunk[] {
  const baseSlug = relativePath.replace(/\./g, "-").replace(/\//g, "-");
  const lines = source.split(/\r?\n/);

  const starts: number[] = [];
  lines.forEach((line, i) => {
    if (isFunctionStartLine(line)) {
      starts.push(i);
    }
  });

  const firstStart = starts[0];
  if (firstStart === undefined) {
    return paragraphFallbackChunks(baseSlug, source);
  }

  const chunks: TsJsChunk[] = [];

  // Module-level context above the first function would otherwise never be indexed.
  const preamble = lines.slice(0, firstStart).join("\n").trim();
  if (preamble.length > 0) {
    chunks.push({ slug: `${baseSlug}#preamble`, text: preamble });
  }

  starts.forEach((start, s) => {
    const end = starts[s + 1] ?? lines.length;
    const block = lines.slice(start, end).join("\n").trim();
    if (block.length === 0) {
      return;
    }
    const name = extractRoughName(lines[start] ?? "");
    chunks.push({
      slug: `${baseSlug}#${slugPart(name)}-${String(s)}`,
      text: block,
    });
  });

  return chunks.length > 0 ? chunks : paragraphFallbackChunks(baseSlug, source);
}
|
||||
|
||||
/**
 * Chunk a source file that has no recognisable function starts: one chunk per
 * blank-line-separated paragraph, each trimmed, with slugs
 * `<baseSlug>#para-<n>`. Empty or whitespace-only input yields no chunks.
 *
 * The former "no paragraphs but non-empty text" branch was unreachable
 * (non-empty trimmed text always contains at least one non-blank paragraph)
 * and has been removed.
 */
function paragraphFallbackChunks(baseSlug: string, source: string): TsJsChunk[] {
  return source
    .trim()
    .split(/\n\s*\n/)
    .map((paragraph) => paragraph.trim())
    .filter((paragraph) => paragraph.length > 0)
    .map((text, i) => ({ slug: `${baseSlug}#para-${String(i)}`, text }));
}
|
||||
@@ -0,0 +1,23 @@
|
||||
import { chunkMarkdown } from "./chunk-markdown.js";
|
||||
import { chunkTypeScriptOrJavaScript } from "./chunk-typescript.js";
|
||||
|
||||
export type KnowledgeChunk = {
|
||||
slug: string;
|
||||
text: string;
|
||||
};
|
||||
|
||||
/**
 * Chunk one file using the strategy for its extension (case-insensitive).
 *
 * - `.md`, `.markdown` → heading-based Markdown chunking.
 * - `.ts`, `.tsx`, `.mts`, `.cts`, `.js`, `.jsx`, `.mjs`, `.cjs` → function-based chunking.
 * - anything else → a single chunk holding the trimmed file, or no chunk at all
 *   when the file is empty or whitespace-only (matching the other strategies,
 *   which never emit empty chunks).
 *
 * @param relativePath - Repo-relative POSIX path; used for dispatch and slugs.
 * @param source - Full file contents.
 */
export function chunkKnowledgeFile(relativePath: string, source: string): KnowledgeChunk[] {
  const lower = relativePath.toLowerCase();
  const hasExtension = (extensions: ReadonlyArray<string>): boolean =>
    extensions.some((ext) => lower.endsWith(ext));

  if (hasExtension([".md", ".markdown"])) {
    return chunkMarkdown(relativePath, source);
  }
  if (hasExtension([".ts", ".tsx", ".mts", ".cts", ".js", ".jsx", ".mjs", ".cjs"])) {
    return chunkTypeScriptOrJavaScript(relativePath, source);
  }

  const text = source.trim();
  if (text.length === 0) {
    return [];
  }
  return [{ slug: `${relativePath.replace(/\//g, "-")}#0`, text }];
}
|
||||
@@ -0,0 +1,19 @@
|
||||
import picomatch from "picomatch";
|
||||
|
||||
/** Options passed to every picomatch call; `dot: true` lets globs match dotfiles. */
const PICOMATCH_OPTS = { dot: true } as const;

/**
 * Report whether a path is excluded from indexing.
 *
 * @param relativePosixPath - Repo-relative path using "/" separators.
 * @param excludePatterns - Exclude globs from knowledge.yaml.
 * @returns True as soon as one glob matches; false for an empty pattern list.
 */
export function matchesKnowledgeExclude(
  relativePosixPath: string,
  excludePatterns: ReadonlyArray<string>,
): boolean {
  return excludePatterns.some((glob) => picomatch(glob, PICOMATCH_OPTS)(relativePosixPath));
}
|
||||
@@ -0,0 +1,7 @@
|
||||
import { createHash } from "node:crypto";
|
||||
|
||||
/**
 * Placeholder "embedding" used until a remote embedding service exists (RFC-003).
 *
 * Deterministic: the SHA-256 digest of the UTF-8 text, repeated four times
 * back to back (128 bytes in total).
 */
export function fakeEmbeddingBytes(text: string): Buffer {
  const digest = createHash("sha256").update(text, "utf8").digest();
  const repeated = Buffer.alloc(digest.length * 4);
  for (let copy = 0; copy < 4; copy++) {
    digest.copy(repeated, copy * digest.length);
  }
  return repeated;
}
|
||||
@@ -0,0 +1,42 @@
|
||||
import { globSync, statSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
|
||||
import type { KnowledgeConfig } from "@uncaged/nerve-core";
|
||||
|
||||
import { matchesKnowledgeExclude } from "./exclude-match.js";
|
||||
|
||||
/** Convert every backslash in a relative path to a forward slash. */
function toPosix(rel: string): string {
  return rel.replace(/\\/g, "/");
}
|
||||
|
||||
/**
 * True when `rel`, joined onto `repoRoot`, is an existing regular file.
 * Any stat failure (missing path, permission error, …) counts as "not a file".
 */
function isFileUnderRoot(repoRoot: string, rel: string): boolean {
  const candidate = join(repoRoot, rel);
  let isRegularFile = false;
  try {
    isRegularFile = statSync(candidate).isFile();
  } catch {
    // Unreadable or missing: leave the answer as false.
  }
  return isRegularFile;
}
|
||||
|
||||
/**
 * List the files to index: everything matched by the `include` globs, minus
 * anything matched by an `exclude` glob.
 *
 * Only regular files are kept. Paths are repo-relative with "/" separators,
 * de-duplicated across include patterns, and sorted.
 *
 * The `windowsPathsNoEscape` option previously passed here belongs to the npm
 * `glob` package, not to `node:fs` `globSync`, so it was removed. Backslash
 * paths are normalised by `toPosix` instead.
 *
 * @param repoRoot - Absolute repo root; globs are evaluated relative to it.
 * @param config - Parsed knowledge.yaml.
 */
export function listKnowledgeFiles(repoRoot: string, config: KnowledgeConfig): string[] {
  const matched = new Set<string>();
  for (const pattern of config.include) {
    for (const rel of globSync(pattern, { cwd: repoRoot })) {
      const posix = toPosix(rel);
      if (isFileUnderRoot(repoRoot, posix) && !matchesKnowledgeExclude(posix, config.exclude)) {
        matched.add(posix);
      }
    }
  }
  return [...matched].sort();
}
|
||||
@@ -0,0 +1,96 @@
|
||||
import { createHash } from "node:crypto";
|
||||
import { DatabaseSync } from "node:sqlite";
|
||||
|
||||
import { fakeEmbeddingBytes } from "./fake-embedding.js";
|
||||
|
||||
export type KnowledgeChunkRow = {
|
||||
path: string;
|
||||
slug: string;
|
||||
chunkIndex: number;
|
||||
text: string;
|
||||
embedding: Buffer;
|
||||
contentHash: string;
|
||||
};
|
||||
|
||||
export type KnowledgeChunkInsert = {
|
||||
path: string;
|
||||
slug: string;
|
||||
chunkIndex: number;
|
||||
text: string;
|
||||
contentHash: string;
|
||||
};
|
||||
|
||||
const SCHEMA = `
|
||||
CREATE TABLE IF NOT EXISTS chunks (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
path TEXT NOT NULL,
|
||||
chunk_index INTEGER NOT NULL,
|
||||
slug TEXT NOT NULL,
|
||||
text TEXT NOT NULL,
|
||||
embedding BLOB NOT NULL,
|
||||
content_hash TEXT NOT NULL,
|
||||
UNIQUE(path, chunk_index)
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(path);
|
||||
`;
|
||||
|
||||
/**
 * Open the SQLite database at `dbPath` and run the schema script, which only
 * creates the `chunks` table and its index when they are missing.
 *
 * The caller owns the returned handle and must `close()` it. If applying the
 * schema fails, the handle is closed here before the error is rethrown, so a
 * failed open does not leak an open database.
 *
 * @param dbPath - Path of the knowledge.db file.
 * @throws Whatever `DatabaseSync` or `exec` throws.
 */
export function openKnowledgeDb(dbPath: string): DatabaseSync {
  const db = new DatabaseSync(dbPath);
  try {
    db.exec(SCHEMA);
  } catch (e) {
    db.close();
    throw e;
  }
  return db;
}
|
||||
|
||||
/** Hex-encoded SHA-256 digest of the UTF-8 bytes of `text`. */
export function contentHash(text: string): string {
  const hasher = createHash("sha256");
  hasher.update(text, "utf8");
  return hasher.digest("hex");
}
|
||||
|
||||
/**
 * Replace the whole contents of the `chunks` table with `rows`, in a single
 * transaction: either every row is written or the table is left as it was.
 *
 * The embedding stored with each row is the placeholder from `fakeEmbeddingBytes`.
 *
 * @param db - Open knowledge database.
 * @param rows - Chunks to store; `(path, chunkIndex)` must be unique.
 * @throws The error that made the transaction fail. A failure of the rollback
 *   itself is suppressed so that it cannot hide the original error.
 */
export function replaceAllChunks(db: DatabaseSync, rows: KnowledgeChunkInsert[]): void {
  db.exec("BEGIN IMMEDIATE");
  try {
    db.prepare("DELETE FROM chunks").run();
    const insert = db.prepare(
      `INSERT INTO chunks (path, chunk_index, slug, text, embedding, content_hash)
       VALUES (@path, @chunk_index, @slug, @text, @embedding, @content_hash)`,
    );
    for (const row of rows) {
      insert.run({
        path: row.path,
        chunk_index: row.chunkIndex,
        slug: row.slug,
        text: row.text,
        embedding: fakeEmbeddingBytes(row.text),
        content_hash: row.contentHash,
      });
    }
    db.exec("COMMIT");
  } catch (e) {
    try {
      db.exec("ROLLBACK");
    } catch {
      // Keep the original failure: it explains why the transaction was abandoned.
    }
    throw e;
  }
}
|
||||
|
||||
/**
 * Read every stored chunk, ordered by path and then by chunk index.
 *
 * `node:sqlite` hands BLOB columns back as `Uint8Array`, so the embedding is
 * copied into a real `Buffer` to make the declared `KnowledgeChunkRow` type
 * true at runtime.
 *
 * @param db - Open knowledge database.
 */
export function loadAllChunks(db: DatabaseSync): KnowledgeChunkRow[] {
  const stmt = db.prepare(
    "SELECT path, chunk_index, slug, text, embedding, content_hash FROM chunks ORDER BY path, chunk_index",
  );
  const rows = stmt.all() as Array<{
    path: string;
    chunk_index: number;
    slug: string;
    text: string;
    embedding: Uint8Array;
    content_hash: string;
  }>;
  return rows.map((r) => ({
    path: r.path,
    slug: r.slug,
    chunkIndex: r.chunk_index,
    text: r.text,
    embedding: Buffer.isBuffer(r.embedding) ? r.embedding : Buffer.from(r.embedding),
    contentHash: r.content_hash,
  }));
}
|
||||
@@ -0,0 +1,2 @@
|
||||
export const KNOWLEDGE_YAML = "knowledge.yaml";
|
||||
export const KNOWLEDGE_DB = "knowledge.db";
|
||||
@@ -0,0 +1,13 @@
|
||||
/**
 * Check that `-r` and `-g` were not combined on `nerve knowledge query`.
 *
 * @param repoFlag - Raw `-r` value; missing, null or blank means "not given".
 * @param globalFlag - Whether `-g` was passed.
 * @returns The error text to show when both scopes are requested, else null.
 */
export function knowledgeQueryScopeConflictMessage(
  repoFlag: string | null | undefined,
  globalFlag: boolean,
): string | null {
  if (!globalFlag) {
    return null;
  }
  if (repoFlag === undefined || repoFlag === null) {
    return null;
  }
  if (String(repoFlag).trim().length === 0) {
    return null;
  }
  return "❌ Use either -r <path> or -g, not both.";
}
|
||||
@@ -0,0 +1,82 @@
|
||||
import { existsSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
|
||||
import type { KnowledgeChunkRow } from "./knowledge-db.js";
|
||||
import { loadAllChunks, openKnowledgeDb } from "./knowledge-db.js";
|
||||
import { wordOverlapScore } from "./word-overlap.js";
|
||||
|
||||
export type KnowledgeQueryHit = {
|
||||
repoRoot: string | null;
|
||||
path: string;
|
||||
slug: string;
|
||||
text: string;
|
||||
score: number;
|
||||
};
|
||||
|
||||
/**
 * Score each chunk against the query by word overlap (over the chunk text plus
 * its path) and return the best matches, highest score first.
 *
 * Chunks with a score of zero share no word with the query and are dropped.
 * Without this, a query that matches nothing would still return `limit`
 * arbitrary chunks and callers could never report "no results".
 *
 * @param query - Free-text query.
 * @param chunks - Candidate chunks.
 * @param limit - Maximum number of results; values below zero are treated as zero.
 */
export function rankChunksByWordOverlap(
  query: string,
  chunks: KnowledgeChunkRow[],
  limit: number,
): Array<{ chunk: KnowledgeChunkRow; score: number }> {
  return chunks
    .map((chunk) => ({
      chunk,
      score: wordOverlapScore(query, `${chunk.text}\n${chunk.path}`),
    }))
    .filter((entry) => entry.score > 0)
    .sort((a, b) => b.score - a.score)
    .slice(0, Math.max(0, limit));
}
|
||||
|
||||
/**
 * Query one repo's knowledge database and return its ranked hits.
 *
 * The database is opened for the duration of the call and always closed,
 * including when loading or ranking throws.
 *
 * @param repoRoot - Recorded on every hit as its origin.
 * @param dbPath - Path of that repo's knowledge.db.
 * @param queryText - Free-text query.
 * @param limit - Maximum number of hits.
 */
export function queryKnowledgeRepo(
  repoRoot: string,
  dbPath: string,
  queryText: string,
  limit: number,
): KnowledgeQueryHit[] {
  const db = openKnowledgeDb(dbPath);
  try {
    const best = rankChunksByWordOverlap(queryText, loadAllChunks(db), limit);
    return best.map(({ chunk, score }) => ({
      repoRoot,
      path: chunk.path,
      slug: chunk.slug,
      text: chunk.text,
      score,
    }));
  } finally {
    db.close();
  }
}
|
||||
|
||||
/**
 * Query several repos and merge their hits into one ranking.
 *
 * Each repo contributes at most `limit` hits; the merged list is sorted by
 * score (highest first) and cut to `limit`. Roots whose database file does not
 * exist are skipped silently.
 *
 * @param repoRoots - Repo roots to search.
 * @param dbFileName - Database file name, joined onto each root.
 * @param queryText - Free-text query.
 * @param limit - Maximum number of hits overall.
 */
export function queryKnowledgeGlobal(
  repoRoots: ReadonlyArray<string>,
  dbFileName: string,
  queryText: string,
  limit: number,
): KnowledgeQueryHit[] {
  const merged: KnowledgeQueryHit[] = [];
  for (const root of repoRoots) {
    const dbPath = join(root, dbFileName);
    if (existsSync(dbPath)) {
      merged.push(...queryKnowledgeRepo(root, dbPath, queryText, limit));
    }
  }
  merged.sort((left, right) => right.score - left.score);
  return merged.slice(0, limit);
}
|
||||
@@ -0,0 +1,55 @@
|
||||
import { mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
||||
import { dirname } from "node:path";
|
||||
|
||||
import { getNerveRoot } from "../workspace.js";
|
||||
|
||||
export type KnowledgeRepoRegistry = {
|
||||
roots: ReadonlyArray<string>;
|
||||
};
|
||||
|
||||
/** File name of the repo registry inside the nerve root's `data` directory. */
const FILE_NAME = "knowledge-repos.json";

/** Location of the registry file: `<nerve root>/data/knowledge-repos.json`. */
export function getKnowledgeRegistryPath(): string {
  const nerveRoot = getNerveRoot();
  return `${nerveRoot}/data/${FILE_NAME}`;
}
|
||||
|
||||
/** A fresh registry with no repos, used when the file is missing or unusable. */
function defaultRegistry(): KnowledgeRepoRegistry {
  const roots: string[] = [];
  return { roots };
}
|
||||
|
||||
/**
 * Load the repo registry from disk.
 *
 * Best-effort by design: a missing file, unreadable file, invalid JSON or an
 * unexpected shape all yield an empty registry. Non-string entries are ignored;
 * the remaining roots are de-duplicated and sorted.
 */
export function readKnowledgeRegistry(): KnowledgeRepoRegistry {
  const registryPath = getKnowledgeRegistryPath();

  let decoded: unknown;
  try {
    decoded = JSON.parse(readFileSync(registryPath, "utf8"));
  } catch {
    // missing or invalid — treat as empty
    return defaultRegistry();
  }

  if (
    typeof decoded === "object" &&
    decoded !== null &&
    "roots" in decoded &&
    Array.isArray(decoded.roots)
  ) {
    const unique = new Set<string>();
    for (const entry of decoded.roots) {
      if (typeof entry === "string") {
        unique.add(entry);
      }
    }
    return { roots: Array.from(unique).sort() };
  }
  return defaultRegistry();
}
|
||||
|
||||
/**
 * Add a repo root to the registry file, creating the file and its directory
 * when needed. Blank input is ignored. The stored list stays de-duplicated
 * and sorted.
 *
 * The path is only trimmed, not normalised — callers are expected to pass an
 * absolute path, as the parameter name suggests.
 *
 * @param repoRootAbsolute - Repo root to remember for global queries.
 */
export function registerKnowledgeRepoRoot(repoRootAbsolute: string): void {
  const root = repoRootAbsolute.trim();
  if (root === "") {
    return;
  }

  const known = new Set(readKnowledgeRegistry().roots);
  known.add(root);
  const next: KnowledgeRepoRegistry = { roots: [...known].sort() };

  const registryPath = getKnowledgeRegistryPath();
  mkdirSync(dirname(registryPath), { recursive: true });
  writeFileSync(registryPath, `${JSON.stringify(next, null, 2)}\n`, "utf8");
}
|
||||
|
||||
/** Registered repo roots as a fresh, mutable array (a copy of the registry's list). */
export function listRegisteredKnowledgeRoots(): string[] {
  const { roots } = readKnowledgeRegistry();
  return roots.slice();
}
|
||||
@@ -0,0 +1,21 @@
|
||||
import { existsSync } from "node:fs";
|
||||
import { dirname, join, resolve } from "node:path";
|
||||
|
||||
import { KNOWLEDGE_YAML } from "./paths.js";
|
||||
|
||||
/**
 * Find the nearest directory, starting at `startDir` and moving up through its
 * ancestors, that contains a knowledge.yaml file.
 *
 * @param startDir - Directory to start from; resolved to an absolute path first.
 * @returns The matching directory, or null once the filesystem root has been
 *   checked without a match.
 */
export function findKnowledgeRepoRoot(startDir: string): string | null {
  let candidate = resolve(startDir);
  let previous: string;
  do {
    if (existsSync(join(candidate, KNOWLEDGE_YAML))) {
      return candidate;
    }
    previous = candidate;
    candidate = dirname(candidate);
    // dirname() of the filesystem root is the root itself, which ends the walk.
  } while (candidate !== previous);
  return null;
}
|
||||
@@ -0,0 +1,73 @@
|
||||
import { readFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
|
||||
import { type KnowledgeConfig, parseKnowledgeYaml } from "@uncaged/nerve-core";
|
||||
|
||||
import { chunkKnowledgeFile } from "./chunk.js";
|
||||
import { listKnowledgeFiles } from "./glob-files.js";
|
||||
import { contentHash, openKnowledgeDb, replaceAllChunks } from "./knowledge-db.js";
|
||||
import { KNOWLEDGE_DB, KNOWLEDGE_YAML } from "./paths.js";
|
||||
import { registerKnowledgeRepoRoot } from "./registry.js";
|
||||
|
||||
export type KnowledgeSyncResult = {
|
||||
repoRoot: string;
|
||||
dbPath: string;
|
||||
filesIndexed: number;
|
||||
chunksWritten: number;
|
||||
};
|
||||
|
||||
/**
 * Read and parse `<repoRoot>/knowledge.yaml`.
 *
 * @throws The parser's error when the YAML is rejected, or the filesystem
 *   error when the file cannot be read.
 */
function loadConfig(repoRoot: string): KnowledgeConfig {
  const yamlPath = join(repoRoot, KNOWLEDGE_YAML);
  const result = parseKnowledgeYaml(readFileSync(yamlPath, "utf8"));
  if (result.ok) {
    return result.value;
  }
  throw result.error;
}
|
||||
|
||||
/**
 * Rebuild a repo's knowledge index from scratch.
 *
 * Steps: load knowledge.yaml, list the matching files, chunk each file,
 * replace the whole contents of knowledge.db with the new chunks, then record
 * the repo in the global registry.
 *
 * @param repoRoot - Root of the repo (the directory holding knowledge.yaml).
 * @returns Counts of files listed and chunks written, plus the database path.
 */
export function runKnowledgeSync(repoRoot: string): KnowledgeSyncResult {
  const config = loadConfig(repoRoot);
  const relFiles = listKnowledgeFiles(repoRoot, config);

  // chunkIndex restarts at 0 for every file; (path, chunkIndex) identifies a row.
  const inserts = relFiles.flatMap((rel) => {
    const source = readFileSync(join(repoRoot, rel), "utf8");
    return chunkKnowledgeFile(rel, source).map((chunk, chunkIndex) => ({
      path: rel,
      slug: chunk.slug,
      chunkIndex,
      text: chunk.text,
      contentHash: contentHash(chunk.text),
    }));
  });

  const dbPath = join(repoRoot, KNOWLEDGE_DB);
  const db = openKnowledgeDb(dbPath);
  try {
    replaceAllChunks(db, inserts);
  } finally {
    db.close();
  }

  registerKnowledgeRepoRoot(repoRoot);

  return {
    repoRoot,
    dbPath,
    filesIndexed: relFiles.length,
    chunksWritten: inserts.length,
  };
}
|
||||
@@ -0,0 +1,26 @@
|
||||
/**
 * Split text into its set of distinct lower-cased words.
 *
 * A word is a run of Unicode letters, combining marks, digits or underscores;
 * everything else is a separator. The previous ASCII-only `\w` split treated
 * every non-ASCII letter as a separator, so "café" became "caf" and CJK text
 * produced no tokens at all. For pure-ASCII input the result is unchanged.
 */
function tokenize(s: string): Set<string> {
  const words = s
    .toLowerCase()
    .split(/[^\p{L}\p{M}\p{N}_]+/u)
    .filter((word) => word.length > 0);
  return new Set(words);
}
|
||||
|
||||
/**
 * Jaccard similarity between the word sets of `query` and `document`
 * (shared words divided by total distinct words). A placeholder ranking until
 * real embeddings exist (RFC-003).
 *
 * @returns A value from 0 to 1; 0 when the query contains no words.
 */
export function wordOverlapScore(query: string, document: string): number {
  const queryWords = tokenize(query);
  const documentWords = tokenize(document);
  if (queryWords.size === 0) {
    return 0;
  }

  const shared = [...queryWords].filter((word) => documentWords.has(word)).length;
  const unionSize = queryWords.size + documentWords.size - shared;
  return unionSize === 0 ? 0 : shared / unionSize;
}
|
||||
@@ -8,7 +8,12 @@ import { join } from "node:path";
|
||||
import type { NerveConfig } from "@uncaged/nerve-core";
|
||||
import { KNOWN_AGENT_ADAPTER_IDS } from "@uncaged/nerve-core";
|
||||
|
||||
/** Matches RoleSpec `agent: "name"` / `agent: 'name'` in workflow TypeScript sources. */
|
||||
/**
|
||||
* Matches RoleSpec `agent: "name"` / `agent: 'name'` in workflow TypeScript sources.
|
||||
* NOTE: This regex can match occurrences inside comments. For current usage (validation
|
||||
* hint) this is acceptable — false positives just trigger a "missing agent" warning that
|
||||
* the user can ignore. If precision becomes important, switch to AST-based extraction.
|
||||
*/
|
||||
const WORKFLOW_SPEC_AGENT_PATTERN = /agent:\s*["']([^"']+)["']/g;
|
||||
|
||||
function collectTsSourceFiles(dir: string, acc: string[]): void {
|
||||
|
||||
Reference in New Issue
Block a user