feat: add knowledge-extraction BFS workflow

Three-role workflow (questioner → answerer → explorer) that iterates
over .knowledge/ cards to discover and fill knowledge gaps via BFS.

- questioner: createLlmRole, reads card, asks 3 technical questions
- answerer: spawnSafe nerve knowledge query, judges answers
- explorer: reads code, writes/patches .knowledge cards, runs sync
- moderator: BFS queue from message history, stagnation rule

Closes #266
This commit is contained in:
小橘 2026-04-30 02:27:10 +00:00
parent 8774d71d57
commit 1c512435de
13 changed files with 597 additions and 0 deletions

View File

@ -41,3 +41,6 @@ workflows:
solve-issue: solve-issue:
concurrency: 1 concurrency: 1
overflow: queue overflow: queue
knowledge-extraction:
concurrency: 1
overflow: queue

28
pnpm-lock.yaml generated
View File

@ -174,6 +174,34 @@ importers:
specifier: ^5.7.0 specifier: ^5.7.0
version: 5.9.3 version: 5.9.3
workflows/knowledge-extraction:
dependencies:
'@uncaged/nerve-adapter-cursor':
specifier: link:../../../repos/nerve/packages/adapter-cursor
version: link:../../../repos/nerve/packages/adapter-cursor
'@uncaged/nerve-adapter-hermes':
specifier: link:../../../repos/nerve/packages/adapter-hermes
version: link:../../../repos/nerve/packages/adapter-hermes
'@uncaged/nerve-core':
specifier: link:../../../repos/nerve/packages/core
version: link:../../../repos/nerve/packages/core
'@uncaged/nerve-workflow-utils':
specifier: link:../../../repos/nerve/packages/workflow-utils
version: link:../../../repos/nerve/packages/workflow-utils
zod:
specifier: ^4.3.6
version: 4.3.6
devDependencies:
'@types/node':
specifier: ^22.0.0
version: 22.19.17
esbuild:
specifier: ^0.27.0
version: 0.27.7
typescript:
specifier: ^5.7.0
version: 5.9.3
workflows/solve-issue: workflows/solve-issue:
dependencies: dependencies:
'@uncaged/nerve-adapter-cursor': '@uncaged/nerve-adapter-cursor':

View File

@ -0,0 +1,3 @@
node_modules/
dist/
false/

View File

@ -0,0 +1,31 @@
import type { AgentFn, WorkflowDefinition } from "@uncaged/nerve-core";
import type { LlmExtractorConfig } from "@uncaged/nerve-workflow-utils";
import { moderator } from "./moderator.js";
import type { WorkflowMeta } from "./moderator.js";
import { createAnswererRole } from "./roles/answerer.js";
import { createExplorerRole } from "./roles/explorer.js";
import { createQuestionerRole } from "./roles/questioner.js";
export type CreateKnowledgeExtractionDeps = {
defaultAdapter: AgentFn;
adapters?: Partial<Record<keyof WorkflowMeta, AgentFn>>;
extract: LlmExtractorConfig;
};
export function createKnowledgeExtractionWorkflow({
defaultAdapter,
adapters,
extract,
}: CreateKnowledgeExtractionDeps): WorkflowDefinition<WorkflowMeta> {
const a = (role: keyof WorkflowMeta) => adapters?.[role] ?? defaultAdapter;
return {
name: "knowledge-extraction",
roles: {
questioner: createQuestionerRole({ extract }),
answerer: createAnswererRole({ extract }),
explorer: createExplorerRole(a("explorer"), { extract }),
},
moderator,
};
}

View File

@ -0,0 +1,30 @@
import { join } from "node:path";
import { createCursorAdapter } from "@uncaged/nerve-adapter-cursor";
import { hermesAdapter } from "@uncaged/nerve-adapter-hermes";
import { createKnowledgeExtractionWorkflow } from "./build.js";
import { resolveDashScopeProvider } from "../solve-issue/lib/provider.js";
const HOME = process.env.HOME ?? "/home/azureuser";
const NERVE_ROOT = join(HOME, ".uncaged-nerve");
const provider = await resolveDashScopeProvider(NERVE_ROOT);
if (provider === null) {
throw new Error("Set DASHSCOPE_API_KEY and DASHSCOPE_BASE_URL (or cfg get equivalents)");
}
const CURSOR_TIMEOUT_MS = 300_000;
const workflow = createKnowledgeExtractionWorkflow({
defaultAdapter: hermesAdapter,
adapters: {
explorer: createCursorAdapter({
type: "cursor",
model: "auto",
timeout: CURSOR_TIMEOUT_MS,
}),
},
extract: { provider },
});
export default workflow;

View File

@ -0,0 +1,74 @@
import type { Dirent } from "node:fs";
import { readdir } from "node:fs/promises";
import { join } from "node:path";
import type { StartStep, WorkflowMessage } from "@uncaged/nerve-core";
import type { ExplorerMeta } from "../roles/explorer.js";
import type { QuestionerMeta } from "../roles/questioner.js";
async function walkMarkdownFiles(rootDir: string, base: string): Promise<string[]> {
const out: string[] = [];
let entries: Dirent[];
try {
entries = (await readdir(rootDir, { withFileTypes: true })) as Dirent[];
} catch {
return out;
}
for (const e of entries) {
const name = e.name;
const rel = base ? `${base}/${name}` : name;
const full = join(rootDir, name);
if (e.isDirectory()) {
out.push(...(await walkMarkdownFiles(full, rel)));
} else if (e.isFile() && name.endsWith(".md")) {
out.push(rel.replace(/\\/g, "/"));
}
}
return out;
}
/** Enumerate all markdown files under `.knowledge/` as repo-relative paths; seed line first if present. */
export async function bootstrapKnowledgeQueue(cwd: string, startContent: string): Promise<string[]> {
const knowledgeDir = join(cwd, ".knowledge");
const relFiles = await walkMarkdownFiles(knowledgeDir, "");
const paths = relFiles.map((f) => `.knowledge/${f}`);
const seed = startContent.trim().split(/\r?\n/u)[0]?.trim() ?? "";
if (paths.length === 0 && seed.length > 0) {
return [seed];
}
if (seed.length > 0 && paths.includes(seed)) {
return [seed, ...paths.filter((p) => p !== seed)];
}
if (seed.length > 0 && !paths.includes(seed)) {
return [seed, ...paths];
}
return [...paths].sort();
}
function lastIndexOfRole(messages: WorkflowMessage[], role: string): number {
for (let i = messages.length - 1; i >= 0; i--) {
if (messages[i].role === role) return i;
}
return -1;
}
/** Next queue for questioner: bootstrap, or continue after answerer / explorer. */
export async function resolveQueueForQuestioner(
start: StartStep,
messages: WorkflowMessage[],
cwd: string,
): Promise<string[]> {
const lastQi = lastIndexOfRole(messages, "questioner");
if (lastQi === -1) {
return bootstrapKnowledgeQueue(cwd, start.content);
}
const qMeta = messages[lastQi].meta as QuestionerMeta;
const tail = messages.slice(lastQi + 1);
const explorerMsg = tail.find((m) => m.role === "explorer");
if (explorerMsg) {
const eMeta = explorerMsg.meta as ExplorerMeta;
return [...qMeta.remaining_queue, ...eMeta.new_cards];
}
return qMeta.remaining_queue;
}

View File

@ -0,0 +1,8 @@
import type { StartStep } from "@uncaged/nerve-core";
type StartMetaWithWorkdir = StartStep["meta"] & { workdir?: string | null };
export function resolveWorkdir(start: StartStep): string {
const m = start.meta as StartMetaWithWorkdir;
return m.workdir ?? process.cwd();
}

View File

@ -0,0 +1,84 @@
import { END } from "@uncaged/nerve-core";
import type { Moderator, ModeratorContext } from "@uncaged/nerve-core";
import type { AnswererMeta } from "./roles/answerer.js";
import type { ExplorerMeta } from "./roles/explorer.js";
import type { QuestionerMeta } from "./roles/questioner.js";
export type WorkflowMeta = {
questioner: QuestionerMeta;
answerer: AnswererMeta;
explorer: ExplorerMeta;
};
type Steps = ModeratorContext<WorkflowMeta>["steps"];
function lastQuestionerRemaining(steps: Steps): QuestionerMeta | undefined {
for (let i = steps.length - 1; i >= 0; i--) {
const s = steps[i];
if (s.role === "questioner") return s.meta;
}
return undefined;
}
/** End when the last two explorer invocations both added no new cards (issue #266 stagnation rule). */
function lastTwoExplorerRunsBothEmpty(steps: Steps): boolean {
const explorerSteps = steps.filter((s) => s.role === "explorer");
if (explorerSteps.length < 2) return false;
const e1 = explorerSteps[explorerSteps.length - 1].meta as ExplorerMeta;
const e2 = explorerSteps[explorerSteps.length - 2].meta as ExplorerMeta;
return e1.new_cards.length === 0 && e2.new_cards.length === 0;
}
function queueAfterSkippedExplorer(steps: Steps): string[] {
const q = lastQuestionerRemaining(steps);
return q?.remaining_queue ?? [];
}
function queueAfterExplorerStep(steps: Steps): string[] {
const last = steps[steps.length - 1];
if (!last || last.role !== "explorer") return [];
const q = lastQuestionerRemaining(steps);
if (!q) return [];
const e = last.meta as ExplorerMeta;
return [...q.remaining_queue, ...e.new_cards];
}
export const moderator: Moderator<WorkflowMeta> = (context) => {
const { steps } = context;
if (steps.length === 0) {
return "questioner";
}
const last = steps[steps.length - 1];
if (last.role === "questioner") {
return "answerer";
}
if (last.role === "answerer") {
const am = last.meta as AnswererMeta;
if (am.has_unanswered) {
return "explorer";
}
const q = queueAfterSkippedExplorer(steps);
if (q.length === 0) {
return END;
}
return "questioner";
}
if (last.role === "explorer") {
if (lastTwoExplorerRunsBothEmpty(steps)) {
return END;
}
const q = queueAfterExplorerStep(steps);
if (q.length === 0) {
return END;
}
return "questioner";
}
return END;
};

View File

@ -0,0 +1,21 @@
{
"name": "knowledge-extraction-workflow",
"version": "0.0.1",
"private": true,
"type": "module",
"scripts": {
"build": "esbuild index.ts --bundle --platform=node --format=esm --outdir=dist --packages=external"
},
"dependencies": {
"@uncaged/nerve-adapter-cursor": "latest",
"@uncaged/nerve-adapter-hermes": "latest",
"@uncaged/nerve-core": "latest",
"@uncaged/nerve-workflow-utils": "latest",
"zod": "^4.3.6"
},
"devDependencies": {
"@types/node": "^22.0.0",
"esbuild": "^0.27.0",
"typescript": "^5.7.0"
}
}

View File

@ -0,0 +1,104 @@
import type { Role, StartStep, WorkflowMessage } from "@uncaged/nerve-core";
import type { LlmExtractorConfig } from "@uncaged/nerve-workflow-utils";
import { llmExtract, nerveCommandEnv, spawnSafe } from "@uncaged/nerve-workflow-utils";
import { z } from "zod";
import { resolveWorkdir } from "../lib/workdir.js";
import type { QuestionerMeta } from "./questioner.js";
export const answererMetaSchema = z.object({
results: z.array(
z.object({
id: z.string(),
found: z.boolean(),
source: z.string(),
note: z.string(),
}),
),
has_unanswered: z.boolean(),
});
export type AnswererMeta = z.infer<typeof answererMetaSchema>;
export type CreateAnswererRoleDeps = {
extract: LlmExtractorConfig;
};
function lastQuestionerMeta(messages: WorkflowMessage[]): QuestionerMeta | undefined {
for (let i = messages.length - 1; i >= 0; i--) {
if (messages[i].role === "questioner") {
return messages[i].meta as QuestionerMeta;
}
}
return undefined;
}
export function createAnswererRole(deps: CreateAnswererRoleDeps): Role<AnswererMeta> {
const { extract } = deps;
return async (start: StartStep, messages: WorkflowMessage[]) => {
const cwd = resolveWorkdir(start);
const qm = lastQuestionerMeta(messages);
if (!qm || qm.questions.length === 0) {
return {
content: "answerer: no questions from questioner; skipping CLI lookup.",
meta: { results: [], has_unanswered: false },
};
}
const blocks: string[] = [];
for (const q of qm.questions) {
if (start.meta.dryRun) {
blocks.push(`### ${q.id}\n[dryRun] skipped nerve knowledge query\n`);
continue;
}
const res = await spawnSafe(
"nerve",
["knowledge", "query", q.question],
{
cwd,
env: nerveCommandEnv(),
timeoutMs: 120_000,
dryRun: false,
abortSignal: null,
},
);
if (res.ok) {
blocks.push(`### ${q.id} (${q.domain})\nQuestion: ${q.question}\n---\n${res.value.stdout}\n`);
} else {
const err = res.error;
const detail =
err.kind === "non_zero_exit"
? `exit ${err.exitCode}\n${err.stderr}`
: err.kind === "timeout"
? `timeout\n${err.stderr}`
: err.kind === "spawn_failed"
? err.message
: "aborted";
blocks.push(`### ${q.id}\nnerve knowledge query failed: ${detail}\n`);
}
}
const bundle = [
"You are the **answerer**. You MUST NOT read repository source code — only the CLI retrieval excerpts below.",
"For each question id, decide whether the knowledge base already answers it.",
"Set found=true only when the excerpt supports a confident answer; otherwise found=false.",
"Set has_unanswered=true if any question remains unanswered by the knowledge base.",
"",
...blocks,
].join("\n");
const metaR = await llmExtract({
text: bundle,
schema: answererMetaSchema,
provider: extract.provider,
dryRun: start.meta.dryRun,
});
if (!metaR.ok) {
throw new Error(`answerer llmExtract: ${JSON.stringify(metaR.error)}`);
}
return { content: bundle, meta: metaR.value };
};
}

View File

@ -0,0 +1,92 @@
import type { AgentFn, Role, StartStep, WorkflowMessage } from "@uncaged/nerve-core";
import type { LlmExtractorConfig } from "@uncaged/nerve-workflow-utils";
import { createRole } from "@uncaged/nerve-workflow-utils";
import { z } from "zod";
import { resolveWorkdir } from "../lib/workdir.js";
import type { AnswererMeta } from "./answerer.js";
import type { QuestionerMeta } from "./questioner.js";
export const explorerMetaSchema = z.object({
patches: z.array(
z.object({
card: z.string(),
section: z.string(),
}),
),
new_cards: z.array(z.string()),
});
export type ExplorerMeta = z.infer<typeof explorerMetaSchema>;
export type CreateExplorerRoleDeps = {
extract: LlmExtractorConfig;
};
function lastMeta<M>(messages: WorkflowMessage[], role: string): M | undefined {
for (let i = messages.length - 1; i >= 0; i--) {
if (messages[i].role === role) {
return messages[i].meta as M;
}
}
return undefined;
}
export function explorerPrompt(start: StartStep, messages: WorkflowMessage[]): string {
const threadId = start.meta.threadId;
const qm = lastMeta<QuestionerMeta>(messages, "questioner");
const am = lastMeta<AnswererMeta>(messages, "answerer");
const cwd = resolveWorkdir(start);
const unanswered =
am?.results.filter((r) => !r.found).map((r) => r.id) ?? [];
return `You are the **explorer** in a knowledge-extraction workflow.
## Context
- Thread: \`nerve thread ${threadId}\`
- Working directory (repo root for paths): ${cwd}
- Current knowledge card (questioner): ${qm?.card ?? "(unknown)"}
## Unanswered question ids
${JSON.stringify(unanswered)}
Use the prior answerer results in the thread to map ids to full question text when you read messages above.
## Task
For each unanswered question, **read the codebase** as needed, then either:
- Add a new markdown file under \`.knowledge/\`, or
- Patch an existing card (prefer updating the card listed above when appropriate).
After any write or patch to \`.knowledge\`, run:
\`\`\`bash
nerve knowledge sync
\`\`\`
from this repo root (${cwd}), and fix failures until sync succeeds.
## Output meta
Report \`patches\` as { card, section } entries for cards you edited (section is a short heading or path hint).
Report \`new_cards\` as repo-relative paths for brand-new files you created (e.g. \`.knowledge/new-topic.md\`).
Do not claim work you did not perform.`;
}
export function createExplorerRole(
adapter: AgentFn,
{ extract }: CreateExplorerRoleDeps,
): Role<ExplorerMeta> {
return createRole(
adapter,
async (innerStart: StartStep, msgs: WorkflowMessage[]) => explorerPrompt(innerStart, msgs),
explorerMetaSchema,
extract,
);
}

View File

@ -0,0 +1,106 @@
import { readFile } from "node:fs/promises";
import { join } from "node:path";
import type { Role, StartStep, WorkflowMessage } from "@uncaged/nerve-core";
import type { LlmExtractorConfig } from "@uncaged/nerve-workflow-utils";
import { createLlmRole } from "@uncaged/nerve-workflow-utils";
import { z } from "zod";
import { resolveQueueForQuestioner } from "../lib/knowledge-queue.js";
import { resolveWorkdir } from "../lib/workdir.js";
const questionerExtractSchema = z.object({
questions: z
.array(
z.object({
id: z.string(),
question: z.string(),
domain: z.string(),
}),
)
.length(3),
});
export type QuestionerMeta = {
/** Empty when no .knowledge cards and no work to do. */
card: string;
questions: { id: string; question: string; domain: string }[];
remaining_queue: string[];
};
export type CreateQuestionerRoleDeps = {
extract: LlmExtractorConfig;
};
function questionerSystem(): string {
return `You are the **questioner** in a knowledge-extraction workflow.
Read the given markdown knowledge card. Propose exactly **three** technical questions that are **not** already answered or covered by that card.
Rules:
- Questions must be concrete and technical.
- Each question needs a stable string id (e.g. q1, q2, q3), a short domain label (e.g. routing, storage), and the question text.
- Do not assume access to other files or tools reason only from the card content shown.`;
}
function questionerUser(card: string, cardBody: string, remainingHint: string[]): string {
return `Current card path: ${card}
Remaining queue after this card (paths, may be empty): ${JSON.stringify(remainingHint)}
--- Card content ---
${cardBody}`;
}
export function createQuestionerRole(adapterExtract: CreateQuestionerRoleDeps): Role<QuestionerMeta> {
const { extract } = adapterExtract;
return async (start: StartStep, messages: WorkflowMessage[]) => {
const cwd = resolveWorkdir(start);
const queue = await resolveQueueForQuestioner(start, messages, cwd);
if (queue.length === 0) {
return {
content:
"questioner: no `.knowledge` markdown files found and no seed path in the trigger prompt; queue is empty.",
meta: {
card: "",
questions: [],
remaining_queue: [],
},
};
}
const card = queue[0]!;
const remaining_queue = queue.slice(1);
let cardBody: string;
try {
cardBody = await readFile(join(cwd, card), "utf8");
} catch (e) {
const msg = e instanceof Error ? e.message : String(e);
throw new Error(`questioner: failed to read ${card}: ${msg}`);
}
const inner = createLlmRole({
provider: extract.provider,
prompt: async () => [
{ role: "system", content: questionerSystem() },
{ role: "user", content: questionerUser(card, cardBody, remaining_queue) },
],
extract: {
schema: questionerExtractSchema,
provider: extract.provider,
},
});
const r = await inner(start, messages);
return {
content: r.content,
meta: {
card,
questions: r.meta.questions,
remaining_queue,
},
};
};
}

View File

@ -0,0 +1,13 @@
{
"compilerOptions": {
"target": "ES2022",
"lib": ["ES2022"],
"module": "NodeNext",
"moduleResolution": "NodeNext",
"strict": true,
"skipLibCheck": true,
"noEmit": true,
"types": ["node"]
},
"include": ["./**/*.ts"]
}