diff --git a/nerve.yaml b/nerve.yaml index fafca2d..0e2ccc2 100644 --- a/nerve.yaml +++ b/nerve.yaml @@ -41,3 +41,6 @@ workflows: solve-issue: concurrency: 1 overflow: queue + knowledge-extraction: + concurrency: 1 + overflow: queue diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 4ba752f..a0abac9 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -174,6 +174,34 @@ importers: specifier: ^5.7.0 version: 5.9.3 + workflows/knowledge-extraction: + dependencies: + '@uncaged/nerve-adapter-cursor': + specifier: link:../../../repos/nerve/packages/adapter-cursor + version: link:../../../repos/nerve/packages/adapter-cursor + '@uncaged/nerve-adapter-hermes': + specifier: link:../../../repos/nerve/packages/adapter-hermes + version: link:../../../repos/nerve/packages/adapter-hermes + '@uncaged/nerve-core': + specifier: link:../../../repos/nerve/packages/core + version: link:../../../repos/nerve/packages/core + '@uncaged/nerve-workflow-utils': + specifier: link:../../../repos/nerve/packages/workflow-utils + version: link:../../../repos/nerve/packages/workflow-utils + zod: + specifier: ^4.3.6 + version: 4.3.6 + devDependencies: + '@types/node': + specifier: ^22.0.0 + version: 22.19.17 + esbuild: + specifier: ^0.27.0 + version: 0.27.7 + typescript: + specifier: ^5.7.0 + version: 5.9.3 + workflows/solve-issue: dependencies: '@uncaged/nerve-adapter-cursor': diff --git a/workflows/knowledge-extraction/.gitignore b/workflows/knowledge-extraction/.gitignore new file mode 100644 index 0000000..2194e3f --- /dev/null +++ b/workflows/knowledge-extraction/.gitignore @@ -0,0 +1,3 @@ +node_modules/ +dist/ +false/ diff --git a/workflows/knowledge-extraction/build.ts b/workflows/knowledge-extraction/build.ts new file mode 100644 index 0000000..992a64b --- /dev/null +++ b/workflows/knowledge-extraction/build.ts @@ -0,0 +1,31 @@ +import type { AgentFn, WorkflowDefinition } from "@uncaged/nerve-core"; +import type { LlmExtractorConfig } from "@uncaged/nerve-workflow-utils"; + +import { 
/**
 * Dependencies needed to assemble the knowledge-extraction workflow.
 */
export type CreateKnowledgeExtractionDeps = {
  /** Agent used for every role that has no entry in `adapters`. */
  defaultAdapter: AgentFn;
  /** Optional per-role agent overrides, keyed by the role names declared in `WorkflowMeta`. */
  adapters?: Partial<Record<keyof WorkflowMeta, AgentFn>>;
  /** Extractor configuration passed unchanged to every role factory. */
  extract: LlmExtractorConfig;
};

/**
 * Builds the `knowledge-extraction` workflow definition.
 *
 * The questioner and answerer factories receive only the extractor
 * configuration; the explorer additionally receives an agent, taken from
 * `adapters.explorer` when supplied and from `defaultAdapter` otherwise.
 *
 * @param deps Default agent, optional per-role overrides and extractor config.
 * @returns The workflow definition wiring the three roles to the moderator.
 */
// NOTE(review): the return type is reproduced as it appears in the reviewed
// text; if `WorkflowDefinition` is generic over the workflow meta map, restore
// its type argument here.
export function createKnowledgeExtractionWorkflow({
  defaultAdapter,
  adapters,
  extract,
}: CreateKnowledgeExtractionDeps): WorkflowDefinition {
  // Resolve the agent for a role, falling back to the shared default.
  const adapterFor = (role: keyof WorkflowMeta): AgentFn => adapters?.[role] ?? defaultAdapter;

  return {
    name: "knowledge-extraction",
    roles: {
      questioner: createQuestionerRole({ extract }),
      answerer: createAnswererRole({ extract }),
      explorer: createExplorerRole(adapterFor("explorer"), { extract }),
    },
    moderator,
  };
}
/**
 * Recursively collects the markdown files found below `rootDir`.
 *
 * @param rootDir Directory to scan.
 * @param base Prefix (with `/` separators) prepended to every returned path;
 *   pass `""` to get paths relative to `rootDir`.
 * @returns The paths of all entries that are files whose name ends in `.md`,
 *   `/`-separated and sorted with the default `Array.prototype.sort` order so
 *   the result does not depend on directory enumeration order. A directory
 *   that cannot be read contributes no entries.
 */
async function walkMarkdownFiles(rootDir: string, base: string): Promise<string[]> {
  let entries: Dirent[];
  try {
    entries = (await readdir(rootDir, { withFileTypes: true })) as Dirent[];
  } catch {
    // Best effort: a missing or unreadable directory is treated as empty.
    return [];
  }

  const found: string[] = [];
  for (const entry of entries) {
    const rel = base ? `${base}/${entry.name}` : entry.name;
    if (entry.isDirectory()) {
      found.push(...(await walkMarkdownFiles(join(rootDir, entry.name), rel)));
    } else if (entry.isFile() && entry.name.endsWith(".md")) {
      // Keep returned paths `/`-separated even if a segment carries a backslash.
      found.push(rel.replace(/\\/g, "/"));
    }
  }
  return found.sort();
}
/**
 * Builds the initial card queue for a run.
 *
 * Every markdown file under `<cwd>/.knowledge/` is listed as a repo-relative
 * path (`.knowledge/...`) in sorted order. The first line of `startContent`,
 * trimmed, is treated as a seed path: when it is non-empty it is placed at the
 * front of the queue and removed from the rest so it is never queued twice.
 * The seed is not checked against the file system here.
 *
 * @param cwd Repository root that contains the `.knowledge` directory.
 * @param startContent Trigger content whose first line may name a seed card.
 * @returns The ordered queue of card paths; empty when there is neither a seed
 *   nor any markdown file.
 */
export async function bootstrapKnowledgeQueue(cwd: string, startContent: string): Promise<string[]> {
  const discovered = await walkMarkdownFiles(join(cwd, ".knowledge"), "");
  // Sort unconditionally so the queue order is the same with or without a seed.
  const paths = discovered.map((file) => `.knowledge/${file}`).sort();

  const seed = startContent.trim().split(/\r?\n/u)[0]?.trim() ?? "";
  if (seed.length === 0) {
    return paths;
  }
  return [seed, ...paths.filter((p) => p !== seed)];
}

/**
 * Index of the last message sent by `role`, or `-1` when there is none.
 */
function lastIndexOfRole(messages: WorkflowMessage[], role: string): number {
  for (let i = messages.length - 1; i >= 0; i--) {
    if (messages[i].role === role) return i;
  }
  return -1;
}

/**
 * Computes the queue the questioner should work from on its next turn.
 *
 * - No questioner message yet: bootstrap the queue from disk and the trigger.
 * - Otherwise: start from the `remaining_queue` reported by the latest
 *   questioner message; if an explorer message follows it, append the
 *   explorer's `new_cards`, skipping any path that is already queued, repeated
 *   within `new_cards`, or was the `card` of an earlier questioner message, so
 *   no card is processed twice.
 *
 * @param start Start step of the thread; its `content` seeds the bootstrap.
 * @param messages Messages exchanged so far, oldest first.
 * @param cwd Repository root used when bootstrapping.
 * @returns The ordered list of card paths still to be processed.
 */
export async function resolveQueueForQuestioner(
  start: StartStep,
  messages: WorkflowMessage[],
  cwd: string,
): Promise<string[]> {
  const lastQi = lastIndexOfRole(messages, "questioner");
  if (lastQi === -1) {
    return bootstrapKnowledgeQueue(cwd, start.content);
  }

  const qMeta = messages[lastQi].meta as QuestionerMeta;
  const explorerMsg = messages.slice(lastQi + 1).find((m) => m.role === "explorer");
  if (!explorerMsg) {
    return qMeta.remaining_queue;
  }

  // Paths that must not be enqueued again: pending ones and already-visited cards.
  const known = new Set<string>(qMeta.remaining_queue);
  for (const m of messages) {
    if (m.role === "questioner") {
      known.add((m.meta as QuestionerMeta).card);
    }
  }

  const fresh: string[] = [];
  for (const card of (explorerMsg.meta as ExplorerMeta).new_cards) {
    if (!known.has(card)) {
      known.add(card);
      fresh.push(card);
    }
  }
  return [...qMeta.remaining_queue, ...fresh];
}
/** Maps each role name of this workflow to the meta payload its steps carry. */
export type WorkflowMeta = {
  questioner: QuestionerMeta;
  answerer: AnswererMeta;
  explorer: ExplorerMeta;
};

type Steps = ModeratorContext["steps"];

/** Meta of the most recent questioner step, or `undefined` when none has run yet. */
function lastQuestionerRemaining(steps: Steps): QuestionerMeta | undefined {
  let idx = steps.length;
  while (idx-- > 0) {
    const step = steps[idx];
    if (step.role === "questioner") return step.meta;
  }
  return undefined;
}

/**
 * Stagnation rule (issue #266): true when at least two explorer steps exist and
 * the two most recent ones both reported an empty `new_cards` list.
 */
function lastTwoExplorerRunsBothEmpty(steps: Steps): boolean {
  const latestPair = steps.filter((step) => step.role === "explorer").slice(-2);
  if (latestPair.length < 2) return false;
  return latestPair.every((step) => (step.meta as ExplorerMeta).new_cards.length === 0);
}

/** Pending queue when the explorer was skipped: the latest questioner's `remaining_queue`. */
function queueAfterSkippedExplorer(steps: Steps): string[] {
  const pending = lastQuestionerRemaining(steps)?.remaining_queue;
  return pending ?? [];
}

/**
 * Pending queue right after an explorer step: the latest questioner's
 * `remaining_queue` followed by the explorer's `new_cards`. Yields an empty
 * list when the last step is not an explorer step or no questioner has run.
 */
function queueAfterExplorerStep(steps: Steps): string[] {
  const tail = steps[steps.length - 1];
  const questioner = lastQuestionerRemaining(steps);
  if (!tail || tail.role !== "explorer" || !questioner) return [];
  const explored = tail.meta as ExplorerMeta;
  return [...questioner.remaining_queue, ...explored.new_cards];
}

/**
 * Chooses the next role from the steps taken so far.
 *
 * - no steps yet: `questioner`
 * - after `questioner`: `answerer`
 * - after `answerer`: `explorer` when something is unanswered; otherwise
 *   `questioner` if cards remain queued, else `END`
 * - after `explorer`: `END` on stagnation or an empty queue, else `questioner`
 * - any other role: `END`
 */
export const moderator: Moderator = ({ steps }) => {
  const latest = steps[steps.length - 1];
  if (latest === undefined) {
    return "questioner";
  }

  switch (latest.role) {
    case "questioner":
      return "answerer";

    case "answerer": {
      const answer = latest.meta as AnswererMeta;
      if (answer.has_unanswered) return "explorer";
      return queueAfterSkippedExplorer(steps).length > 0 ? "questioner" : END;
    }

    case "explorer": {
      if (lastTwoExplorerRunsBothEmpty(steps)) return END;
      return queueAfterExplorerStep(steps).length > 0 ? "questioner" : END;
    }

    default:
      return END;
  }
};
/** Meta of the most recent questioner message, or `undefined` when there is none. */
function lastQuestionerMeta(messages: WorkflowMessage[]): QuestionerMeta | undefined {
  let idx = messages.length;
  while (idx-- > 0) {
    const message = messages[idx];
    if (message.role === "questioner") {
      return message.meta as QuestionerMeta;
    }
  }
  return undefined;
}

/**
 * Creates the answerer role.
 *
 * For every question of the latest questioner message the role runs
 * `nerve knowledge query <question>` in the resolved working directory (or
 * records a placeholder when `start.meta.dryRun` is set), gathers one markdown
 * section per question, prefixes the sections with the answerer instructions
 * and passes the resulting text to `llmExtract` with `answererMetaSchema`.
 *
 * @param deps Supplies the extractor configuration whose `provider` is used.
 * @returns A role resolving to the assembled text as `content` and the
 *   extracted value as `meta`. Without questions it resolves immediately with
 *   empty results. The role throws when `llmExtract` reports a failure.
 */
export function createAnswererRole(deps: CreateAnswererRoleDeps): Role {
  const extractor = deps.extract;

  return async (start: StartStep, messages: WorkflowMessage[]) => {
    const cwd = resolveWorkdir(start);
    const asked = lastQuestionerMeta(messages);
    if (!asked || asked.questions.length === 0) {
      return {
        content: "answerer: no questions from questioner; skipping CLI lookup.",
        meta: { results: [], has_unanswered: false },
      };
    }

    const sections: string[] = [];
    for (const { id, question, domain } of asked.questions) {
      if (start.meta.dryRun) {
        sections.push(`### ${id}\n[dryRun] skipped nerve knowledge query\n`);
        continue;
      }

      const outcome = await spawnSafe("nerve", ["knowledge", "query", question], {
        cwd,
        env: nerveCommandEnv(),
        timeoutMs: 120_000,
        dryRun: false,
        abortSignal: null,
      });
      if (outcome.ok) {
        sections.push(`### ${id} (${domain})\nQuestion: ${question}\n---\n${outcome.value.stdout}\n`);
        continue;
      }

      // Summarise the failure; any kind not listed is reported as "aborted".
      const failure = outcome.error;
      let detail: string;
      switch (failure.kind) {
        case "non_zero_exit":
          detail = `exit ${failure.exitCode}\n${failure.stderr}`;
          break;
        case "timeout":
          detail = `timeout\n${failure.stderr}`;
          break;
        case "spawn_failed":
          detail = failure.message;
          break;
        default:
          detail = "aborted";
      }
      sections.push(`### ${id}\nnerve knowledge query failed: ${detail}\n`);
    }

    const preamble = [
      "You are the **answerer**. You MUST NOT read repository source code — only the CLI retrieval excerpts below.",
      "For each question id, decide whether the knowledge base already answers it.",
      "Set found=true only when the excerpt supports a confident answer; otherwise found=false.",
      "Set has_unanswered=true if any question remains unanswered by the knowledge base.",
      "",
    ];
    const bundle = [...preamble, ...sections].join("\n");

    const extracted = await llmExtract({
      text: bundle,
      schema: answererMetaSchema,
      provider: extractor.provider,
      dryRun: start.meta.dryRun,
    });
    if (!extracted.ok) {
      throw new Error(`answerer llmExtract: ${JSON.stringify(extracted.error)}`);
    }

    return { content: bundle, meta: extracted.value };
  };
}
/**
 * Renders the instruction prompt for the explorer agent.
 *
 * The prompt names the thread, the resolved working directory and the card of
 * the latest questioner message, lists the ids of the latest answerer results
 * whose `found` flag is false, and describes the expected write, sync and
 * reporting steps.
 *
 * @param start Start step; provides `meta.threadId` and the working directory.
 * @param messages Messages so far; the latest questioner and answerer metas are read.
 * @returns The complete prompt text.
 */
export function explorerPrompt(start: StartStep, messages: WorkflowMessage[]): string {
  // Meta of the most recent message sent by `role`, if any.
  const latestMetaOf = (role: string): unknown =>
    [...messages].reverse().find((message) => message.role === role)?.meta;

  const threadId = start.meta.threadId;
  const qm = latestMetaOf("questioner") as QuestionerMeta | undefined;
  const am = latestMetaOf("answerer") as AnswererMeta | undefined;
  const cwd = resolveWorkdir(start);

  const unanswered = am ? am.results.filter((r) => !r.found).map((r) => r.id) : [];

  const lines = [
    "You are the **explorer** in a knowledge-extraction workflow.",
    "",
    "## Context",
    "",
    `- Thread: \`nerve thread ${threadId}\``,
    `- Working directory (repo root for paths): ${cwd}`,
    `- Current knowledge card (questioner): ${qm?.card ?? "(unknown)"}`,
    "",
    "## Unanswered question ids",
    "",
    JSON.stringify(unanswered),
    "",
    "Use the prior answerer results in the thread to map ids to full question text when you read messages above.",
    "",
    "## Task",
    "",
    "For each unanswered question, **read the codebase** as needed, then either:",
    "",
    "- Add a new markdown file under `.knowledge/`, or",
    "- Patch an existing card (prefer updating the card listed above when appropriate).",
    "",
    "After any write or patch to `.knowledge`, run:",
    "",
    "```bash",
    "nerve knowledge sync",
    "```",
    "",
    `from this repo root (${cwd}), and fix failures until sync succeeds.`,
    "",
    "## Output meta",
    "",
    "Report `patches` as { card, section } entries for cards you edited (section is a short heading or path hint).",
    "Report `new_cards` as repo-relative paths for brand-new files you created (e.g. `.knowledge/new-topic.md`).",
    "",
    "Do not claim work you did not perform.",
  ];
  return lines.join("\n");
}

/**
 * Creates the explorer role by handing `createRole` the agent, a prompt
 * builder that delegates to `explorerPrompt`, `explorerMetaSchema` and the
 * extractor configuration.
 *
 * @param adapter Agent that carries out the exploration.
 * @param deps Supplies the extractor configuration.
 * @returns The role produced by `createRole`.
 */
export function createExplorerRole(
  adapter: AgentFn,
  { extract }: CreateExplorerRoleDeps,
): Role {
  const buildPrompt = async (innerStart: StartStep, msgs: WorkflowMessage[]) =>
    explorerPrompt(innerStart, msgs);

  return createRole(adapter, buildPrompt, explorerMetaSchema, extract);
}
/**
 * True when `card`, once normalised, starts with a `..` segment and would
 * therefore resolve outside the directory it is joined to.
 */
function escapesWorkdir(card: string): boolean {
  return /^\.\.(?:[\\/]|$)/.test(join(card));
}

/**
 * Creates the questioner role.
 *
 * On each turn the role resolves the pending card queue, takes the first card,
 * reads it from the working directory and asks the LLM role built by
 * `createLlmRole` for questions about it, extracted with
 * `questionerExtractSchema`.
 *
 * Card paths originate from the trigger prompt and from explorer output, so a
 * path that would leave the working directory (`../...`) is rejected before
 * any file is read.
 *
 * @param adapterExtract Supplies the extractor configuration whose `provider`
 *   is used for both generation and extraction.
 * @returns A role resolving to the LLM content plus meta `{ card, questions,
 *   remaining_queue }`. With an empty queue it resolves to an explanatory
 *   message and empty meta without calling the LLM.
 * @throws Error when the card path escapes the working directory or the card
 *   cannot be read.
 */
export function createQuestionerRole(adapterExtract: CreateQuestionerRoleDeps): Role {
  const { extract } = adapterExtract;

  return async (start: StartStep, messages: WorkflowMessage[]) => {
    const cwd = resolveWorkdir(start);
    const queue = await resolveQueueForQuestioner(start, messages, cwd);
    const [card, ...remaining_queue] = queue;
    if (card === undefined) {
      return {
        content:
          "questioner: no `.knowledge` markdown files found and no seed path in the trigger prompt; queue is empty.",
        meta: {
          card: "",
          questions: [],
          remaining_queue: [],
        },
      };
    }

    if (escapesWorkdir(card)) {
      throw new Error(`questioner: refusing to read ${card}: path escapes the working directory`);
    }

    let cardBody: string;
    try {
      cardBody = await readFile(join(cwd, card), "utf8");
    } catch (e) {
      const msg = e instanceof Error ? e.message : String(e);
      throw new Error(`questioner: failed to read ${card}: ${msg}`);
    }

    const inner = createLlmRole({
      provider: extract.provider,
      prompt: async () => [
        { role: "system", content: questionerSystem() },
        { role: "user", content: questionerUser(card, cardBody, remaining_queue) },
      ],
      extract: {
        schema: questionerExtractSchema,
        provider: extract.provider,
      },
    });

    const r = await inner(start, messages);
    return {
      content: r.content,
      meta: {
        card,
        questions: r.meta.questions,
        remaining_queue,
      },
    };
  };
}