From dc1e96d8f300b51c71adb7ceed3bee97e1b8a06a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= <xiaoju@shazhou.work>
Date: Thu, 30 Apr 2026 13:04:29 +0000
Subject: [PATCH] refactor(workflows): rename knowledge-extraction to
 extract-knowledge

Align WorkflowDefinition name, nerve.yaml, role prompts, and lockfile path with extract-knowledge.

Refs #285

Made-with: Cursor
---
 nerve.yaml                                    |   2 +-
 pnpm-lock.yaml                                |   2 +-
 workflows/extract-knowledge/build.ts          |  33 ++++++
 workflows/extract-knowledge/roles/explorer.ts |  93 +++++++++++++++
 .../extract-knowledge/roles/questioner.ts     | 108 ++++++++++++++++++
 5 files changed, 236 insertions(+), 2 deletions(-)
 create mode 100644 workflows/extract-knowledge/build.ts
 create mode 100644 workflows/extract-knowledge/roles/explorer.ts
 create mode 100644 workflows/extract-knowledge/roles/questioner.ts
diff --git a/nerve.yaml b/nerve.yaml
index 0e2ccc2..df951cb 100644
--- a/nerve.yaml
+++ b/nerve.yaml
@@ -41,6 +41,6 @@ workflows:
   solve-issue:
     concurrency: 1
     overflow: queue
-  knowledge-extraction:
+  extract-knowledge:
     concurrency: 1
     overflow: queue
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 94e2b4d..13d87af 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -183,7 +183,7 @@ importers:
         specifier: ^5.7.0
         version: 5.9.3
 
-  workflows/knowledge-extraction:
+  workflows/extract-knowledge:
     dependencies:
       '@uncaged/nerve-adapter-cursor':
         specifier: link:../../../repos/nerve/packages/adapter-cursor
diff --git a/workflows/extract-knowledge/build.ts b/workflows/extract-knowledge/build.ts
new file mode 100644
index 0000000..d507a31
--- /dev/null
+++ b/workflows/extract-knowledge/build.ts
@@ -0,0 +1,33 @@
+import type { AgentFn, WorkflowDefinition } from "@uncaged/nerve-core";
+import type { LlmExtractorConfig } from "@uncaged/nerve-workflow-utils";
+import { createLlmAdapter } from "@uncaged/nerve-workflow-utils";
+
+import { moderator } from "./moderator.js";
+import type { WorkflowMeta } from "./moderator.js";
+import { createAnswererRole } from "./roles/answerer.js";
+import { createExplorerRole } from "./roles/explorer.js";
+import { createQuestionerRole } from "./roles/questioner.js";
+
+export type CreateKnowledgeExtractionDeps = { // Inputs accepted by createKnowledgeExtractionWorkflow.
+  defaultAdapter: AgentFn; // Fallback when `adapters` has no explorer entry (the only role that uses it here).
+  adapters?: Partial<Record<keyof WorkflowMeta, AgentFn>>; // Optional per-role adapter overrides.
+  extract: LlmExtractorConfig; // Passed to every role factory; `extract.provider` feeds createLlmAdapter.
+};
+
+/** Assembles the "extract-knowledge" workflow definition from its three roles. */
+export function createKnowledgeExtractionWorkflow(
+  deps: CreateKnowledgeExtractionDeps,
+): WorkflowDefinition<WorkflowMeta> {
+  const { defaultAdapter, adapters, extract } = deps;
+  // questioner/answerer fall back to this adapter; explorer falls back to defaultAdapter.
+  const llmAdapter = createLlmAdapter(extract.provider);
+  return {
+    name: "extract-knowledge",
+    roles: {
+      questioner: createQuestionerRole(adapters?.questioner ?? llmAdapter, { extract }),
+      answerer: createAnswererRole(adapters?.answerer ?? llmAdapter, { extract }),
+      explorer: createExplorerRole(adapters?.explorer ?? defaultAdapter, { extract }),
+    },
+    moderator,
+  };
+}
diff --git a/workflows/extract-knowledge/roles/explorer.ts b/workflows/extract-knowledge/roles/explorer.ts
new file mode 100644
index 0000000..a2f9a8d
--- /dev/null
+++ b/workflows/extract-knowledge/roles/explorer.ts
@@ -0,0 +1,93 @@
+import type { AgentFn, Role, ThreadContext, WorkflowMessage } from "@uncaged/nerve-core";
+import type { LlmExtractorConfig } from "@uncaged/nerve-workflow-utils";
+import { createRole } from "@uncaged/nerve-workflow-utils";
+import { z } from "zod";
+
+import { resolveWorkdir } from "../lib/workdir.js";
+
+import type { AnswererMeta } from "./answerer.js";
+import type { QuestionerMeta } from "./questioner.js";
+
+export const explorerMetaSchema = z.object({ // Meta the explorer reports; see "Output meta" in explorerPrompt.
+  patches: z.array( // One entry per existing card that was edited.
+    z.object({
+      card: z.string(), // The card that was patched.
+      section: z.string(), // Short heading or path hint, per the explorer prompt.
+    }),
+  ),
+  new_cards: z.array(z.string()), // Repo-relative paths of newly created files, per the explorer prompt.
+});
+
+export type ExplorerMeta = z.infer<typeof explorerMetaSchema>; // Static type derived from the schema above.
+
+export type CreateExplorerRoleDeps = { // Options accepted by createExplorerRole.
+  extract: LlmExtractorConfig; // Forwarded unchanged as the last argument to createRole.
+};
+
+/** Returns the `meta` of the newest message whose role equals `role`, else undefined. */
+function lastMeta<M>(messages: WorkflowMessage[], role: string): M | undefined {
+  // Search a reversed copy so the first hit is the newest; `messages` is untouched.
+  const newestFirst = [...messages].reverse();
+  const hit = newestFirst.find((message) => message.role === role);
+  // The cast is unchecked: callers choose the `M` that matches `role`.
+  return hit === undefined ? undefined : (hit.meta as M);
+}
+
+export function explorerPrompt(ctx: ThreadContext): string { // Renders the explorer's instruction prompt from thread state.
+  const messages = ctx.steps as unknown as WorkflowMessage[]; // Double cast: steps are treated as WorkflowMessage[] with no runtime check.
+  const threadId = ctx.start.meta.threadId; // Interpolated into the "Thread" line of the prompt.
+  const qm = lastMeta<QuestionerMeta>(messages, "questioner"); // Latest questioner meta; supplies the current card path.
+  const am = lastMeta<AnswererMeta>(messages, "answerer"); // Latest answerer meta; supplies per-question results.
+  const cwd = resolveWorkdir(ctx.start); // Shown as the repo root and as the directory for the sync command.
+
+  const unanswered = // Ids of results whose `found` is falsy; [] when there is no answerer meta.
+    am?.results.filter((r) => !r.found).map((r) => r.id) ?? [];
+
+  return `You are the **explorer** in an extract-knowledge workflow.
+
+## Context
+
+- Thread: \`nerve thread ${threadId}\`
+- Working directory (repo root for paths): ${cwd}
+- Current knowledge card (questioner): ${qm?.card ?? "(unknown)"}
+
+## Unanswered question ids
+
+${JSON.stringify(unanswered)}
+
+Use the prior answerer results in the thread to map ids to full question text when you read messages above.
+
+## Task
+
+For each unanswered question, **read the codebase** as needed, then either:
+
+- Add a new markdown file under \`.knowledge/\`, or
+- Patch an existing card (prefer updating the card listed above when appropriate).
+
+After any write or patch to \`.knowledge\`, run:
+
+\`\`\`bash
+nerve knowledge sync
+\`\`\`
+
+from this repo root (${cwd}), and fix failures until sync succeeds.
+
+## Output meta
+
+Report \`patches\` as { card, section } entries for cards you edited (section is a short heading or path hint).
+Report \`new_cards\` as repo-relative paths for brand-new files you created (e.g. \`.knowledge/new-topic.md\`).
+
+Do not claim work you did not perform.`;
+}
+
+/** Builds the explorer role by handing adapter, prompt builder, schema and extractor config to createRole. */
+export function createExplorerRole(
+  adapter: AgentFn,
+  deps: CreateExplorerRoleDeps,
+): Role<ExplorerMeta> {
+  // explorerPrompt is synchronous; createRole still receives a promise-returning builder.
+  const buildPrompt = async (threadCtx: ThreadContext): Promise<string> => {
+    return explorerPrompt(threadCtx);
+  };
+  return createRole(adapter, buildPrompt, explorerMetaSchema, deps.extract);
+}
diff --git a/workflows/extract-knowledge/roles/questioner.ts b/workflows/extract-knowledge/roles/questioner.ts
new file mode 100644
index 0000000..59662f0
--- /dev/null
+++ b/workflows/extract-knowledge/roles/questioner.ts
@@ -0,0 +1,108 @@
+import { readFile } from "node:fs/promises";
+import { join } from "node:path";
+
+import type { AgentFn, Role, ThreadContext, WorkflowMessage } from "@uncaged/nerve-core";
+import type { LlmExtractorConfig } from "@uncaged/nerve-workflow-utils";
+import { createRole } from "@uncaged/nerve-workflow-utils";
+import { z } from "zod";
+
+import { resolveQueueForQuestioner } from "../lib/knowledge-queue.js";
+import { resolveWorkdir } from "../lib/workdir.js";
+
+const questionerExtractSchema = z.object({ // Schema handed to createRole for the questioner's extracted output.
+  questions: z
+    .array(
+      z.object({
+        id: z.string(), // Stable question id; the prompt suggests q1..q5.
+        question: z.string(), // The question text.
+        domain: z.string(), // Short domain label, e.g. routing or storage (per the prompt).
+      }),
+    )
+    .length(5), // Exactly five entries are required, matching the prompt's "exactly five".
+});
+
+export type QuestionerMeta = { // Meta returned by the role built in createQuestionerRole.
+  /** Empty when no .knowledge cards and no work to do. */
+  card: string; // Head of the resolved queue, i.e. the card the questions are about.
+  questions: { id: string; question: string; domain: string }[]; // Empty on the empty-queue path.
+  remaining_queue: string[]; // Queue entries that come after `card`.
+};
+
+export type CreateQuestionerRoleDeps = { // Options accepted by createQuestionerRole.
+  extract: LlmExtractorConfig; // Forwarded unchanged as the last argument to createRole.
+};
+
+function questionerSystem(): string { // Static instruction half of the questioner prompt; takes no input.
+  return `You are the **questioner** in an extract-knowledge workflow.
+
+Read the given markdown knowledge card. Propose exactly **five** technical questions that are **not** already answered or covered by that card.
+
+Rules:
+- Questions must be concrete and technical.
+- Each question needs a stable string id (e.g. q1, q2, q3, q4, q5), a short domain label (e.g. routing, storage), and the question text.
+- Do not assume access to other files or tools — reason only from the card content shown.`;
+}
+
+function questionerUser(card: string, cardBody: string, remainingHint: string[]): string { // Per-card half: path, remaining queue as JSON, then the card text.
+  return `Current card path: ${card}
+
+Remaining queue after this card (paths, may be empty): ${JSON.stringify(remainingHint)}
+
+--- Card content ---
+
+${cardBody}`;
+}
+
+export async function questionerPrompt(ctx: ThreadContext): Promise<string> { // Builds the full prompt for the first card in the queue.
+  const messages = ctx.steps as unknown as WorkflowMessage[]; // Double cast: no runtime check on the step shape.
+  const cwd = resolveWorkdir(ctx.start); // Base directory that card paths are joined onto below.
+  const queue = await resolveQueueForQuestioner(ctx.start, messages, cwd); // Ordered card paths; index 0 is processed now.
+  if (queue.length === 0) { // Guard: createQuestionerRole returns early on an empty queue before calling its inner role.
+    throw new Error(
+      "questioner: prompt invoked with empty queue — wrapped role should short-circuit before LLM",
+    );
+  }
+  const card = queue[0]!; // Non-null assertion is safe: the queue was just checked to be non-empty.
+  const remaining_queue = queue.slice(1); // Only shown to the model as a hint in the user part.
+  let cardBody: string;
+  try {
+    cardBody = await readFile(join(cwd, card), "utf8"); // Card path is resolved relative to cwd.
+  } catch (e) { // Rethrow with the card path for context; only the message text of `e` is kept.
+    const msg = e instanceof Error ? e.message : String(e);
+    throw new Error(`questioner: failed to read ${card}: ${msg}`);
+  }
+  return `${questionerSystem()}\n\n${questionerUser(card, cardBody, remaining_queue)}`; // System and user parts, one blank line apart.
+}
+
+export function createQuestionerRole(adapter: AgentFn, { extract }: CreateQuestionerRoleDeps): Role<QuestionerMeta> { // Wraps the createRole-built role so an empty queue returns early.
+  const inner = createRole(adapter, questionerPrompt, questionerExtractSchema, extract); // Role that yields `questions` only.
+
+  return async (ctx: ThreadContext) => {
+    const messages = ctx.steps as unknown as WorkflowMessage[]; // Double cast: no runtime check on the step shape.
+    const cwd = resolveWorkdir(ctx.start);
+    const queue = await resolveQueueForQuestioner(ctx.start, messages, cwd); // NOTE(review): questionerPrompt resolves the queue again, presumably via inner(ctx) — confirm both calls agree.
+    if (queue.length === 0) { // Nothing to ask about: return a placeholder result without calling `inner`.
+      return {
+        content:
+          "questioner: no `.knowledge` markdown files found and no seed path in the trigger prompt; queue is empty.",
+        meta: {
+          card: "",
+          questions: [],
+          remaining_queue: [],
+        },
+      };
+    }
+
+    const card = queue[0]!; // Safe: the queue is non-empty on this path.
+    const remaining_queue = queue.slice(1); // Everything after the current card.
+    const r = await inner(ctx);
+    return { // Combine the extracted questions with the queue bookkeeping computed above.
+      content: r.content,
+      meta: {
+        card,
+        questions: r.meta.questions,
+        remaining_queue,
+      },
+    };
+  };
+}