From 611de496da67c7365af38b3adba18e53560c8b34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= Date: Thu, 14 May 2026 07:50:50 +0000 Subject: [PATCH] feat(workflow): generic AgentFn + adaptAgent utility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AgentFn is now AgentFn<I = string> — preserving backward compatibility while allowing specialized input types for non-LLM agents (e.g. Cursor). Added adaptAgent(schema, agent) in workflow-utils: bridges any AgentFn<I> to AgentFn<string> via Zod schema parsing. Refs #327, Phase 1 of #328 --- .../2026-05-05-extract-knowledge-preparer.md | 359 ++++++++++++++++++ packages/workflow-utils/src/adapt-agent.ts | 18 + packages/workflow-utils/src/index.ts | 1 + packages/workflow/src/types.ts | 8 +- 4 files changed, 384 insertions(+), 2 deletions(-) create mode 100644 docs/plans/2026-05-05-extract-knowledge-preparer.md create mode 100644 packages/workflow-utils/src/adapt-agent.ts diff --git a/docs/plans/2026-05-05-extract-knowledge-preparer.md b/docs/plans/2026-05-05-extract-knowledge-preparer.md new file mode 100644 index 0000000..af10a00 --- /dev/null +++ b/docs/plans/2026-05-05-extract-knowledge-preparer.md @@ -0,0 +1,359 @@ +# extract-knowledge: Add `preparer` Role for Cold-Start Repos + +> **For Hermes:** Use subagent-driven-development skill to implement this plan task-by-task. + +**Goal:** 让 `extract-knowledge` workflow 能处理没有 `.knowledge/` 目录的「冷启动」仓库——新增 `preparer` 角色,用 Hermes agent 扫描仓库结构,生成初始 `.knowledge/` 卡片和种子问题,然后正常进入 questioner → answerer → explorer 循环。 + +**Architecture:** 在 moderator 最前面插入 `preparer` 角色。moderator 判断:如果是第一轮且 `.knowledge/` 为空,走 preparer;否则跳过直接走 questioner。preparer 用 Hermes adapter 扫描仓库(README、目录结构、package.json 等),产出初始卡片文件 + 种子问题列表。 + +**Tech Stack:** TypeScript, `@uncaged/workflow`, Hermes adapter(已有) + +**Repo:** `~/.uncaged-nerve/workflows/extract-knowledge/`(用户 workflow,不在 monorepo 里) + +--- + +## 现有流程 + +``` +moderator → questioner → answerer → (has_unanswered?) 
→ explorer → questioner → ... +``` + +## 改造后流程 + +``` +moderator → [preparer (if cold-start)] → questioner → answerer → explorer → ... +``` + +--- + +### Task 1: Create `preparer` Role + +**Objective:** 新建 `roles/preparer.ts`,用 Hermes agent 扫描仓库,生成 `.knowledge/` 初始卡片和种子问题。 + +**Files:** +- Create: `roles/preparer.ts` + +**Design:** + +```typescript +// roles/preparer.ts +import type { AgentFn, Role, ThreadContext } from "@uncaged/workflow"; +import type { LlmExtractorConfig } from "@uncaged/nerve-workflow-utils"; +import { createRole } from "@uncaged/nerve-workflow-utils"; +import { z } from "zod"; + +import { resolveWorkdir } from "../lib/workdir.js"; + +export const preparerMetaSchema = z.object({ + /** Markdown files created under .knowledge/ */ + created_cards: z.array(z.string()), + /** Seed questions for questioner to start with */ + seed_questions: z.array( + z.object({ + id: z.string(), + question: z.string(), + domain: z.string(), + }), + ), + /** Whether .knowledge/ was empty before this role ran */ + was_cold_start: z.boolean(), +}); + +export type PreparerMeta = z.infer; + +export type CreatePreparerRoleDeps = { + extract: LlmExtractorConfig; +}; + +function preparerPrompt(ctx: ThreadContext): string { + const cwd = resolveWorkdir(ctx.start); + + return `You are the **preparer** in an extract-knowledge workflow. + +## Context + +Working directory (repo root): ${cwd} + +This repository has NO existing \`.knowledge/\` directory. Your job is to bootstrap it. + +## Task + +1. **Scan the repository structure** — look at README.md, package.json/Cargo.toml/pyproject.toml, directory layout, and key source files to understand what this project does. + +2. **Create \`.knowledge/\` directory** with 2-4 initial markdown cards covering: + - \`overview.md\` — project purpose, architecture, main concepts + - \`api.md\` or \`usage.md\` — how to use the project (CLI commands, API surface, etc.) 
+ - Additional cards for significant subsystems if the project is large enough + + Each card should be a well-structured markdown file with headings, extracted from what you find in the repo. Do NOT fabricate information — only document what's actually in the code/docs. + +3. **Run \`nerve knowledge sync\`** from the repo root after creating cards. + +4. **Generate 5 seed questions** — concrete technical questions that the existing cards do NOT fully answer, to kick off the knowledge extraction loop. + +## Output meta + +Report: +- \`created_cards\`: repo-relative paths of files you created (e.g. \`.knowledge/overview.md\`) +- \`seed_questions\`: array of {id, question, domain} for the questioner to pick up +- \`was_cold_start\`: true (always true when this role runs) + +## Rules + +- Do NOT invent information. Only extract from actual repo content. +- Keep cards concise but informative (200-500 words each). +- Questions should target gaps — things the cards mention but don't fully explain.`; +} + +export function createPreparerRole( + adapter: AgentFn, + { extract }: CreatePreparerRoleDeps, +): Role { + return createRole( + adapter, + async (ctx: ThreadContext) => preparerPrompt(ctx), + preparerMetaSchema, + extract, + ); +} +``` + +**Key decisions:** +- Preparer 用 Hermes adapter(能读写文件、跑命令),不用 LLM adapter +- 产出 seed_questions 给 questioner 直接用,避免 questioner 在空目录上无事可做 +- `was_cold_start: true` 方便 moderator 判断 + +--- + +### Task 2: Add `preparer` to Workflow Definition + +**Objective:** 在 `build.ts` 注册 preparer 角色。 + +**Files:** +- Modify: `build.ts` + +**Changes:** + +```typescript +// 新增 import +import { createPreparerRole } from "./roles/preparer.js"; + +// WorkflowMeta 类型加 preparer +// (moderator.ts 里的 WorkflowMeta 也要同步改) + +// roles 里加: +roles: { + preparer: createPreparerRole(a("preparer"), { extract }), + questioner: createQuestionerRole(adapters?.questioner ?? llmAdapter, { extract }), + answerer: createAnswererRole(adapters?.answerer ?? 
llmAdapter, { extract }), + explorer: createExplorerRole(a("explorer"), { extract }), +}, +``` + +--- + +### Task 3: Update Moderator — Cold-Start Detection + +**Objective:** moderator 在第一轮检测 `.knowledge/` 是否为空,决定走 preparer 还是 questioner。 + +**Files:** +- Modify: `moderator.ts` +- Modify: `lib/knowledge-queue.ts`(导出 helper 供 moderator 用) + +**Design:** + +moderator 需要同步检查 `.knowledge/` 是否存在。但 moderator 目前是纯函数(不 async),不方便做 fs 操作。 + +**方案:用 steps 历史判断。** 如果 steps 为空且 start prompt 没有 seed 文件路径,走 preparer。preparer 的 meta 里有 `was_cold_start`,后续 moderator 能从 steps 看到。 + +```typescript +// moderator.ts 关键改动 + +export type WorkflowMeta = { + preparer: PreparerMeta; // 新增 + questioner: QuestionerMeta; + answerer: AnswererMeta; + explorer: ExplorerMeta; +}; + +export const moderator: Moderator = (context) => { + const { steps, start } = context; + + if (steps.length === 0) { + // 判断是否 cold start:prompt 第一行不是 .knowledge/ 路径 + const seed = start.content.trim().split(/\r?\n/)[0]?.trim() ?? ""; + const looksLikeKnowledgePath = seed.startsWith(".knowledge/"); + if (!looksLikeKnowledgePath) { + return "preparer"; + } + return "questioner"; + } + + const last = steps[steps.length - 1]; + + // preparer 完成后 → questioner + if (last.role === "preparer") { + return "questioner"; + } + + // ... 
其余逻辑不变 +}; +``` + +**⚠️ 注意:** 这个判断逻辑比较粗糙——如果 prompt 第一行不是 `.knowledge/` 路径就走 preparer。这意味着对已有 `.knowledge/` 的仓库,如果 prompt 没带 seed 路径也会走 preparer。更好的办法是让 preparer 自己检测并 short-circuit(见 Task 4)。 + +--- + +### Task 4: Preparer Short-Circuit — 已有 Knowledge 时跳过 + +**Objective:** 如果仓库已有 `.knowledge/` 且里面有 `.md` 文件,preparer 直接返回空结果,不做任何事。 + +**Files:** +- Modify: `roles/preparer.ts` + +**改动:** 在 `createPreparerRole` 返回的 role 函数里,先检查 `.knowledge/` 是否有内容: + +```typescript +export function createPreparerRole( + adapter: AgentFn, + { extract }: CreatePreparerRoleDeps, +): Role { + const inner = createRole( + adapter, + async (ctx: ThreadContext) => preparerPrompt(ctx), + preparerMetaSchema, + extract, + ); + + return async (ctx: ThreadContext) => { + const cwd = resolveWorkdir(ctx.start); + // 检查 .knowledge/ 是否已有 md 文件 + const knowledgeDir = join(cwd, ".knowledge"); + try { + const entries = await readdir(knowledgeDir, { recursive: true }); + const hasMd = entries.some((e) => + typeof e === "string" ? 
e.endsWith(".md") : false + ); + if (hasMd) { + return { + content: "preparer: .knowledge/ already has content; skipping cold-start bootstrap.", + meta: { + created_cards: [], + seed_questions: [], + was_cold_start: false, + }, + }; + } + } catch { + // .knowledge/ doesn't exist — proceed with cold start + } + + return inner(ctx); + }; +} +``` + +这样 moderator 可以始终先走 preparer,由 preparer 自己决定是否需要工作。 + +--- + +### Task 5: Questioner — 接收 Preparer 的 Seed Questions + +**Objective:** questioner 在 bootstrap 阶段,如果此前已有 preparer 步骤且其 `created_cards` 非空,就用这些卡片作为初始 queue,而不是走 `bootstrapKnowledgeQueue`;`seed_questions` 暂不直接消费,仅作备用(见本节末尾说明)。 + +**Files:** +- Modify: `roles/questioner.ts` +- Modify: `lib/knowledge-queue.ts` + +**改动:** `resolveQueueForQuestioner` 在 steps 中找到 preparer 步骤时,从 preparer 的 `created_cards` 构建初始 queue: + +```typescript +// knowledge-queue.ts +export async function resolveQueueForQuestioner( + start: StartStep, + messages: WorkflowMessage[], + cwd: string, +): Promise { + const lastQi = lastIndexOfRole(messages, "questioner"); + if (lastQi === -1) { + // 首次 questioner — 检查是否有 preparer 产出 + const preparerMsg = messages.find((m) => m.role === "preparer"); + if (preparerMsg) { + const pm = preparerMsg.meta as PreparerMeta; + if (pm.created_cards.length > 0) { + // 用 preparer 创建的卡片作为初始 queue + return pm.created_cards; + } + } + return bootstrapKnowledgeQueue(cwd, start.content); + } + // ... 
其余不变 +} +``` + +questioner 本身不需要改——它已经能从 queue 读取卡片并生成问题。preparer 产出的 seed_questions 是备用方案(如果未来想让 questioner 直接用 preparer 的问题而不是自己生成,可以加这个逻辑)。 + +--- + +### Task 6: Update `index.ts` — 配置 Preparer Adapter + +**Objective:** 给 preparer 配置 Hermes adapter。 + +**Files:** +- Modify: `index.ts` + +**改动:** + +```typescript +const workflow = createKnowledgeExtractionWorkflow({ + defaultAdapter: hermesAdapter, + adapters: { + preparer: hermesAdapter, // 用 Hermes — 需要读文件、跑命令 + explorer: createCursorAdapter({ + type: "cursor", + model: "claude-sonnet-4", + timeout: CURSOR_TIMEOUT_MS, + }), + }, + extract: { provider }, +}); +``` + +--- + +### Task 7: Build & Test + +**Objective:** 编译并验证 workflow 能跑通。 + +**Steps:** + +```bash +# 1. Build 用户 workflows +cd ~/.uncaged-nerve && npm run build + +# 2. 重启 daemon +node ~/repos/nerve/packages/cli/dist/cli.js daemon stop +node ~/repos/nerve/packages/cli/dist/cli.js daemon start + +# 3. 用一个没有 .knowledge/ 的 repo 测试 +node ~/repos/nerve/packages/cli/dist/cli.js workflow trigger extract-knowledge \ + --prompt $'\n/home/azureuser/repos/knowledge-sources/OfficeCLI' + +# 4. 
观察 +node ~/repos/nerve/packages/cli/dist/cli.js thread list +# 期望看到 preparer 先跑,然后 questioner 接力 +``` + +--- + +## Summary + +| Task | 内容 | 文件 | +|------|------|------| +| 1 | 新建 preparer 角色 | `roles/preparer.ts` | +| 2 | 注册到 workflow definition | `build.ts` | +| 3 | Moderator 加 cold-start 判断 | `moderator.ts` | +| 4 | Preparer short-circuit(已有 knowledge 跳过) | `roles/preparer.ts` | +| 5 | Questioner 接收 preparer 产出 | `lib/knowledge-queue.ts` | +| 6 | 配置 preparer adapter | `index.ts` | +| 7 | Build & 测试 | — | diff --git a/packages/workflow-utils/src/adapt-agent.ts b/packages/workflow-utils/src/adapt-agent.ts new file mode 100644 index 0000000..ad60c07 --- /dev/null +++ b/packages/workflow-utils/src/adapt-agent.ts @@ -0,0 +1,18 @@ +import type { AgentFn } from "@uncaged/workflow"; +import type { z } from "zod"; + +/** + * Bridges a specialized `AgentFn<I>` to the standard `AgentFn<string>` signature. + * + * The schema defines what the agent needs; `adaptAgent` parses the raw string + * input as JSON, validates it through the schema, and passes the typed result to the + * underlying agent. `ctx` is forwarded transparently. + * + * For agents whose input is already `string`, no adaptation is needed. 
+ */ +export function adaptAgent(schema: z.ZodType, agent: AgentFn): AgentFn { + return async (ctx, raw) => { + const parsed = schema.parse(JSON.parse(raw)); + return agent(ctx, parsed); + }; +} diff --git a/packages/workflow-utils/src/index.ts b/packages/workflow-utils/src/index.ts index 8e685f6..a768348 100644 --- a/packages/workflow-utils/src/index.ts +++ b/packages/workflow-utils/src/index.ts @@ -1,4 +1,5 @@ // Primary API — role factory templates +export { adaptAgent } from "./adapt-agent.js"; export { createLlmAdapter } from "./create-llm-adapter.js"; export { createRole, type LlmExtractorConfig } from "./create-role.js"; export { llmExtract, llmExtractWithRetry } from "./shared/llm-extract.js"; diff --git a/packages/workflow/src/types.ts b/packages/workflow/src/types.ts index f66a170..bebac32 100644 --- a/packages/workflow/src/types.ts +++ b/packages/workflow/src/types.ts @@ -54,8 +54,12 @@ export type WorkflowContext = ThreadContext; */ export type Role = (ctx: ThreadContext) => Promise>; -/** Unified agent invocation — raw string output; structured meta uses the extract layer. */ -export type AgentFn = (ctx: ThreadContext, systemPrompt: string) => Promise; +/** + * Unified agent invocation — raw string output; structured meta uses the extract layer. + * `I` is the agent-specific input type (default: `string` for prompt-based agents). + * Non-string agents (e.g. Cursor) declare their own `I` and use `adaptAgent` to bridge. + */ +export type AgentFn = (ctx: ThreadContext, input: I) => Promise; /** A discriminated union of role steps after each execution, aligned with `StartStep` shape. */ export type RoleStep = { -- 2.43.0