/**
 * PR summarizer workflow: fetches a pull request and its unified diff from
 * Gitea, optionally analyzes it with an LLM, and emits a Chinese Markdown
 * summary.
 *
 * The host must register `workflows.pr-summarizer` in nerve.yaml. Trigger example:
 *   nerve workflow trigger pr-summarizer --payload '{"prompt":"https://gitea.example.com/owner/repo/pulls/42"}'
 * (assuming the payload prompt reaches the fetcher as `start.content`; the
 * fetcher accepts either a PR URL or a JSON object, see `resolvePrFromContent`).
 *
 * A Sense may return a workflow directive beginning with `pr-summarizer|50|`
 * (see parseSenseWorkflowDirective for the exact format).
 */
import type {
  ModeratorContext,
  RoleResult,
  StartStep,
  WorkflowDefinition,
  WorkflowMessage,
} from "@uncaged/nerve-core";
import { END } from "@uncaged/nerve-core";
import {
  isDryRun,
  llmExtract,
  nerveAgentContext,
  readNerveYaml,
  spawnSafe,
} from "@uncaged/nerve-workflow-utils";
import { join } from "node:path";
import { z } from "zod";

/** Home directory; falls back to a fixed path when `HOME` is not set. */
const HOME = process.env.HOME ?? "/home/azureuser";
const NERVE_ROOT = join(HOME, ".uncaged-nerve");

/** Maximum number of characters of the unified diff stored in meta (longer diffs are truncated and the content says so). */
const DIFF_TEXT_MAX_CHARS = 1_500_000;
/** Maximum length of the diff prefix that is sent to the analysis model. */
const DIFF_LLM_MAX_CHARS = 100_000;

/** Per-role meta payloads produced by this workflow. */
type PrSummarizerMeta = {
  fetcher: {
    prUrl: string | null;
    owner: string | null;
    repo: string | null;
    prIndex: number | null;
    giteaBaseUrl: string | null;
    title: string | null;
    state: string | null;
    diffText: string | null;
    diffByteLength: number | null;
    httpStatus: number | null;
    errorMessage: string | null;
  };
  analyzer: {
    analysisMarkdown: string | null;
    providerModel: string | null;
    errorMessage: string | null;
  };
  writer: {
    summaryZhMarkdown: string | null;
    errorMessage: string | null;
  };
};

/** Shape accepted when the prompt is a JSON object instead of a bare URL. */
const jsonPromptSchema = z.object({
  prUrl: z.string().nullish(),
  owner: z.string().nullish(),
  repo: z.string().nullish(),
  index: z.number().int().positive().nullish(),
  baseUrl: z.string().nullish(),
});

const analysisExtractSchema = z
  .object({
    analysisMarkdown: z.string().describe("Technical PR analysis in Markdown (can be English)."),
  })
  .describe("Structured PR analysis from the diff.");

const summaryExtractSchema = z
  .object({
    summaryZhMarkdown: z
      .string()
      .describe(
        "Final deliverable: Chinese Markdown with title, key changes, risks, and test suggestions.",
      ),
  })
  .describe("Chinese Markdown PR summary.");

/** Returns the nerve.yaml text, or a placeholder comment when it cannot be read. */
function getNerveYaml(): string {
  const result = readNerveYaml({ nerveRoot: NERVE_ROOT });
  return result.ok ? result.value : "# nerve.yaml unavailable";
}

/**
 * Reads one value through the `cfg get <key>` command, run in NERVE_ROOT.
 *
 * @returns The trimmed stdout, or null when the command fails or prints nothing.
 */
async function cfgGet(key: string): Promise<string | null> {
  const result = await spawnSafe("cfg", ["get", key], {
    cwd: NERVE_ROOT,
    env: null,
    timeoutMs: 10_000,
  });
  if (!result.ok) {
    return null;
  }
  return result.value.stdout.trim() || null;
}

/**
 * Resolves a setting from the environment first, then from `cfg`.
 * A blank environment variable is treated as unset so it cannot mask the
 * `cfg` value (or a default applied by the caller).
 */
async function envOrCfg(key: string): Promise<string | null> {
  const fromEnv = process.env[key]?.trim();
  if (fromEnv) {
    return fromEnv;
  }
  return cfgGet(key);
}

/**
 * Resolves the DashScope LLM provider settings.
 *
 * @returns The provider, or null when the API key or base URL is missing.
 *          The model defaults to "qwen-plus".
 */
async function resolveDashScopeProvider(): Promise<{
  baseUrl: string;
  apiKey: string;
  model: string;
} | null> {
  const apiKey = await envOrCfg("DASHSCOPE_API_KEY");
  const baseUrl = await envOrCfg("DASHSCOPE_BASE_URL");
  const model = (await envOrCfg("DASHSCOPE_MODEL")) ?? "qwen-plus";
  if (apiKey === null || baseUrl === null) {
    return null;
  }
  return { apiKey, baseUrl, model };
}

/**
 * Parses a Gitea pull request URL of the form
 * `http(s)://host[/sub/path]/owner/repo/pulls/NUMBER[/...]`.
 *
 * Any path segments in front of `owner/repo` are kept as part of
 * `giteaBaseUrl`, so an instance served under a sub-path resolves to the
 * right API root.
 *
 * @returns The parsed parts, or null when `raw` is not a recognizable PR URL.
 */
function parseGiteaPullUrl(raw: string): {
  giteaBaseUrl: string;
  owner: string;
  repo: string;
  prIndex: number;
  prUrl: string;
} | null {
  const trimmed = raw.trim();
  let u: URL;
  try {
    u = new URL(trimmed);
  } catch {
    return null;
  }
  if (u.protocol !== "http:" && u.protocol !== "https:") {
    return null;
  }
  // filter(Boolean) drops empty segments, including those from trailing slashes.
  const parts = u.pathname.split("/").filter(Boolean);
  // Look for a "pulls" segment that has owner/repo before it and a number after
  // it, so an owner or repository that is itself named "pulls" still parses.
  const pullsAt = parts.findIndex(
    (segment, i) => i >= 2 && segment === "pulls" && /^\d+$/.test(parts[i + 1] ?? ""),
  );
  if (pullsAt === -1) {
    return null;
  }
  const owner = parts[pullsAt - 2];
  const repo = parts[pullsAt - 1];
  const indexStr = parts[pullsAt + 1];
  if (!owner || !repo || !indexStr) {
    return null;
  }
  const prIndex = Number.parseInt(indexStr, 10);
  if (!Number.isSafeInteger(prIndex) || prIndex < 1) {
    return null;
  }
  const origin = `${u.protocol}//${u.host}`;
  const subPath = parts.slice(0, pullsAt - 2).join("/");
  const giteaBaseUrl = subPath === "" ? origin : `${origin}/${subPath}`;
  return { giteaBaseUrl, owner, repo, prIndex, prUrl: trimmed };
}

/** Result of interpreting the workflow prompt; `parseError` is non-null on failure. */
type ResolvedPr = {
  prUrl: string | null;
  owner: string | null;
  repo: string | null;
  prIndex: number | null;
  giteaBaseUrl: string | null;
  parseError: string | null;
};

/**
 * Interprets the prompt as either a JSON object (when it starts with `{`,
 * validated by `jsonPromptSchema`) or a bare Gitea PR URL.
 *
 * In JSON mode, explicit fields win and anything missing is filled in from
 * `prUrl` when that URL parses. Blank strings count as missing.
 */
function resolvePrFromContent(content: string): ResolvedPr {
  const empty: ResolvedPr = {
    prUrl: null,
    owner: null,
    repo: null,
    prIndex: null,
    giteaBaseUrl: null,
    parseError: null,
  };
  const trimmed = content.trim();
  if (!trimmed) {
    return { ...empty, parseError: "Empty prompt" };
  }

  if (trimmed.startsWith("{")) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(trimmed) as unknown;
    } catch {
      return { ...empty, parseError: "Invalid JSON in prompt" };
    }
    const row = jsonPromptSchema.safeParse(parsed);
    if (!row.success) {
      return { ...empty, parseError: `JSON validation failed: ${row.error.message}` };
    }
    const j = row.data;

    // Normalize "" / whitespace to null so a blank field neither blocks the
    // URL-derived fallback nor leaks out as an empty prUrl.
    const clean = (value: string | null | undefined): string | null => {
      const text = value?.trim();
      return text ? text : null;
    };

    let owner = clean(j.owner);
    let repo = clean(j.repo);
    let prIndex: number | null = j.index ?? null;
    let giteaBaseUrl = clean(j.baseUrl);
    const prUrl = clean(j.prUrl);

    if (prUrl !== null) {
      const fromUrl = parseGiteaPullUrl(prUrl);
      if (fromUrl) {
        owner = owner ?? fromUrl.owner;
        repo = repo ?? fromUrl.repo;
        prIndex = prIndex ?? fromUrl.prIndex;
        giteaBaseUrl = giteaBaseUrl ?? fromUrl.giteaBaseUrl;
      }
    }

    if (owner && repo && prIndex !== null && giteaBaseUrl) {
      const normalizedBase = giteaBaseUrl.replace(/\/+$/, "");
      const builtUrl = `${normalizedBase}/${owner}/${repo}/pulls/${prIndex}`;
      return {
        prUrl: prUrl ?? builtUrl,
        owner,
        repo,
        prIndex,
        giteaBaseUrl: normalizedBase,
        parseError: null,
      };
    }
    return {
      ...empty,
      parseError:
        "JSON prompt must include resolvable owner, repo, pr index, and baseUrl (or prUrl)",
    };
  }

  const p = parseGiteaPullUrl(trimmed);
  if (!p) {
    return {
      ...empty,
      parseError: "Not a valid Gitea PR URL (expected https://host/owner/repo/pulls/NUMBER)",
    };
  }
  return {
    prUrl: p.prUrl,
    owner: p.owner,
    repo: p.repo,
    prIndex: p.prIndex,
    giteaBaseUrl: p.giteaBaseUrl.replace(/\/+$/, ""),
    parseError: null,
  };
}

/** Returns a fetcher meta object with every field set to null. */
function emptyFetcherMeta(): PrSummarizerMeta["fetcher"] {
  return {
    prUrl: null,
    owner: null,
    repo: null,
    prIndex: null,
    giteaBaseUrl: null,
    title: null,
    state: null,
    diffText: null,
    diffByteLength: null,
    httpStatus: null,
    errorMessage: null,
  };
}

/**
 * Workflow definition. The moderator runs the roles in a fixed order:
 * fetcher -> analyzer -> writer -> END. Roles report failures through
 * `meta.errorMessage` in their result, and the next role checks that field.
 */
const workflow: WorkflowDefinition<PrSummarizerMeta> = {
  name: "pr-summarizer",
  roles: {
    /**
     * Resolves the PR from the prompt, then fetches the PR JSON (title/state)
     * and the unified diff from the Gitea API using `GITEA_TOKEN`.
     */
    async fetcher(start: StartStep): Promise<RoleResult<PrSummarizerMeta["fetcher"]>> {
      const resolved = resolvePrFromContent(start.content);
      if (resolved.parseError !== null) {
        const meta: PrSummarizerMeta["fetcher"] = {
          ...emptyFetcherMeta(),
          errorMessage: resolved.parseError,
        };
        return { content: `Fetcher: parse error — ${resolved.parseError}`, meta };
      }

      // Trim once: the same value is validated here and sent in the header
      // below, so stray whitespace or a trailing newline cannot corrupt it.
      const token = process.env.GITEA_TOKEN?.trim() ?? "";
      if (token === "") {
        const meta: PrSummarizerMeta["fetcher"] = {
          ...emptyFetcherMeta(),
          prUrl: resolved.prUrl,
          owner: resolved.owner,
          repo: resolved.repo,
          prIndex: resolved.prIndex,
          giteaBaseUrl: resolved.giteaBaseUrl,
          errorMessage: "GITEA_TOKEN is not set",
        };
        return { content: "Fetcher: missing GITEA_TOKEN (set env before running).", meta };
      }

      // NOTE(review): the token is sent to whatever host the prompt names.
      // Consider restricting giteaBaseUrl to a configured allow-list.
      // NOTE(review): neither fetch below has a timeout; an unresponsive
      // server would block this role — confirm whether the host enforces one.
      const apiRoot = `${resolved.giteaBaseUrl}/api/v1`;
      const pullJsonUrl = `${apiRoot}/repos/${resolved.owner}/${resolved.repo}/pulls/${resolved.prIndex}`;
      const pullDiffUrl = `${pullJsonUrl}.diff`;
      const headersJson: Record<string, string> = {
        Authorization: `token ${token}`,
        Accept: "application/json",
      };

      let title: string | null = null;
      let state: string | null = null;
      let httpStatus: number | null = null;
      let jsonError: string | null = null;

      try {
        const prRes = await fetch(pullJsonUrl, { headers: headersJson });
        httpStatus = prRes.status;
        const bodyText = await prRes.text();
        if (!prRes.ok) {
          jsonError = `GET PR JSON failed: HTTP ${prRes.status} ${bodyText.slice(0, 500)}`;
        } else {
          const data = JSON.parse(bodyText) as Record<string, unknown>;
          const t = data.title;
          const s = data.state;
          title = typeof t === "string" ? t : null;
          state = typeof s === "string" ? s : null;
        }
      } catch (e) {
        jsonError = e instanceof Error ? e.message : String(e);
      }

      let diffText: string | null = null;
      let diffByteLength: number | null = null;
      // A failed JSON request is reported as the overall error; the diff is
      // only requested when the JSON request succeeded.
      let diffError: string | null = jsonError;
      let diffCharTruncated = false;

      if (jsonError === null) {
        try {
          const diffRes = await fetch(pullDiffUrl, {
            headers: {
              Authorization: `token ${token}`,
              Accept: "text/plain",
            },
          });
          httpStatus = diffRes.status;
          const rawDiff = await diffRes.text();
          if (!diffRes.ok) {
            diffError = `GET PR diff failed: HTTP ${diffRes.status} ${rawDiff.slice(0, 500)}`;
          } else {
            // Byte length describes the full diff, even when the stored text is truncated.
            diffByteLength = Buffer.byteLength(rawDiff, "utf8");
            if (rawDiff.length > DIFF_TEXT_MAX_CHARS) {
              diffText = rawDiff.slice(0, DIFF_TEXT_MAX_CHARS);
              diffCharTruncated = true;
            } else {
              diffText = rawDiff;
            }
          }
        } catch (e) {
          diffError = e instanceof Error ? e.message : String(e);
        }
      }

      const truncatedNote =
        diffCharTruncated && diffByteLength !== null
          ? ` (diff truncated in meta to ${DIFF_TEXT_MAX_CHARS} chars; full byte length ${diffByteLength})`
          : "";

      const meta: PrSummarizerMeta["fetcher"] = {
        prUrl: resolved.prUrl,
        owner: resolved.owner,
        repo: resolved.repo,
        prIndex: resolved.prIndex,
        giteaBaseUrl: resolved.giteaBaseUrl,
        title,
        state,
        diffText,
        diffByteLength,
        httpStatus,
        errorMessage: diffError,
      };

      const content =
        diffError !== null
          ? `Fetcher: ${resolved.owner}/${resolved.repo}#${resolved.prIndex} — failed. ${diffError}`
          : `Fetcher: ${resolved.owner}/${resolved.repo}#${resolved.prIndex} — ${title ?? "(no title)"} [${state ?? "?"}] diff bytes=${diffByteLength ?? 0} HTTP=${httpStatus ?? "?"}${truncatedNote}`;

      return { content, meta };
    },

    /**
     * Produces a technical analysis of the fetched diff. Skips when the
     * previous message is not a successful fetcher result, returns a stub on
     * dry run, and falls back to a static excerpt when no LLM provider is
     * configured.
     */
    async analyzer(
      start: StartStep,
      messages: WorkflowMessage[],
    ): Promise<RoleResult<PrSummarizerMeta["analyzer"]>> {
      // NOTE(review): the reason appears twice in `content`; kept as-is in
      // case a consumer relies on it — confirm whether that is intended.
      const skip = (reason: string): RoleResult<PrSummarizerMeta["analyzer"]> => ({
        content: `Analyzer skipped: ${reason}\n\n${reason}`,
        meta: {
          analysisMarkdown: `## 无法分析\n\n${reason}`,
          providerModel: null,
          errorMessage: reason,
        },
      });

      // `last` is undefined when `messages` is empty; check before touching `.meta`.
      const last = messages[messages.length - 1];
      if (last === undefined || last.role !== "fetcher") {
        return skip("上一则消息不是 fetcher 输出");
      }
      const fm = last.meta as PrSummarizerMeta["fetcher"];
      if (fm.errorMessage !== null) {
        return skip(`拉取阶段失败: ${fm.errorMessage}`);
      }
      const diff = fm.diffText;
      if (diff === null || diff.length === 0) {
        return skip("diff 为空，无法分析");
      }

      if (isDryRun(start)) {
        return {
          content: "[dryRun] Analyzer skipped real LLM call.",
          meta: {
            analysisMarkdown: "## dryRun\n\n未调用模型。",
            providerModel: null,
            errorMessage: null,
          },
        };
      }

      const provider = await resolveDashScopeProvider();
      if (provider === null) {
        // No credentials: emit a static summary with the first 80 diff lines.
        const diffLines = diff.split("\n");
        const excerpt = diffLines.slice(0, 80).join("\n");
        const analysisMarkdown =
          `## 静态摘要（无 LLM 凭据）\n\n` +
          `- 仓库: ${fm.owner}/${fm.repo} PR #${fm.prIndex}\n` +
          `- 标题: ${fm.title ?? "(null)"}\n` +
          `- diff 行数（近似）: ${diffLines.length}\n\n` +
          `### Diff 开头\n\n\`\`\`diff\n${excerpt}\n\`\`\`\n`;
        return {
          content: analysisMarkdown,
          meta: {
            analysisMarkdown,
            providerModel: null,
            errorMessage: null,
          },
        };
      }

      const truncated = diff.length > DIFF_LLM_MAX_CHARS;
      const diffForModel = truncated ? diff.slice(0, DIFF_LLM_MAX_CHARS) : diff;

      const bundle =
        `Repository: ${fm.owner}/${fm.repo} PR index ${fm.prIndex}\n` +
        `Title: ${fm.title ?? ""}\n` +
        `State: ${fm.state ?? ""}\n` +
        (truncated ? `\n(diff truncated for model input to ${DIFF_LLM_MAX_CHARS} chars)\n` : "") +
        `\n--- unified diff ---\n${diffForModel}`;

      const extractPrompt =
        `${nerveAgentContext}\n\n` +
        `You are a senior reviewer. Analyze this Gitea pull request diff.\n` +
        `Output structured findings as Markdown: scope, files touched, behavior change, risks, test ideas.\n\n` +
        `Optional nerve.yaml context:\n\`\`\`yaml\n${getNerveYaml().slice(0, 4000)}\n\`\`\`\n\n` +
        `---\n${bundle}`;

      const extracted = await llmExtract({
        text: extractPrompt,
        schema: analysisExtractSchema,
        provider,
        dryRun: false,
      });

      if (!extracted.ok) {
        const errText = JSON.stringify(extracted.error);
        return {
          content: `Analyzer LLM error: ${errText}`,
          meta: {
            analysisMarkdown: null,
            providerModel: provider.model,
            errorMessage: errText,
          },
        };
      }

      const analysisMarkdown = extracted.value.analysisMarkdown;
      return {
        content: analysisMarkdown,
        meta: {
          analysisMarkdown,
          providerModel: provider.model,
          errorMessage: null,
        },
      };
    },

    /**
     * Rewrites the analyzer output as the final Chinese Markdown summary.
     * Returns an error document when the analysis is missing or failed, a
     * stub on dry run, and an excerpt of the analysis when no LLM provider is
     * configured.
     */
    async writer(
      start: StartStep,
      messages: WorkflowMessage[],
    ): Promise<RoleResult<PrSummarizerMeta["writer"]>> {
      const errOut = (msg: string): RoleResult<PrSummarizerMeta["writer"]> => ({
        content: `## 错误\n\n${msg}`,
        meta: {
          summaryZhMarkdown: `## 错误\n\n${msg}`,
          errorMessage: msg,
        },
      });

      // `last` is undefined when `messages` is empty; check before touching `.meta`.
      const last = messages[messages.length - 1];
      if (last === undefined || last.role !== "analyzer") {
        return errOut("上一则消息不是 analyzer 输出，无法生成总结。");
      }
      const am = last.meta as PrSummarizerMeta["analyzer"];
      if (am.errorMessage !== null) {
        return errOut(`分析阶段失败，未生成臆造总结：${am.errorMessage}`);
      }
      const analysis = am.analysisMarkdown;
      if (analysis === null || analysis.trim() === "") {
        return errOut("分析正文为空，无法生成中文总结。");
      }

      if (isDryRun(start)) {
        const stub = "## dryRun\n\n未调用模型生成中文总结。";
        return {
          content: stub,
          meta: { summaryZhMarkdown: stub, errorMessage: null },
        };
      }

      const provider = await resolveDashScopeProvider();
      if (provider === null) {
        const stub =
          `## 中文摘要（无 LLM）\n\n` +
          `以下为上游分析原文摘录，请配置 DASHSCOPE 相关凭据以生成压缩中文总结。\n\n${analysis.slice(0, 8000)}`;
        return {
          content: stub,
          meta: { summaryZhMarkdown: stub, errorMessage: null },
        };
      }

      const writerPrompt =
        `将下列 PR 技术分析改写为**中文 Markdown**交付物，包含：\n` +
        `- 标题（含仓库与 PR 编号）\n` +
        `- 变更要点（条列）\n` +
        `- 风险与注意事项\n` +
        `- 测试建议\n\n` +
        `---\n${analysis}`;

      const extracted = await llmExtract({
        text: writerPrompt,
        schema: summaryExtractSchema,
        provider,
        dryRun: false,
      });

      if (!extracted.ok) {
        const msg = JSON.stringify(extracted.error);
        return errOut(`Writer LLM 失败: ${msg}`);
      }

      const summaryZhMarkdown = extracted.value.summaryZhMarkdown;
      return {
        content: summaryZhMarkdown,
        meta: {
          summaryZhMarkdown,
          errorMessage: null,
        },
      };
    },
  },

  /** Picks the next role from the role of the most recent step. */
  moderator(context: ModeratorContext<PrSummarizerMeta>) {
    const lastStep = context.steps[context.steps.length - 1];
    if (lastStep === undefined) {
      // No steps yet: start with the fetcher.
      return "fetcher";
    }
    switch (lastStep.role) {
      case "fetcher":
        return "analyzer";
      case "analyzer":
        return "writer";
      default:
        // After the writer, or any unexpected role, stop.
        return END;
    }
  },
};

export default workflow;