小橘 9f2067db7d fix: add dryRun early-return for all roles in workflow-generator
llmExtract returns {} as T in dryRun mode, causing .map() on undefined.
Each role now returns mock data immediately when dryRun is true.

小橘 🍊(NEKO Team)
2026-04-25 04:24:42 +00:00

576 lines
17 KiB
TypeScript

/**
* PR 摘要工作流:从 Gitea 拉取 PR 与 diff,可选 LLM 分析后输出中文 Markdown 总结。
* 宿主需在 nerve.yaml 中注册 workflows.pr-summarizer;触发示例:
* nerve workflow trigger pr-summarizer --payload '{"prompt":"<PR URL 或 JSON>"}'
* Sense 可返回 workflow: `pr-summarizer|50|<prompt>`(见 parseSenseWorkflowDirective)。
*/
import type {
ModeratorContext,
RoleResult,
StartStep,
WorkflowDefinition,
WorkflowMessage,
} from "@uncaged/nerve-core";
import { END } from "@uncaged/nerve-core";
import {
isDryRun,
llmExtract,
nerveAgentContext,
readNerveYaml,
spawnSafe,
} from "@uncaged/nerve-workflow-utils";
import { join } from "node:path";
import { z } from "zod";
// Home directory used to locate the nerve root; a fixed path is used when HOME is unset.
// NOTE(review): the fallback is a machine-specific path — confirm it is intended
// (os.homedir() may be the more portable choice).
const HOME = process.env.HOME ?? "/home/azureuser";
// Directory handed to readNerveYaml as `nerveRoot` and used as `cwd` for the `cfg` command.
const NERVE_ROOT = join(HOME, ".uncaged-nerve");
/** Maximum number of characters of the unified diff stored in meta (longer diffs are truncated and the truncation is mentioned in content). */
const DIFF_TEXT_MAX_CHARS = 1_500_000;
/** Upper bound on the length of the diff prefix sent to the analysis model. */
const DIFF_LLM_MAX_CHARS = 100_000;
/**
 * Per-role `meta` payloads of the pr-summarizer workflow.
 * Each role reports failure through its own `errorMessage` (null on success).
 */
type PrSummarizerMeta = {
/** Produced by the fetcher role. */
fetcher: {
/** PR web URL taken from the prompt, or built from base URL / owner / repo / index. */
prUrl: string | null;
/** Repository owner. */
owner: string | null;
/** Repository name. */
repo: string | null;
/** Pull request index (positive integer). */
prIndex: number | null;
/** Gitea base URL with trailing slashes removed. */
giteaBaseUrl: string | null;
/** `title` field of the PR JSON response when it is a string. */
title: string | null;
/** `state` field of the PR JSON response when it is a string. */
state: string | null;
/** Unified diff text, cut to DIFF_TEXT_MAX_CHARS characters when longer. */
diffText: string | null;
/** UTF-8 byte length of the diff as received (before truncation). */
diffByteLength: number | null;
/** Status code of the most recent HTTP response received by the fetcher. */
httpStatus: number | null;
/** Parse, credential, or HTTP failure description. */
errorMessage: string | null;
};
/** Produced by the analyzer role. */
analyzer: {
/** Analysis text in Markdown; null when the LLM call failed. */
analysisMarkdown: string | null;
/** Model name of the resolved provider when an LLM call was attempted, otherwise null. */
providerModel: string | null;
/** Skip reason or serialized LLM error. */
errorMessage: string | null;
};
/** Produced by the writer role. */
writer: {
/** Final Chinese Markdown summary (or an error document on failure). */
summaryZhMarkdown: string | null;
/** Failure description. */
errorMessage: string | null;
};
};
/**
 * Shape of a JSON-object prompt accepted by resolvePrFromContent.
 * Every field may be omitted or null; `index`, when present, must be a positive integer.
 */
const jsonPromptSchema = z.object({
prUrl: z.string().nullish(),
owner: z.string().nullish(),
repo: z.string().nullish(),
index: z.number().int().positive().nullish(),
baseUrl: z.string().nullish(),
});
/** Schema passed to llmExtract by the analyzer role: a single Markdown string field. */
const analysisExtractSchema = z
.object({
analysisMarkdown: z.string().describe("Technical PR analysis in Markdown (can be English)."),
})
.describe("Structured PR analysis from the diff.");
/** Schema passed to llmExtract by the writer role: a single Chinese-Markdown string field. */
const summaryExtractSchema = z
.object({
summaryZhMarkdown: z
.string()
.describe(
"Final deliverable: Chinese Markdown with title, key changes, risks, and test suggestions.",
),
})
.describe("Chinese Markdown PR summary.");
/**
 * Loads nerve.yaml from NERVE_ROOT for use as optional prompt context.
 * @returns the value reported by readNerveYaml, or a placeholder comment line when reading fails.
 */
function getNerveYaml(): string {
  const loaded = readNerveYaml({ nerveRoot: NERVE_ROOT });
  if (!loaded.ok) {
    return "# nerve.yaml unavailable";
  }
  return loaded.value;
}
/**
 * Runs `cfg get <key>` from NERVE_ROOT (10 s timeout) and returns what it printed.
 * @param key configuration key to look up
 * @returns trimmed stdout, or null when the command fails or prints only whitespace
 */
async function cfgGet(key: string): Promise<string | null> {
  const outcome = await spawnSafe("cfg", ["get", key], {
    cwd: NERVE_ROOT,
    env: null,
    timeoutMs: 10_000,
  });
  if (outcome.ok) {
    const printed = outcome.value.stdout.trim();
    return printed === "" ? null : printed;
  }
  return null;
}
/**
 * Resolves the DashScope LLM provider settings.
 *
 * Each setting is taken from the environment variable of the same name when it is
 * defined (even if empty), otherwise from `cfg get`. The lookups run one after
 * another. The model defaults to "qwen-plus".
 *
 * @returns the provider settings, or null when the API key or base URL is missing or empty
 */
async function resolveDashScopeProvider(): Promise<{
  baseUrl: string;
  apiKey: string;
  model: string;
} | null> {
  // `??` only falls through on undefined/null, so a defined-but-empty env var is kept as is.
  const lookup = async (name: string): Promise<string | null> =>
    process.env[name] ?? (await cfgGet(name));

  const apiKey = await lookup("DASHSCOPE_API_KEY");
  const baseUrl = await lookup("DASHSCOPE_BASE_URL");
  const model = (await lookup("DASHSCOPE_MODEL")) ?? "qwen-plus";

  if (apiKey && baseUrl) {
    return { apiKey, baseUrl, model };
  }
  return null;
}
/**
 * Parses a Gitea pull-request web URL of the form
 * `http(s)://host[/sub/path]/owner/repo/pulls/NUMBER[/...]`.
 *
 * Path segments in front of `owner` are treated as the sub-path Gitea is
 * served under and are kept in `giteaBaseUrl`, so the API root derived from it
 * points at the same installation.
 *
 * @param raw URL text; surrounding whitespace is ignored
 * @returns the parsed parts (`prUrl` is the trimmed input), or null when the text is
 *          not an http(s) URL, has no `owner/repo/pulls/NUMBER` path, or the number
 *          is not a positive safe integer
 */
function parseGiteaPullUrl(raw: string): {
  giteaBaseUrl: string;
  owner: string;
  repo: string;
  prIndex: number;
  prUrl: string;
} | null {
  const prUrl = raw.trim();
  let u: URL;
  try {
    u = new URL(prUrl);
  } catch {
    return null;
  }
  if (u.protocol !== "http:" && u.protocol !== "https:") {
    return null;
  }
  // filter(Boolean) drops empty segments, which also covers leading/trailing slashes.
  const parts = u.pathname.split("/").filter(Boolean);
  const pullsAt = parts.indexOf("pulls");
  if (pullsAt < 2 || pullsAt + 1 >= parts.length) {
    return null;
  }
  const indexStr = parts[pullsAt + 1];
  if (!indexStr || !/^\d+$/.test(indexStr)) {
    return null;
  }
  const owner = parts[pullsAt - 2];
  const repo = parts[pullsAt - 1];
  if (!owner || !repo) {
    return null;
  }
  const prIndex = Number.parseInt(indexStr, 10);
  // Safe-integer check rejects digit strings too long to round-trip through a number.
  if (!Number.isSafeInteger(prIndex) || prIndex < 1) {
    return null;
  }
  const subPath = parts.slice(0, pullsAt - 2);
  const giteaBaseUrl = [`${u.protocol}//${u.host}`, ...subPath].join("/");
  return { giteaBaseUrl, owner, repo, prIndex, prUrl };
}
/**
 * Result of resolvePrFromContent.
 * On failure `parseError` holds the reason and the other fields are null;
 * on success `parseError` is null and the other fields are filled in.
 */
type ResolvedPr = {
prUrl: string | null;
owner: string | null;
repo: string | null;
prIndex: number | null;
giteaBaseUrl: string | null;
parseError: string | null;
};
/**
 * Turns the workflow prompt into a PR reference.
 *
 * Two prompt forms are accepted:
 * - text starting with `{`: a JSON object validated by jsonPromptSchema. Explicit
 *   `owner` / `repo` / `index` / `baseUrl` win; anything missing is filled from
 *   `prUrl` when that URL parses.
 * - anything else: a Gitea PR URL handed to parseGiteaPullUrl.
 *
 * @param content raw prompt text
 * @returns a ResolvedPr; `parseError` is non-null (and every other field null) on failure
 */
function resolvePrFromContent(content: string): ResolvedPr {
  const fail = (parseError: string): ResolvedPr => ({
    prUrl: null,
    owner: null,
    repo: null,
    prIndex: null,
    giteaBaseUrl: null,
    parseError,
  });

  const text = content.trim();
  if (text === "") {
    return fail("Empty prompt");
  }

  // Plain-URL prompt.
  if (!text.startsWith("{")) {
    const fromUrl = parseGiteaPullUrl(text);
    if (fromUrl === null) {
      return fail("Not a valid Gitea PR URL (expected https://host/owner/repo/pulls/NUMBER)");
    }
    return {
      prUrl: fromUrl.prUrl,
      owner: fromUrl.owner,
      repo: fromUrl.repo,
      prIndex: fromUrl.prIndex,
      giteaBaseUrl: fromUrl.giteaBaseUrl.replace(/\/+$/, ""),
      parseError: null,
    };
  }

  // JSON prompt.
  let decoded: unknown;
  try {
    decoded = JSON.parse(text) as unknown;
  } catch {
    return fail("Invalid JSON in prompt");
  }
  const checked = jsonPromptSchema.safeParse(decoded);
  if (!checked.success) {
    return fail(`JSON validation failed: ${checked.error.message}`);
  }
  const fields = checked.data;

  // Only nullish explicit fields are filled from the URL; an empty string is kept
  // and rejected by the completeness check below.
  const fromUrl = fields.prUrl ? parseGiteaPullUrl(fields.prUrl) : null;
  const owner = fields.owner ?? fromUrl?.owner ?? null;
  const repo = fields.repo ?? fromUrl?.repo ?? null;
  const prIndex = fields.index ?? fromUrl?.prIndex ?? null;
  const giteaBaseUrl = fields.baseUrl ?? fromUrl?.giteaBaseUrl ?? null;
  const prUrl = fields.prUrl ?? null;

  if (!owner || !repo || prIndex === null || !giteaBaseUrl) {
    return fail(
      "JSON prompt must include resolvable owner, repo, pr index, and baseUrl (or prUrl)",
    );
  }

  const normalizedBase = giteaBaseUrl.replace(/\/+$/, "");
  return {
    prUrl: prUrl ?? `${normalizedBase}/${owner}/${repo}/pulls/${prIndex}`,
    owner,
    repo,
    prIndex,
    giteaBaseUrl: normalizedBase,
    parseError: null,
  };
}
/**
 * Creates a fetcher meta object with every field set to null.
 * A new object is returned on each call so callers can spread or mutate it freely.
 */
function emptyFetcherMeta(): PrSummarizerMeta["fetcher"] {
  const blank: PrSummarizerMeta["fetcher"] = {
    // PR identity
    prUrl: null,
    owner: null,
    repo: null,
    prIndex: null,
    giteaBaseUrl: null,
    // PR JSON fields
    title: null,
    state: null,
    // diff payload
    diffText: null,
    diffByteLength: null,
    // transport outcome
    httpStatus: null,
    errorMessage: null,
  };
  return blank;
}
/**
 * `pr-summarizer` workflow definition.
 *
 * The moderator runs the roles strictly in sequence: fetcher → analyzer → writer → END.
 * Each role reports failure through `meta.errorMessage` instead of throwing, and
 * each later role refuses to continue when the previous step failed.
 */
const workflow: WorkflowDefinition<PrSummarizerMeta> = {
  name: "pr-summarizer",
  roles: {
    /**
     * Resolves the PR reference from the prompt and downloads the PR JSON and its
     * unified diff from the Gitea API, authenticated with GITEA_TOKEN.
     *
     * NOTE(review): the API host is derived from the prompt, so the token is sent to
     * whatever host the prompt names — consider restricting the accepted base URLs.
     */
    async fetcher(start: StartStep): Promise<RoleResult<PrSummarizerMeta["fetcher"]>> {
      const resolved = resolvePrFromContent(start.content);
      const { owner, repo, prIndex, giteaBaseUrl } = resolved;
      if (
        resolved.parseError !== null ||
        owner === null ||
        repo === null ||
        prIndex === null ||
        giteaBaseUrl === null
      ) {
        // The null checks only narrow the types; a successful resolve fills every field.
        const reason = resolved.parseError ?? "Incomplete PR reference";
        const meta: PrSummarizerMeta["fetcher"] = {
          ...emptyFetcherMeta(),
          errorMessage: reason,
        };
        return { content: `Fetcher: parse error — ${reason}`, meta };
      }

      const token = process.env.GITEA_TOKEN ?? null;
      if (!token || token.trim() === "") {
        const meta: PrSummarizerMeta["fetcher"] = {
          ...emptyFetcherMeta(),
          prUrl: resolved.prUrl,
          owner,
          repo,
          prIndex,
          giteaBaseUrl,
          errorMessage: "GITEA_TOKEN is not set",
        };
        return { content: "Fetcher: missing GITEA_TOKEN (set env before running).", meta };
      }

      // owner/repo come from the prompt: encode them so they stay single path segments.
      const apiRoot = `${giteaBaseUrl}/api/v1`;
      const pullJsonUrl =
        `${apiRoot}/repos/${encodeURIComponent(owner)}/${encodeURIComponent(repo)}` +
        `/pulls/${prIndex}`;
      const pullDiffUrl = `${pullJsonUrl}.diff`;
      const headersJson: Record<string, string> = {
        Authorization: `token ${token}`,
        Accept: "application/json",
      };

      let title: string | null = null;
      let state: string | null = null;
      let httpStatus: number | null = null;
      let jsonError: string | null = null;
      try {
        const prRes = await fetch(pullJsonUrl, { headers: headersJson });
        httpStatus = prRes.status;
        const bodyText = await prRes.text();
        if (!prRes.ok) {
          jsonError = `GET PR JSON failed: HTTP ${prRes.status} ${bodyText.slice(0, 500)}`;
        } else {
          const data = JSON.parse(bodyText) as Record<string, unknown>;
          const t = data.title;
          const s = data.state;
          title = typeof t === "string" ? t : null;
          state = typeof s === "string" ? s : null;
        }
      } catch (e) {
        jsonError = e instanceof Error ? e.message : String(e);
      }

      let diffText: string | null = null;
      let diffByteLength: number | null = null;
      let diffError: string | null = jsonError;
      let diffCharTruncated = false;
      // The diff is only requested when the PR JSON request succeeded.
      if (jsonError === null) {
        try {
          const diffRes = await fetch(pullDiffUrl, {
            headers: {
              Authorization: `token ${token}`,
              Accept: "text/plain",
            },
          });
          httpStatus = diffRes.status;
          const rawDiff = await diffRes.text();
          if (!diffRes.ok) {
            diffError = `GET PR diff failed: HTTP ${diffRes.status} ${rawDiff.slice(0, 500)}`;
          } else {
            // Byte length describes the full diff even when the stored text is cut.
            diffByteLength = Buffer.byteLength(rawDiff, "utf8");
            if (rawDiff.length > DIFF_TEXT_MAX_CHARS) {
              diffText = rawDiff.slice(0, DIFF_TEXT_MAX_CHARS);
              diffCharTruncated = true;
            } else {
              diffText = rawDiff;
            }
          }
        } catch (e) {
          diffError = e instanceof Error ? e.message : String(e);
        }
      }

      const truncatedNote =
        diffCharTruncated && diffByteLength !== null
          ? ` (diff truncated in meta to ${DIFF_TEXT_MAX_CHARS} chars; full byte length ${diffByteLength})`
          : "";
      const meta: PrSummarizerMeta["fetcher"] = {
        prUrl: resolved.prUrl,
        owner,
        repo,
        prIndex,
        giteaBaseUrl,
        title,
        state,
        diffText,
        diffByteLength,
        httpStatus,
        errorMessage: diffError,
      };
      // The PR number and title are separated so that "#12" + "3 fixes" cannot read as "#123 fixes".
      const content =
        diffError !== null
          ? `Fetcher: ${owner}/${repo}#${prIndex} — failed. ${diffError}`
          : `Fetcher: ${owner}/${repo}#${prIndex} — ${title ?? "(no title)"} [${state ?? "?"}] diff bytes=${diffByteLength ?? 0} HTTP=${httpStatus ?? "?"}${truncatedNote}`;
      return { content, meta };
    },

    /**
     * Analyzes the fetched diff. Skips when the previous message is missing, is not
     * from the fetcher, reports an error, or carries no diff. In dry-run mode, or
     * when no DashScope credentials resolve, no model is called and a stub / static
     * summary is returned instead.
     */
    async analyzer(
      start: StartStep,
      messages: WorkflowMessage[],
    ): Promise<RoleResult<PrSummarizerMeta["analyzer"]>> {
      const skip = (reason: string): RoleResult<PrSummarizerMeta["analyzer"]> => ({
        content: `Analyzer skipped: ${reason}\n\n${reason}`,
        meta: {
          analysisMarkdown: `## 无法分析\n\n${reason}`,
          providerModel: null,
          errorMessage: reason,
        },
      });

      // Guard against an empty history before touching the last message.
      const last: WorkflowMessage | undefined = messages[messages.length - 1];
      if (last === undefined || last.role !== "fetcher") {
        return skip("上一则消息不是 fetcher 输出");
      }
      const fm = last.meta as PrSummarizerMeta["fetcher"];
      if (fm.errorMessage !== null) {
        return skip(`拉取阶段失败: ${fm.errorMessage}`);
      }
      const diff = fm.diffText;
      if (diff === null || diff.length === 0) {
        return skip("diff 为空,无法分析");
      }

      if (isDryRun(start)) {
        return {
          content: "[dryRun] Analyzer skipped real LLM call.",
          meta: {
            analysisMarkdown: "## dryRun\n\n未调用模型。",
            providerModel: null,
            errorMessage: null,
          },
        };
      }

      const provider = await resolveDashScopeProvider();
      if (provider === null) {
        // No credentials: fall back to a static summary with the first 80 diff lines.
        const excerpt = diff.split("\n").slice(0, 80).join("\n");
        const analysisMarkdown =
          `## 静态摘要(无 LLM 凭据)\n\n` +
          `- 仓库: ${fm.owner}/${fm.repo} PR #${fm.prIndex}\n` +
          `- 标题: ${fm.title ?? "(null)"}\n` +
          `- diff 行数(近似): ${diff.split("\n").length}\n\n` +
          `### Diff 开头\n\n\`\`\`diff\n${excerpt}\n\`\`\`\n`;
        return {
          content: analysisMarkdown,
          meta: {
            analysisMarkdown,
            providerModel: null,
            errorMessage: null,
          },
        };
      }

      // Only a prefix of the diff is sent to the model; the prompt says so when it is cut.
      const truncated = diff.length > DIFF_LLM_MAX_CHARS;
      const diffForModel = truncated ? diff.slice(0, DIFF_LLM_MAX_CHARS) : diff;
      const bundle =
        `Repository: ${fm.owner}/${fm.repo} PR index ${fm.prIndex}\n` +
        `Title: ${fm.title ?? ""}\n` +
        `State: ${fm.state ?? ""}\n` +
        (truncated ? `\n(diff truncated for model input to ${DIFF_LLM_MAX_CHARS} chars)\n` : "") +
        `\n--- unified diff ---\n${diffForModel}`;
      const extractPrompt =
        `${nerveAgentContext}\n\n` +
        `You are a senior reviewer. Analyze this Gitea pull request diff.\n` +
        `Output structured findings as Markdown: scope, files touched, behavior change, risks, test ideas.\n\n` +
        `Optional nerve.yaml context:\n\`\`\`yaml\n${getNerveYaml().slice(0, 4000)}\n\`\`\`\n\n` +
        `---\n${bundle}`;
      const extracted = await llmExtract({
        text: extractPrompt,
        schema: analysisExtractSchema,
        provider,
        dryRun: false,
      });
      if (!extracted.ok) {
        const errText = JSON.stringify(extracted.error);
        return {
          content: `Analyzer LLM error: ${errText}`,
          meta: {
            analysisMarkdown: null,
            providerModel: provider.model,
            errorMessage: errText,
          },
        };
      }
      const analysisMarkdown = extracted.value.analysisMarkdown;
      return {
        content: analysisMarkdown,
        meta: {
          analysisMarkdown,
          providerModel: provider.model,
          errorMessage: null,
        },
      };
    },

    /**
     * Rewrites the analyzer output as the final Chinese Markdown summary. Returns an
     * error document when the previous message is missing, is not from the analyzer,
     * reports an error, or has no analysis text. In dry-run mode, or without DashScope
     * credentials, no model is called.
     */
    async writer(
      start: StartStep,
      messages: WorkflowMessage[],
    ): Promise<RoleResult<PrSummarizerMeta["writer"]>> {
      const errOut = (msg: string): RoleResult<PrSummarizerMeta["writer"]> => ({
        content: `## 错误\n\n${msg}`,
        meta: {
          summaryZhMarkdown: `## 错误\n\n${msg}`,
          errorMessage: msg,
        },
      });

      // Guard against an empty history before touching the last message.
      const last: WorkflowMessage | undefined = messages[messages.length - 1];
      if (last === undefined || last.role !== "analyzer") {
        return errOut("上一则消息不是 analyzer 输出,无法生成总结。");
      }
      const am = last.meta as PrSummarizerMeta["analyzer"];
      if (am.errorMessage !== null) {
        return errOut(`分析阶段失败,未生成臆造总结:${am.errorMessage}`);
      }
      const analysis = am.analysisMarkdown;
      if (analysis === null || analysis.trim() === "") {
        return errOut("分析正文为空,无法生成中文总结。");
      }

      if (isDryRun(start)) {
        const stub = "## dryRun\n\n未调用模型生成中文总结。";
        return {
          content: stub,
          meta: { summaryZhMarkdown: stub, errorMessage: null },
        };
      }

      const provider = await resolveDashScopeProvider();
      if (provider === null) {
        // No credentials: pass through the first 8000 characters of the analysis.
        const stub =
          `## 中文摘要(无 LLM)\n\n` +
          `以下为上游分析原文摘录,请配置 DASHSCOPE 相关凭据以生成压缩中文总结。\n\n${analysis.slice(0, 8000)}`;
        return {
          content: stub,
          meta: { summaryZhMarkdown: stub, errorMessage: null },
        };
      }

      const writerPrompt =
        `将下列 PR 技术分析改写为**中文 Markdown**交付物,包含:\n` +
        `- 标题(含仓库与 PR 编号)\n` +
        `- 变更要点(条列)\n` +
        `- 风险与注意事项\n` +
        `- 测试建议\n\n` +
        `---\n${analysis}`;
      const extracted = await llmExtract({
        text: writerPrompt,
        schema: summaryExtractSchema,
        provider,
        dryRun: false,
      });
      if (!extracted.ok) {
        const msg = JSON.stringify(extracted.error);
        return errOut(`Writer LLM 失败: ${msg}`);
      }
      const summaryZhMarkdown = extracted.value.summaryZhMarkdown;
      return {
        content: summaryZhMarkdown,
        meta: {
          summaryZhMarkdown,
          errorMessage: null,
        },
      };
    },
  },

  /**
   * Picks the next role from the last recorded step: nothing yet → fetcher,
   * fetcher → analyzer, analyzer → writer, anything else (including writer) → END.
   */
  moderator(context: ModeratorContext<PrSummarizerMeta>) {
    if (context.steps.length === 0) {
      return "fetcher";
    }
    const signal = context.steps[context.steps.length - 1];
    if (signal.role === "fetcher") {
      return "analyzer";
    }
    if (signal.role === "analyzer") {
      return "writer";
    }
    return END;
  },
};
export default workflow;