Fix: `llmExtract` returns `{} as T` in dryRun mode, which caused `.map()` to be called on undefined downstream.
Each role now returns mock data immediately when dryRun is true, instead of passing dryRun through to `llmExtract`.
小橘 🍊(NEKO Team)
576 lines
17 KiB
TypeScript
576 lines
17 KiB
TypeScript
/**
|
|
* PR 摘要工作流:从 Gitea 拉取 PR 与 diff,可选 LLM 分析后输出中文 Markdown 总结。
|
|
* 宿主需在 nerve.yaml 中注册 workflows.pr-summarizer;触发示例:
|
|
* nerve workflow trigger pr-summarizer --payload '{"prompt":"<PR URL 或 JSON>"}'
|
|
* Sense 可返回 workflow: `pr-summarizer|50|<prompt>`(见 parseSenseWorkflowDirective)。
|
|
*/
|
|
import type {
|
|
ModeratorContext,
|
|
RoleResult,
|
|
StartStep,
|
|
WorkflowDefinition,
|
|
WorkflowMessage,
|
|
} from "@uncaged/nerve-core";
|
|
import { END } from "@uncaged/nerve-core";
|
|
import {
|
|
isDryRun,
|
|
llmExtract,
|
|
nerveAgentContext,
|
|
readNerveYaml,
|
|
spawnSafe,
|
|
} from "@uncaged/nerve-workflow-utils";
|
|
import { join } from "node:path";
|
|
import { z } from "zod";
|
|
|
|
const HOME = process.env.HOME ?? "/home/azureuser";
|
|
const NERVE_ROOT = join(HOME, ".uncaged-nerve");
|
|
|
|
/** unified diff 写入 meta 前的最大字符数(超出则截断并在 content 中说明) */
|
|
const DIFF_TEXT_MAX_CHARS = 1_500_000;
|
|
/** 送给分析模型的 diff 前缀长度上限 */
|
|
const DIFF_LLM_MAX_CHARS = 100_000;
|
|
|
|
/** Per-role metadata carried along the workflow's message steps. */
type PrSummarizerMeta = {
  /** Output of the `fetcher` role: PR coordinates plus the raw diff. */
  fetcher: {
    /** Normalized PR URL, or null when the prompt could not be resolved. */
    prUrl: string | null;
    owner: string | null;
    repo: string | null;
    /** PR number (Gitea's `index`), 1-based. */
    prIndex: number | null;
    /** Scheme + host of the Gitea instance, trailing slashes stripped. */
    giteaBaseUrl: string | null;
    title: string | null;
    state: string | null;
    /** Unified diff text; truncated to DIFF_TEXT_MAX_CHARS when oversized. */
    diffText: string | null;
    /** Byte length of the FULL (untruncated) diff response. */
    diffByteLength: number | null;
    /** Last HTTP status observed (the diff request overwrites the JSON request's). */
    httpStatus: number | null;
    /** Non-null marks the fetch stage as failed; downstream roles then skip. */
    errorMessage: string | null;
  };
  /** Output of the `analyzer` role: technical analysis of the diff. */
  analyzer: {
    analysisMarkdown: string | null;
    /** Model id used for analysis; null when no LLM was invoked. */
    providerModel: string | null;
    errorMessage: string | null;
  };
  /** Output of the `writer` role: the final Chinese Markdown deliverable. */
  writer: {
    summaryZhMarkdown: string | null;
    errorMessage: string | null;
  };
};
|
|
|
|
/** Shape of a JSON prompt: any subset of PR coordinates (see resolvePrFromContent). */
const jsonPromptSchema = z.object({
  prUrl: z.string().nullish(),
  owner: z.string().nullish(),
  repo: z.string().nullish(),
  // PR number must be a positive integer when provided.
  index: z.number().int().positive().nullish(),
  baseUrl: z.string().nullish(),
});

/** Structured-extraction schema for the analyzer's LLM call. */
const analysisExtractSchema = z
  .object({
    analysisMarkdown: z.string().describe("Technical PR analysis in Markdown (can be English)."),
  })
  .describe("Structured PR analysis from the diff.");

/** Structured-extraction schema for the writer's LLM call. */
const summaryExtractSchema = z
  .object({
    summaryZhMarkdown: z
      .string()
      .describe(
        "Final deliverable: Chinese Markdown with title, key changes, risks, and test suggestions.",
      ),
  })
  .describe("Chinese Markdown PR summary.");
|
|
|
|
function getNerveYaml(): string {
|
|
const result = readNerveYaml({ nerveRoot: NERVE_ROOT });
|
|
return result.ok ? result.value : "# nerve.yaml unavailable";
|
|
}
|
|
|
|
async function cfgGet(key: string): Promise<string | null> {
|
|
const result = await spawnSafe("cfg", ["get", key], {
|
|
cwd: NERVE_ROOT,
|
|
env: null,
|
|
timeoutMs: 10_000,
|
|
});
|
|
if (!result.ok) {
|
|
return null;
|
|
}
|
|
return result.value.stdout.trim() || null;
|
|
}
|
|
|
|
async function resolveDashScopeProvider(): Promise<{
|
|
baseUrl: string;
|
|
apiKey: string;
|
|
model: string;
|
|
} | null> {
|
|
const apiKey = process.env.DASHSCOPE_API_KEY ?? (await cfgGet("DASHSCOPE_API_KEY"));
|
|
const baseUrl = process.env.DASHSCOPE_BASE_URL ?? (await cfgGet("DASHSCOPE_BASE_URL"));
|
|
const model =
|
|
process.env.DASHSCOPE_MODEL ?? (await cfgGet("DASHSCOPE_MODEL")) ?? "qwen-plus";
|
|
if (!apiKey || !baseUrl) {
|
|
return null;
|
|
}
|
|
return { apiKey, baseUrl, model };
|
|
}
|
|
|
|
function parseGiteaPullUrl(raw: string): {
|
|
giteaBaseUrl: string;
|
|
owner: string;
|
|
repo: string;
|
|
prIndex: number;
|
|
prUrl: string;
|
|
} | null {
|
|
let u: URL;
|
|
try {
|
|
u = new URL(raw.trim());
|
|
} catch {
|
|
return null;
|
|
}
|
|
if (u.protocol !== "http:" && u.protocol !== "https:") {
|
|
return null;
|
|
}
|
|
const parts = u.pathname.replace(/\/+$/, "").split("/").filter(Boolean);
|
|
const pullsAt = parts.indexOf("pulls");
|
|
if (pullsAt < 2 || pullsAt + 1 >= parts.length) {
|
|
return null;
|
|
}
|
|
const indexStr = parts[pullsAt + 1];
|
|
if (!indexStr || !/^\d+$/.test(indexStr)) {
|
|
return null;
|
|
}
|
|
const owner = parts[pullsAt - 2];
|
|
const repo = parts[pullsAt - 1];
|
|
if (!owner || !repo) {
|
|
return null;
|
|
}
|
|
const prIndex = Number.parseInt(indexStr, 10);
|
|
if (!Number.isFinite(prIndex) || prIndex < 1) {
|
|
return null;
|
|
}
|
|
const giteaBaseUrl = `${u.protocol}//${u.host}`;
|
|
return { giteaBaseUrl, owner, repo, prIndex, prUrl: raw.trim() };
|
|
}
|
|
|
|
/**
 * Result of resolving a user prompt (URL or JSON) into PR coordinates.
 * Either the coordinate fields are populated and parseError is null, or
 * parseError explains why resolution failed.
 */
type ResolvedPr = {
  prUrl: string | null;
  owner: string | null;
  repo: string | null;
  /** PR number, 1-based. */
  prIndex: number | null;
  /** Scheme + host, trailing slashes stripped. */
  giteaBaseUrl: string | null;
  parseError: string | null;
};
|
|
|
|
function resolvePrFromContent(content: string): ResolvedPr {
|
|
const empty: ResolvedPr = {
|
|
prUrl: null,
|
|
owner: null,
|
|
repo: null,
|
|
prIndex: null,
|
|
giteaBaseUrl: null,
|
|
parseError: null,
|
|
};
|
|
const trimmed = content.trim();
|
|
if (!trimmed) {
|
|
return { ...empty, parseError: "Empty prompt" };
|
|
}
|
|
|
|
if (trimmed.startsWith("{")) {
|
|
let parsed: unknown;
|
|
try {
|
|
parsed = JSON.parse(trimmed) as unknown;
|
|
} catch {
|
|
return { ...empty, parseError: "Invalid JSON in prompt" };
|
|
}
|
|
const row = jsonPromptSchema.safeParse(parsed);
|
|
if (!row.success) {
|
|
return { ...empty, parseError: `JSON validation failed: ${row.error.message}` };
|
|
}
|
|
const j = row.data;
|
|
let owner: string | null = j.owner ?? null;
|
|
let repo: string | null = j.repo ?? null;
|
|
let prIndex: number | null = j.index ?? null;
|
|
let giteaBaseUrl: string | null = j.baseUrl ?? null;
|
|
let prUrl: string | null = j.prUrl ?? null;
|
|
|
|
if (j.prUrl) {
|
|
const p = parseGiteaPullUrl(j.prUrl);
|
|
if (p) {
|
|
owner = owner ?? p.owner;
|
|
repo = repo ?? p.repo;
|
|
prIndex = prIndex ?? p.prIndex;
|
|
giteaBaseUrl = giteaBaseUrl ?? p.giteaBaseUrl;
|
|
prUrl = prUrl ?? p.prUrl;
|
|
}
|
|
}
|
|
|
|
if (owner && repo && prIndex !== null && giteaBaseUrl) {
|
|
const normalizedBase = giteaBaseUrl.replace(/\/+$/, "");
|
|
const builtUrl = `${normalizedBase}/${owner}/${repo}/pulls/${prIndex}`;
|
|
return {
|
|
prUrl: prUrl ?? builtUrl,
|
|
owner,
|
|
repo,
|
|
prIndex,
|
|
giteaBaseUrl: normalizedBase,
|
|
parseError: null,
|
|
};
|
|
}
|
|
return {
|
|
...empty,
|
|
parseError: "JSON prompt must include resolvable owner, repo, pr index, and baseUrl (or prUrl)",
|
|
};
|
|
}
|
|
|
|
const p = parseGiteaPullUrl(trimmed);
|
|
if (!p) {
|
|
return {
|
|
...empty,
|
|
parseError: "Not a valid Gitea PR URL (expected https://host/owner/repo/pulls/NUMBER)",
|
|
};
|
|
}
|
|
return {
|
|
prUrl: p.prUrl,
|
|
owner: p.owner,
|
|
repo: p.repo,
|
|
prIndex: p.prIndex,
|
|
giteaBaseUrl: p.giteaBaseUrl.replace(/\/+$/, ""),
|
|
parseError: null,
|
|
};
|
|
}
|
|
|
|
/** Build a fetcher meta record with every field set to null. */
function emptyFetcherMeta(): PrSummarizerMeta["fetcher"] {
  return {
    prUrl: null,
    owner: null,
    repo: null,
    prIndex: null,
    giteaBaseUrl: null,
    title: null,
    state: null,
    diffText: null,
    diffByteLength: null,
    httpStatus: null,
    errorMessage: null,
  };
}
|
|
|
|
const workflow: WorkflowDefinition<PrSummarizerMeta> = {
|
|
name: "pr-summarizer",
|
|
|
|
roles: {
|
|
async fetcher(start: StartStep): Promise<RoleResult<PrSummarizerMeta["fetcher"]>> {
|
|
const resolved = resolvePrFromContent(start.content);
|
|
if (resolved.parseError !== null) {
|
|
const meta: PrSummarizerMeta["fetcher"] = {
|
|
...emptyFetcherMeta(),
|
|
errorMessage: resolved.parseError,
|
|
};
|
|
return { content: `Fetcher: parse error — ${resolved.parseError}`, meta };
|
|
}
|
|
|
|
const token = process.env.GITEA_TOKEN ?? null;
|
|
if (!token || token.trim() === "") {
|
|
const meta: PrSummarizerMeta["fetcher"] = {
|
|
...emptyFetcherMeta(),
|
|
prUrl: resolved.prUrl,
|
|
owner: resolved.owner,
|
|
repo: resolved.repo,
|
|
prIndex: resolved.prIndex,
|
|
giteaBaseUrl: resolved.giteaBaseUrl,
|
|
errorMessage: "GITEA_TOKEN is not set",
|
|
};
|
|
return { content: "Fetcher: missing GITEA_TOKEN (set env before running).", meta };
|
|
}
|
|
|
|
const apiRoot = `${resolved.giteaBaseUrl}/api/v1`;
|
|
const pullJsonUrl = `${apiRoot}/repos/${resolved.owner}/${resolved.repo}/pulls/${resolved.prIndex}`;
|
|
const pullDiffUrl = `${pullJsonUrl}.diff`;
|
|
|
|
const headersJson: Record<string, string> = {
|
|
Authorization: `token ${token}`,
|
|
Accept: "application/json",
|
|
};
|
|
|
|
let title: string | null = null;
|
|
let state: string | null = null;
|
|
let httpStatus: number | null = null;
|
|
let jsonError: string | null = null;
|
|
|
|
try {
|
|
const prRes = await fetch(pullJsonUrl, { headers: headersJson });
|
|
httpStatus = prRes.status;
|
|
const bodyText = await prRes.text();
|
|
if (!prRes.ok) {
|
|
jsonError = `GET PR JSON failed: HTTP ${prRes.status} ${bodyText.slice(0, 500)}`;
|
|
} else {
|
|
const data = JSON.parse(bodyText) as Record<string, unknown>;
|
|
const t = data.title;
|
|
const s = data.state;
|
|
title = typeof t === "string" ? t : null;
|
|
state = typeof s === "string" ? s : null;
|
|
}
|
|
} catch (e) {
|
|
jsonError = e instanceof Error ? e.message : String(e);
|
|
}
|
|
|
|
let diffText: string | null = null;
|
|
let diffByteLength: number | null = null;
|
|
let diffError: string | null = jsonError;
|
|
let diffCharTruncated = false;
|
|
|
|
if (jsonError === null) {
|
|
try {
|
|
const diffRes = await fetch(pullDiffUrl, {
|
|
headers: {
|
|
Authorization: `token ${token}`,
|
|
Accept: "text/plain",
|
|
},
|
|
});
|
|
httpStatus = diffRes.status;
|
|
const rawDiff = await diffRes.text();
|
|
if (!diffRes.ok) {
|
|
diffError = `GET PR diff failed: HTTP ${diffRes.status} ${rawDiff.slice(0, 500)}`;
|
|
} else {
|
|
diffByteLength = Buffer.byteLength(rawDiff, "utf8");
|
|
if (rawDiff.length > DIFF_TEXT_MAX_CHARS) {
|
|
diffText = rawDiff.slice(0, DIFF_TEXT_MAX_CHARS);
|
|
diffCharTruncated = true;
|
|
diffError = null;
|
|
} else {
|
|
diffText = rawDiff;
|
|
}
|
|
}
|
|
} catch (e) {
|
|
diffError = e instanceof Error ? e.message : String(e);
|
|
}
|
|
}
|
|
|
|
const truncatedNote =
|
|
diffCharTruncated && diffByteLength !== null
|
|
? ` (diff truncated in meta to ${DIFF_TEXT_MAX_CHARS} chars; full byte length ${diffByteLength})`
|
|
: "";
|
|
|
|
const meta: PrSummarizerMeta["fetcher"] = {
|
|
prUrl: resolved.prUrl,
|
|
owner: resolved.owner,
|
|
repo: resolved.repo,
|
|
prIndex: resolved.prIndex,
|
|
giteaBaseUrl: resolved.giteaBaseUrl,
|
|
title,
|
|
state,
|
|
diffText,
|
|
diffByteLength,
|
|
httpStatus,
|
|
errorMessage: diffError,
|
|
};
|
|
|
|
const content =
|
|
diffError !== null
|
|
? `Fetcher: ${resolved.owner}/${resolved.repo}#${resolved.prIndex} — failed. ${diffError}`
|
|
: `Fetcher: ${resolved.owner}/${resolved.repo}#${resolved.prIndex} — ${title ?? "(no title)"} [${state ?? "?"}] diff bytes=${diffByteLength ?? 0} HTTP=${httpStatus ?? "?"}${truncatedNote}`;
|
|
|
|
return { content, meta };
|
|
},
|
|
|
|
/**
 * Role 2: turn the fetched diff into a technical analysis (Markdown).
 * Skips (without throwing) when the previous step is not the fetcher,
 * the fetch failed, or the diff is empty. Falls back to a static summary
 * when no DashScope credentials are available.
 */
async analyzer(
  start: StartStep,
  messages: WorkflowMessage[],
): Promise<RoleResult<PrSummarizerMeta["analyzer"]>> {
  const last = messages[messages.length - 1];
  // Cast is only safe once last.role === "fetcher" is confirmed below.
  const fm = last.meta as PrSummarizerMeta["fetcher"];

  // Uniform skip result: reason goes into content, meta markdown, and errorMessage.
  // NOTE(review): `reason` appears twice in the content string — looks like a
  // copy-paste duplication; confirm whether the second occurrence is intended.
  const skip = (reason: string): RoleResult<PrSummarizerMeta["analyzer"]> => ({
    content: `Analyzer skipped: ${reason}\n\n${reason}`,
    meta: {
      analysisMarkdown: `## 无法分析\n\n${reason}`,
      providerModel: null,
      errorMessage: reason,
    },
  });

  if (last.role !== "fetcher") {
    return skip("上一则消息不是 fetcher 输出");
  }

  if (fm.errorMessage !== null) {
    return skip(`拉取阶段失败: ${fm.errorMessage}`);
  }

  const diff = fm.diffText;
  if (diff === null || diff.length === 0) {
    return skip("diff 为空,无法分析");
  }

  // dryRun: never reach llmExtract (it returns {} as T in dryRun mode,
  // which would crash consumers of the result).
  if (isDryRun(start)) {
    return {
      content: "[dryRun] Analyzer skipped real LLM call.",
      meta: {
        analysisMarkdown: "## dryRun\n\n未调用模型。",
        providerModel: null,
        errorMessage: null,
      },
    };
  }

  const provider = await resolveDashScopeProvider();
  // No credentials: emit a static summary with the first 80 diff lines.
  if (provider === null) {
    const excerpt = diff.split("\n").slice(0, 80).join("\n");
    const analysisMarkdown =
      `## 静态摘要(无 LLM 凭据)\n\n` +
      `- 仓库: ${fm.owner}/${fm.repo} PR #${fm.prIndex}\n` +
      `- 标题: ${fm.title ?? "(null)"}\n` +
      `- diff 行数(近似): ${diff.split("\n").length}\n\n` +
      `### Diff 开头\n\n\`\`\`diff\n${excerpt}\n\`\`\`\n`;
    return {
      content: analysisMarkdown,
      meta: {
        analysisMarkdown,
        providerModel: null,
        errorMessage: null,
      },
    };
  }

  // Bound the diff sent to the model; the note below tells the model it was cut.
  const diffForModel = diff.length > DIFF_LLM_MAX_CHARS ? diff.slice(0, DIFF_LLM_MAX_CHARS) : diff;
  const truncated = diff.length > DIFF_LLM_MAX_CHARS;

  const bundle =
    `Repository: ${fm.owner}/${fm.repo} PR index ${fm.prIndex}\n` +
    `Title: ${fm.title ?? ""}\n` +
    `State: ${fm.state ?? ""}\n` +
    (truncated ? `\n(diff truncated for model input to ${DIFF_LLM_MAX_CHARS} chars)\n` : "") +
    `\n--- unified diff ---\n${diffForModel}`;

  // Prompt = agent context + reviewer instructions + (bounded) nerve.yaml + diff bundle.
  const extractPrompt =
    `${nerveAgentContext}\n\n` +
    `You are a senior reviewer. Analyze this Gitea pull request diff.\n` +
    `Output structured findings as Markdown: scope, files touched, behavior change, risks, test ideas.\n\n` +
    `Optional nerve.yaml context:\n\`\`\`yaml\n${getNerveYaml().slice(0, 4000)}\n\`\`\`\n\n` +
    `---\n${bundle}`;

  // dryRun is handled above, so always pass dryRun: false here.
  const extracted = await llmExtract({
    text: extractPrompt,
    schema: analysisExtractSchema,
    provider,
    dryRun: false,
  });

  if (!extracted.ok) {
    const errText = JSON.stringify(extracted.error);
    return {
      content: `Analyzer LLM error: ${errText}`,
      meta: {
        analysisMarkdown: null,
        providerModel: provider.model,
        errorMessage: errText,
      },
    };
  }

  const analysisMarkdown = extracted.value.analysisMarkdown;
  return {
    content: analysisMarkdown,
    meta: {
      analysisMarkdown,
      providerModel: provider.model,
      errorMessage: null,
    },
  };
},
|
|
|
|
async writer(
|
|
start: StartStep,
|
|
messages: WorkflowMessage[],
|
|
): Promise<RoleResult<PrSummarizerMeta["writer"]>> {
|
|
const last = messages[messages.length - 1];
|
|
const am = last.meta as PrSummarizerMeta["analyzer"];
|
|
|
|
const errOut = (msg: string): RoleResult<PrSummarizerMeta["writer"]> => ({
|
|
content: `## 错误\n\n${msg}`,
|
|
meta: {
|
|
summaryZhMarkdown: `## 错误\n\n${msg}`,
|
|
errorMessage: msg,
|
|
},
|
|
});
|
|
|
|
if (last.role !== "analyzer") {
|
|
return errOut("上一则消息不是 analyzer 输出,无法生成总结。");
|
|
}
|
|
|
|
if (am.errorMessage !== null) {
|
|
return errOut(`分析阶段失败,未生成臆造总结:${am.errorMessage}`);
|
|
}
|
|
|
|
const analysis = am.analysisMarkdown;
|
|
if (analysis === null || analysis.trim() === "") {
|
|
return errOut("分析正文为空,无法生成中文总结。");
|
|
}
|
|
|
|
if (isDryRun(start)) {
|
|
const stub = "## dryRun\n\n未调用模型生成中文总结。";
|
|
return {
|
|
content: stub,
|
|
meta: { summaryZhMarkdown: stub, errorMessage: null },
|
|
};
|
|
}
|
|
|
|
const provider = await resolveDashScopeProvider();
|
|
if (provider === null) {
|
|
const stub =
|
|
`## 中文摘要(无 LLM)\n\n` +
|
|
`以下为上游分析原文摘录,请配置 DASHSCOPE 相关凭据以生成压缩中文总结。\n\n${analysis.slice(0, 8000)}`;
|
|
return {
|
|
content: stub,
|
|
meta: { summaryZhMarkdown: stub, errorMessage: null },
|
|
};
|
|
}
|
|
|
|
const writerPrompt =
|
|
`将下列 PR 技术分析改写为**中文 Markdown**交付物,包含:\n` +
|
|
`- 标题(含仓库与 PR 编号)\n` +
|
|
`- 变更要点(条列)\n` +
|
|
`- 风险与注意事项\n` +
|
|
`- 测试建议\n\n` +
|
|
`---\n${analysis}`;
|
|
|
|
const extracted = await llmExtract({
|
|
text: writerPrompt,
|
|
schema: summaryExtractSchema,
|
|
provider,
|
|
dryRun: false,
|
|
});
|
|
|
|
if (!extracted.ok) {
|
|
const msg = JSON.stringify(extracted.error);
|
|
return errOut(`Writer LLM 失败: ${msg}`);
|
|
}
|
|
|
|
const summaryZhMarkdown = extracted.value.summaryZhMarkdown;
|
|
return {
|
|
content: summaryZhMarkdown,
|
|
meta: {
|
|
summaryZhMarkdown,
|
|
errorMessage: null,
|
|
},
|
|
};
|
|
},
|
|
},
|
|
|
|
/** Fixed linear pipeline: fetcher → analyzer → writer → END. */
moderator(context: ModeratorContext<PrSummarizerMeta>) {
  if (context.steps.length === 0) {
    return "fetcher";
  }
  const lastRole = context.steps[context.steps.length - 1].role;
  switch (lastRole) {
    case "fetcher":
      return "analyzer";
    case "analyzer":
      return "writer";
    default:
      // writer (or anything unexpected) terminates the workflow.
      return END;
  }
},
|
|
};
|
|
|
|
export default workflow;
|