// nerve-workspace/workflows/pr-summarizer/index.ts

/**
 * PR 摘要工作流：从 Gitea 拉取 PR 与 diff，可选 LLM 分析后输出中文 Markdown 总结。
 * 宿主需在 nerve.yaml 中注册 workflows.pr-summarizer；触发示例：
 *   nerve workflow trigger pr-summarizer --payload '{"prompt":"<PR URL 或 JSON>"}'
 * Sense 可返回 workflow: `pr-summarizer|50|<prompt>`（见 parseSenseWorkflowDirective）。
 */
import type {
  ModeratorContext,
  RoleResult,
  StartStep,
  WorkflowDefinition,
  WorkflowMessage,
} from "@uncaged/nerve-core";
import { END } from "@uncaged/nerve-core";
import {
  isDryRun,
  llmExtract,
  nerveAgentContext,
  readNerveYaml,
  spawnSafe,
} from "@uncaged/nerve-workflow-utils";
import { homedir } from "node:os";
import { join } from "node:path";
import { z } from "zod";

/**
 * Home directory used to locate the nerve workspace.
 *
 * `||` (rather than `??`) is deliberate: an empty `HOME` is treated as unset,
 * because joining against "" would silently produce a relative path. The
 * fallback is the home directory reported by the OS instead of a hard-coded
 * account path.
 */
const HOME = process.env.HOME || homedir();
/** `<HOME>/.uncaged-nerve`; passed as `nerveRoot` / `cwd` to the helpers in this file. */
const NERVE_ROOT = join(HOME, ".uncaged-nerve");

/**
 * Maximum number of characters of the unified diff stored in the fetcher meta.
 * Longer diffs are truncated and the truncation is mentioned in the step content.
 */
const DIFF_TEXT_MAX_CHARS = 1_500_000;
/** Upper bound on the length of the diff prefix sent to the analysis model. */
const DIFF_LLM_MAX_CHARS = 100_000;

/**
 * Per-role metadata produced by the pr-summarizer workflow.
 * Every field is nullable; `errorMessage` is `null` when the role succeeded.
 */
type PrSummarizerMeta = {
  /** Output of the `fetcher` role: resolved PR coordinates plus the fetched PR data. */
  fetcher: {
    prUrl: string | null;
    owner: string | null;
    repo: string | null;
    prIndex: number | null;
    giteaBaseUrl: string | null;
    /** `title` field of the PR JSON, when it is a string. */
    title: string | null;
    /** `state` field of the PR JSON, when it is a string. */
    state: string | null;
    /** Unified diff text, cut to at most DIFF_TEXT_MAX_CHARS characters. */
    diffText: string | null;
    /** UTF-8 byte length of the diff as received (before truncation). */
    diffByteLength: number | null;
    /** Status of the most recent HTTP response (PR JSON request, then diff request). */
    httpStatus: number | null;
    errorMessage: string | null;
  };
  /** Output of the `analyzer` role. */
  analyzer: {
    analysisMarkdown: string | null;
    /** Model name of the resolved LLM provider; `null` when no LLM call was attempted. */
    providerModel: string | null;
    errorMessage: string | null;
  };
  /** Output of the `writer` role. */
  writer: {
    summaryZhMarkdown: string | null;
    errorMessage: string | null;
  };
};

/**
 * Shape accepted when the workflow prompt is a JSON object.
 * All fields are optional/nullable; `index` must be a positive integer when present.
 */
const jsonPromptSchema = z.object({
  prUrl: z.string().nullish(),
  owner: z.string().nullish(),
  repo: z.string().nullish(),
  index: z.number().int().positive().nullish(),
  baseUrl: z.string().nullish(),
});

/** Schema handed to `llmExtract` by the analyzer role: a single Markdown analysis string. */
const analysisExtractSchema = z
  .object({
    analysisMarkdown: z.string().describe("Technical PR analysis in Markdown (can be English)."),
  })
  .describe("Structured PR analysis from the diff.");

/** Schema handed to `llmExtract` by the writer role: the final Chinese Markdown summary. */
const summaryExtractSchema = z
  .object({
    summaryZhMarkdown: z
      .string()
      .describe(
        "Final deliverable: Chinese Markdown with title, key changes, risks, and test suggestions.",
      ),
  })
  .describe("Chinese Markdown PR summary.");

/**
 * Loads nerve.yaml through `readNerveYaml`, rooted at NERVE_ROOT.
 *
 * @returns The value reported by `readNerveYaml` on success, otherwise a
 *   one-line placeholder comment so callers can always embed the result.
 */
function getNerveYaml(): string {
  const loaded = readNerveYaml({ nerveRoot: NERVE_ROOT });
  if (!loaded.ok) {
    return "# nerve.yaml unavailable";
  }
  return loaded.value;
}

/**
 * Runs `cfg get <key>` inside NERVE_ROOT (10 s timeout) and returns its output.
 *
 * @param key Name of the configuration entry to look up.
 * @returns Trimmed stdout of the command, or `null` when the command fails
 *   or prints nothing but whitespace.
 */
async function cfgGet(key: string): Promise<string | null> {
  const outcome = await spawnSafe("cfg", ["get", key], {
    cwd: NERVE_ROOT,
    env: null,
    timeoutMs: 10_000,
  });
  if (outcome.ok) {
    const printed = outcome.value.stdout.trim();
    return printed === "" ? null : printed;
  }
  return null;
}

/**
 * Resolves the DashScope LLM provider settings.
 *
 * Each setting is read from the environment first and from `cfg get` second.
 * An environment variable that is empty or whitespace-only counts as unset, so
 * it can no longer shadow a value stored in cfg (or, for the model, the
 * built-in default).
 *
 * @returns `{ apiKey, baseUrl, model }` when both the API key and base URL are
 *   available; `null` otherwise. `model` defaults to "qwen-plus".
 */
async function resolveDashScopeProvider(): Promise<{
  baseUrl: string;
  apiKey: string;
  model: string;
} | null> {
  // Environment wins over cfg; cfg is only spawned when the env value is missing/blank.
  const lookup = async (key: string): Promise<string | null> => {
    const fromEnv = process.env[key]?.trim();
    if (fromEnv) {
      return fromEnv;
    }
    return cfgGet(key);
  };

  const apiKey = await lookup("DASHSCOPE_API_KEY");
  const baseUrl = await lookup("DASHSCOPE_BASE_URL");
  const model = (await lookup("DASHSCOPE_MODEL")) ?? "qwen-plus";
  if (!apiKey || !baseUrl) {
    return null;
  }
  return { apiKey, baseUrl, model };
}

/**
 * Parses a Gitea pull-request web URL of the form
 * `http(s)://host[/prefix…]/owner/repo/pulls/NUMBER[/…]`.
 *
 * Path segments that precede `owner` are kept as part of `giteaBaseUrl`, so an
 * instance served under a sub-path (e.g. `https://host/gitea`) resolves to the
 * correct root instead of the bare origin.
 *
 * @param raw URL text; surrounding whitespace is ignored.
 * @returns The parsed coordinates, or `null` when `raw` is not an http(s) URL
 *   with an `owner/repo/pulls/<positive integer>` path.
 */
function parseGiteaPullUrl(raw: string): {
  giteaBaseUrl: string;
  owner: string;
  repo: string;
  prIndex: number;
  prUrl: string;
} | null {
  const trimmed = raw.trim();
  let u: URL;
  try {
    u = new URL(trimmed);
  } catch {
    return null;
  }
  if (u.protocol !== "http:" && u.protocol !== "https:") {
    return null;
  }
  // filter(Boolean) drops empty segments, which also covers leading/trailing slashes.
  const parts = u.pathname.split("/").filter(Boolean);
  const pullsAt = parts.indexOf("pulls");
  // Need at least owner + repo before "pulls" and one segment (the index) after it.
  if (pullsAt < 2 || pullsAt + 1 >= parts.length) {
    return null;
  }
  const indexStr = parts[pullsAt + 1];
  if (!indexStr || !/^\d+$/.test(indexStr)) {
    return null;
  }
  const owner = parts[pullsAt - 2];
  const repo = parts[pullsAt - 1];
  if (!owner || !repo) {
    return null;
  }
  const prIndex = Number.parseInt(indexStr, 10);
  if (!Number.isFinite(prIndex) || prIndex < 1) {
    return null;
  }
  // Anything before owner/repo is the instance's sub-path and belongs to the base URL.
  const prefix = parts.slice(0, pullsAt - 2);
  const giteaBaseUrl = [`${u.protocol}//${u.host}`, ...prefix].join("/");
  return { giteaBaseUrl, owner, repo, prIndex, prUrl: trimmed };
}

/**
 * Result of resolving a workflow prompt into PR coordinates.
 * On failure `parseError` carries the reason and every other field is `null`;
 * on success `parseError` is `null`.
 */
type ResolvedPr = {
  prUrl: string | null;
  owner: string | null;
  repo: string | null;
  prIndex: number | null;
  giteaBaseUrl: string | null;
  parseError: string | null;
};

/**
 * Turns the workflow prompt into PR coordinates.
 *
 * Two prompt forms are understood:
 * - a JSON object (text starting with `{`) validated by `jsonPromptSchema`,
 *   where explicit `owner` / `repo` / `index` / `baseUrl` take precedence and
 *   a parseable `prUrl` fills in whatever is missing;
 * - a plain Gitea pull-request URL.
 *
 * @param content Raw prompt text.
 * @returns Resolved coordinates with `parseError: null`, or an all-null record
 *   whose `parseError` explains why the prompt could not be resolved.
 */
function resolvePrFromContent(content: string): ResolvedPr {
  const fail = (parseError: string): ResolvedPr => ({
    prUrl: null,
    owner: null,
    repo: null,
    prIndex: null,
    giteaBaseUrl: null,
    parseError,
  });

  const text = content.trim();
  if (text === "") {
    return fail("Empty prompt");
  }

  // Plain-URL form.
  if (!text.startsWith("{")) {
    const fromUrl = parseGiteaPullUrl(text);
    if (fromUrl === null) {
      return fail("Not a valid Gitea PR URL (expected https://host/owner/repo/pulls/NUMBER)");
    }
    return {
      prUrl: fromUrl.prUrl,
      owner: fromUrl.owner,
      repo: fromUrl.repo,
      prIndex: fromUrl.prIndex,
      giteaBaseUrl: fromUrl.giteaBaseUrl.replace(/\/+$/, ""),
      parseError: null,
    };
  }

  // JSON form.
  let decoded: unknown;
  try {
    decoded = JSON.parse(text) as unknown;
  } catch {
    return fail("Invalid JSON in prompt");
  }
  const validated = jsonPromptSchema.safeParse(decoded);
  if (!validated.success) {
    return fail(`JSON validation failed: ${validated.error.message}`);
  }

  const fields = validated.data;
  // The URL is only a source of defaults; explicit JSON fields win.
  const fromUrl = fields.prUrl ? parseGiteaPullUrl(fields.prUrl) : null;
  const owner = fields.owner ?? fromUrl?.owner ?? null;
  const repo = fields.repo ?? fromUrl?.repo ?? null;
  const prIndex = fields.index ?? fromUrl?.prIndex ?? null;
  const baseUrl = fields.baseUrl ?? fromUrl?.giteaBaseUrl ?? null;
  const prUrl = fields.prUrl ?? null;

  if (!owner || !repo || prIndex === null || !baseUrl) {
    return fail(
      "JSON prompt must include resolvable owner, repo, pr index, and baseUrl (or prUrl)",
    );
  }

  const normalizedBase = baseUrl.replace(/\/+$/, "");
  return {
    prUrl: prUrl ?? `${normalizedBase}/${owner}/${repo}/pulls/${prIndex}`,
    owner,
    repo,
    prIndex,
    giteaBaseUrl: normalizedBase,
    parseError: null,
  };
}

/**
 * Builds a fresh fetcher meta record with every field set to `null`.
 * Callers spread it and override only the fields they know.
 */
function emptyFetcherMeta(): PrSummarizerMeta["fetcher"] {
  const blank: PrSummarizerMeta["fetcher"] = {
    prUrl: null,
    owner: null,
    repo: null,
    prIndex: null,
    giteaBaseUrl: null,
    title: null,
    state: null,
    diffText: null,
    diffByteLength: null,
    httpStatus: null,
    errorMessage: null,
  };
  return blank;
}

const workflow: WorkflowDefinition<PrSummarizerMeta> = {
  name: "pr-summarizer",

  roles: {
    /**
     * Fetcher role: resolves the PR named by the prompt, then downloads the PR
     * JSON and its unified diff from the Gitea API using GITEA_TOKEN.
     *
     * Failures (unparseable prompt, missing token, HTTP or network errors) are
     * never thrown; they are reported through `meta.errorMessage` and `content`.
     * The diff is requested only when the PR JSON request succeeded.
     */
    async fetcher(start: StartStep): Promise<RoleResult<PrSummarizerMeta["fetcher"]>> {
      const target = resolvePrFromContent(start.content);
      if (target.parseError !== null) {
        return {
          content: `Fetcher: parse error — ${target.parseError}`,
          meta: { ...emptyFetcherMeta(), errorMessage: target.parseError },
        };
      }

      const { prUrl, owner, repo, prIndex, giteaBaseUrl } = target;
      const identity = { prUrl, owner, repo, prIndex, giteaBaseUrl };

      const token = process.env.GITEA_TOKEN ?? null;
      if (token === null || token.trim() === "") {
        return {
          content: "Fetcher: missing GITEA_TOKEN (set env before running).",
          meta: { ...emptyFetcherMeta(), ...identity, errorMessage: "GITEA_TOKEN is not set" },
        };
      }

      const pullJsonUrl = `${giteaBaseUrl}/api/v1/repos/${owner}/${repo}/pulls/${prIndex}`;
      const pullDiffUrl = `${pullJsonUrl}.diff`;
      const authorization = `token ${token}`;
      const describeError = (e: unknown): string => (e instanceof Error ? e.message : String(e));

      let title: string | null = null;
      let state: string | null = null;
      let httpStatus: number | null = null;
      // Single failure slot: set by whichever request fails first.
      let failure: string | null = null;

      // Step 1: PR metadata (title / state).
      try {
        const prRes = await fetch(pullJsonUrl, {
          headers: { Authorization: authorization, Accept: "application/json" },
        });
        httpStatus = prRes.status;
        const body = await prRes.text();
        if (prRes.ok) {
          const data = JSON.parse(body) as Record<string, unknown>;
          const rawTitle = data.title;
          const rawState = data.state;
          title = typeof rawTitle === "string" ? rawTitle : null;
          state = typeof rawState === "string" ? rawState : null;
        } else {
          failure = `GET PR JSON failed: HTTP ${prRes.status} ${body.slice(0, 500)}`;
        }
      } catch (e) {
        failure = describeError(e);
      }

      let diffText: string | null = null;
      let diffByteLength: number | null = null;
      let diffCharTruncated = false;

      // Step 2: unified diff, skipped when step 1 already failed.
      if (failure === null) {
        try {
          const diffRes = await fetch(pullDiffUrl, {
            headers: { Authorization: authorization, Accept: "text/plain" },
          });
          httpStatus = diffRes.status;
          const rawDiff = await diffRes.text();
          if (diffRes.ok) {
            // Byte length describes the full diff; only the stored text is capped.
            diffByteLength = Buffer.byteLength(rawDiff, "utf8");
            diffCharTruncated = rawDiff.length > DIFF_TEXT_MAX_CHARS;
            diffText = diffCharTruncated ? rawDiff.slice(0, DIFF_TEXT_MAX_CHARS) : rawDiff;
          } else {
            failure = `GET PR diff failed: HTTP ${diffRes.status} ${rawDiff.slice(0, 500)}`;
          }
        } catch (e) {
          failure = describeError(e);
        }
      }

      const meta: PrSummarizerMeta["fetcher"] = {
        ...identity,
        title,
        state,
        diffText,
        diffByteLength,
        httpStatus,
        errorMessage: failure,
      };

      const label = `Fetcher: ${owner}/${repo}#${prIndex}`;
      if (failure !== null) {
        return { content: `${label} — failed. ${failure}`, meta };
      }

      let truncatedNote = "";
      if (diffCharTruncated && diffByteLength !== null) {
        truncatedNote = ` (diff truncated in meta to ${DIFF_TEXT_MAX_CHARS} chars; full byte length ${diffByteLength})`;
      }
      return {
        content: `${label} — ${title ?? "(no title)"} [${state ?? "?"}] diff bytes=${diffByteLength ?? 0} HTTP=${httpStatus ?? "?"}${truncatedNote}`,
        meta,
      };
    },

    /**
     * Analyzer role: turns the fetcher's diff into a Markdown analysis.
     *
     * Outcomes, in order of precedence:
     * - skipped (with `errorMessage`) when there is no preceding fetcher message,
     *   the fetcher failed, or the diff is empty;
     * - a stub when the run is a dry run;
     * - a static excerpt-based summary when no DashScope credentials resolve;
     * - otherwise the result of `llmExtract` on a prompt built from the diff
     *   (capped at DIFF_LLM_MAX_CHARS) and the first 4000 chars of nerve.yaml.
     *
     * @param start Workflow start step (consulted for the dry-run flag).
     * @param messages Messages so far; the last one must come from `fetcher`.
     */
    async analyzer(
      start: StartStep,
      messages: WorkflowMessage[],
    ): Promise<RoleResult<PrSummarizerMeta["analyzer"]>> {
      // NOTE(review): `reason` appears twice in `content` — confirm this is intended.
      const skip = (reason: string): RoleResult<PrSummarizerMeta["analyzer"]> => ({
        content: `Analyzer skipped: ${reason}\n\n${reason}`,
        meta: {
          analysisMarkdown: `## 无法分析\n\n${reason}`,
          providerModel: null,
          errorMessage: reason,
        },
      });

      // Validate the message before touching its meta: an empty `messages`
      // array would otherwise throw on `last.meta`.
      const last = messages[messages.length - 1];
      if (last === undefined || last.role !== "fetcher") {
        return skip("上一则消息不是 fetcher 输出");
      }
      const fm = last.meta as PrSummarizerMeta["fetcher"];

      if (fm.errorMessage !== null) {
        return skip(`拉取阶段失败: ${fm.errorMessage}`);
      }

      const diff = fm.diffText;
      if (diff === null || diff.length === 0) {
        return skip("diff 为空，无法分析");
      }

      if (isDryRun(start)) {
        return {
          content: "[dryRun] Analyzer skipped real LLM call.",
          meta: {
            analysisMarkdown: "## dryRun\n\n未调用模型。",
            providerModel: null,
            errorMessage: null,
          },
        };
      }

      const provider = await resolveDashScopeProvider();
      if (provider === null) {
        // No credentials: fall back to a static summary built from the diff head.
        const diffLines = diff.split("\n");
        const excerpt = diffLines.slice(0, 80).join("\n");
        const analysisMarkdown =
          `## 静态摘要（无 LLM 凭据）\n\n` +
          `- 仓库: ${fm.owner}/${fm.repo} PR #${fm.prIndex}\n` +
          `- 标题: ${fm.title ?? "(null)"}\n` +
          `- diff 行数（近似）: ${diffLines.length}\n\n` +
          `### Diff 开头\n\n\`\`\`diff\n${excerpt}\n\`\`\`\n`;
        return {
          content: analysisMarkdown,
          meta: {
            analysisMarkdown,
            providerModel: null,
            errorMessage: null,
          },
        };
      }

      const truncated = diff.length > DIFF_LLM_MAX_CHARS;
      const diffForModel = truncated ? diff.slice(0, DIFF_LLM_MAX_CHARS) : diff;

      const bundle =
        `Repository: ${fm.owner}/${fm.repo} PR index ${fm.prIndex}\n` +
        `Title: ${fm.title ?? ""}\n` +
        `State: ${fm.state ?? ""}\n` +
        (truncated ? `\n(diff truncated for model input to ${DIFF_LLM_MAX_CHARS} chars)\n` : "") +
        `\n--- unified diff ---\n${diffForModel}`;

      const extractPrompt =
        `${nerveAgentContext}\n\n` +
        `You are a senior reviewer. Analyze this Gitea pull request diff.\n` +
        `Output structured findings as Markdown: scope, files touched, behavior change, risks, test ideas.\n\n` +
        `Optional nerve.yaml context:\n\`\`\`yaml\n${getNerveYaml().slice(0, 4000)}\n\`\`\`\n\n` +
        `---\n${bundle}`;

      const extracted = await llmExtract({
        text: extractPrompt,
        schema: analysisExtractSchema,
        provider,
        dryRun: false,
      });

      if (!extracted.ok) {
        const errText = JSON.stringify(extracted.error);
        return {
          content: `Analyzer LLM error: ${errText}`,
          meta: {
            analysisMarkdown: null,
            providerModel: provider.model,
            errorMessage: errText,
          },
        };
      }

      const analysisMarkdown = extracted.value.analysisMarkdown;
      return {
        content: analysisMarkdown,
        meta: {
          analysisMarkdown,
          providerModel: provider.model,
          errorMessage: null,
        },
      };
    },

    /**
     * Writer role: rewrites the analyzer's Markdown into the final Chinese
     * Markdown summary.
     *
     * Outcomes, in order of precedence:
     * - an error document when there is no preceding analyzer message, the
     *   analyzer reported an error, or its analysis text is empty;
     * - a stub when the run is a dry run;
     * - the first 8000 chars of the analysis, prefixed with a notice, when no
     *   DashScope credentials resolve;
     * - otherwise the result of `llmExtract` on the rewrite prompt.
     *
     * @param start Workflow start step (consulted for the dry-run flag).
     * @param messages Messages so far; the last one must come from `analyzer`.
     */
    async writer(
      start: StartStep,
      messages: WorkflowMessage[],
    ): Promise<RoleResult<PrSummarizerMeta["writer"]>> {
      const errOut = (msg: string): RoleResult<PrSummarizerMeta["writer"]> => ({
        content: `## 错误\n\n${msg}`,
        meta: {
          summaryZhMarkdown: `## 错误\n\n${msg}`,
          errorMessage: msg,
        },
      });

      // Validate the message before touching its meta: an empty `messages`
      // array would otherwise throw on `last.meta`.
      const last = messages[messages.length - 1];
      if (last === undefined || last.role !== "analyzer") {
        return errOut("上一则消息不是 analyzer 输出，无法生成总结。");
      }
      const am = last.meta as PrSummarizerMeta["analyzer"];

      if (am.errorMessage !== null) {
        return errOut(`分析阶段失败，未生成臆造总结：${am.errorMessage}`);
      }

      const analysis = am.analysisMarkdown;
      if (analysis === null || analysis.trim() === "") {
        return errOut("分析正文为空，无法生成中文总结。");
      }

      if (isDryRun(start)) {
        const stub = "## dryRun\n\n未调用模型生成中文总结。";
        return {
          content: stub,
          meta: { summaryZhMarkdown: stub, errorMessage: null },
        };
      }

      const provider = await resolveDashScopeProvider();
      if (provider === null) {
        // No credentials: pass the upstream analysis through, capped in length.
        const stub =
          `## 中文摘要（无 LLM）\n\n` +
          `以下为上游分析原文摘录，请配置 DASHSCOPE 相关凭据以生成压缩中文总结。\n\n${analysis.slice(0, 8000)}`;
        return {
          content: stub,
          meta: { summaryZhMarkdown: stub, errorMessage: null },
        };
      }

      const writerPrompt =
        `将下列 PR 技术分析改写为**中文 Markdown**交付物，包含：\n` +
        `- 标题（含仓库与 PR 编号）\n` +
        `- 变更要点（条列）\n` +
        `- 风险与注意事项\n` +
        `- 测试建议\n\n` +
        `---\n${analysis}`;

      const extracted = await llmExtract({
        text: writerPrompt,
        schema: summaryExtractSchema,
        provider,
        dryRun: false,
      });

      if (!extracted.ok) {
        const msg = JSON.stringify(extracted.error);
        return errOut(`Writer LLM 失败: ${msg}`);
      }

      const summaryZhMarkdown = extracted.value.summaryZhMarkdown;
      return {
        content: summaryZhMarkdown,
        meta: {
          summaryZhMarkdown,
          errorMessage: null,
        },
      };
    },
  },

  /**
   * Picks the next role from the most recent step, giving the fixed pipeline
   * fetcher → analyzer → writer → END. Any other last role also ends the run.
   */
  moderator(context: ModeratorContext<PrSummarizerMeta>) {
    const { steps } = context;
    if (steps.length === 0) {
      return "fetcher";
    }
    const previous = steps[steps.length - 1];
    switch (previous.role) {
      case "fetcher":
        return "analyzer";
      case "analyzer":
        return "writer";
      default:
        // Covers "writer" as well as any unexpected role.
        return END;
    }
  },
};

export default workflow;