diff --git a/packages/eval/__tests__/schemas.test.ts b/packages/eval/__tests__/schemas.test.ts
new file mode 100644
index 0000000..5e760d4
--- /dev/null
+++ b/packages/eval/__tests__/schemas.test.ts
@@ -0,0 +1,63 @@
+import { describe, expect, test } from "vitest";
+import {
+  EVAL_JUDGE_FRONTMATTER_SCHEMA,
+  EVAL_JUDGE_HALLUCINATION_SCHEMA,
+  EVAL_JUDGE_TOKEN_STATS_SCHEMA,
+  EVAL_JUDGE_UPSTREAM_SCHEMA,
+  EVAL_RUN_SCHEMA,
+} from "../src/storage/index.js";
+
+describe("OCAS schema definitions", () => {
+  test("eval-run schema has correct title and required fields", () => {
+    expect(EVAL_RUN_SCHEMA.title).toBe("@uwf/eval-run");
+    const required = EVAL_RUN_SCHEMA.required as string[];
+    expect(required).toContain("task");
+    expect(required).toContain("config");
+    expect(required).toContain("threadId");
+    expect(required).toContain("judges");
+    expect(required).toContain("overall");
+    expect(required).toContain("timestamp");
+  });
+
+  test("frontmatter judge schema has correct title", () => {
+    expect(EVAL_JUDGE_FRONTMATTER_SCHEMA.title).toBe("@uwf/eval-judge-frontmatter");
+    const required = EVAL_JUDGE_FRONTMATTER_SCHEMA.required as string[];
+    expect(required).toContain("stepsTotal");
+    expect(required).toContain("stepsValid");
+    expect(required).toContain("invalidSteps");
+  });
+
+  test("upstream judge schema has correct title", () => {
+    expect(EVAL_JUDGE_UPSTREAM_SCHEMA.title).toBe("@uwf/eval-judge-upstream");
+    const required = EVAL_JUDGE_UPSTREAM_SCHEMA.required as string[];
+    expect(required).toContain("perStep");
+  });
+
+  test("hallucination judge schema has correct title", () => {
+    expect(EVAL_JUDGE_HALLUCINATION_SCHEMA.title).toBe("@uwf/eval-judge-hallucination");
+    const required = EVAL_JUDGE_HALLUCINATION_SCHEMA.required as string[];
+    expect(required).toContain("perStep");
+  });
+
+  test("token-stats judge schema has correct title", () => {
+    expect(EVAL_JUDGE_TOKEN_STATS_SCHEMA.title).toBe("@uwf/eval-judge-token-stats");
+    const required = EVAL_JUDGE_TOKEN_STATS_SCHEMA.required as string[];
+    expect(required).toContain("totalInput");
+    expect(required).toContain("totalOutput");
+    expect(required).toContain("totalTurns");
+    expect(required).toContain("perStep");
+  });
+
+  test("all schemas have type object at root", () => {
+    const schemas = [
+      EVAL_RUN_SCHEMA,
+      EVAL_JUDGE_FRONTMATTER_SCHEMA,
+      EVAL_JUDGE_UPSTREAM_SCHEMA,
+      EVAL_JUDGE_HALLUCINATION_SCHEMA,
+      EVAL_JUDGE_TOKEN_STATS_SCHEMA,
+    ];
+    for (const s of schemas) {
+      expect(s.type).toBe("object");
+    }
+  });
+});
diff --git a/packages/eval/__tests__/task-loader.test.ts b/packages/eval/__tests__/task-loader.test.ts
new file mode 100644
index 0000000..bbbc857
--- /dev/null
+++ b/packages/eval/__tests__/task-loader.test.ts
@@ -0,0 +1,163 @@
+import { describe, expect, test } from "vitest";
+import { parseTaskManifest } from "../src/task/index.js";
+
+const VALID_YAML = `
+name: fix-off-by-one
+description: Fix an off-by-one error in a calculator
+workflow: solve-issue
+prompt: "Fix the bug: add(1,2) returns 4 instead of 3"
+limits:
+  maxSteps: 15
+  timeoutMinutes: 30
+judges:
+  - name: frontmatter-compliance
+    weight: 0.15
+    builtin: true
+  - name: test-pass
+    weight: 0.3
+    entry: dist/judges/test-pass.js
+    schema: schemas/test-pass.json
+`;
+
+describe("parseTaskManifest", () => {
+  test("parses valid task.yaml", () => {
+    const manifest = parseTaskManifest(VALID_YAML);
+    expect(manifest.name).toBe("fix-off-by-one");
+    expect(manifest.description).toBe("Fix an off-by-one error in a calculator");
+    expect(manifest.workflow).toBe("solve-issue");
+    expect(manifest.prompt).toBe("Fix the bug: add(1,2) returns 4 instead of 3");
+    expect(manifest.limits).toEqual({ maxSteps: 15, timeoutMinutes: 30 });
+    expect(manifest.judges).toHaveLength(2);
+  });
+
+  test("parses builtin judge", () => {
+    const manifest = parseTaskManifest(VALID_YAML);
+    const builtin = manifest.judges[0];
+    expect(builtin).toBeDefined();
+    expect(builtin!.name).toBe("frontmatter-compliance");
+    expect(builtin!.weight).toBe(0.15);
+    expect(builtin!.builtin).toBe(true);
+    expect(builtin!.entry).toBeNull();
+  });
+
+  test("parses custom judge with entry + schema", () => {
+    const manifest = parseTaskManifest(VALID_YAML);
+    const custom = manifest.judges[1];
+    expect(custom).toBeDefined();
+    expect(custom!.name).toBe("test-pass");
+    expect(custom!.weight).toBe(0.3);
+    expect(custom!.builtin).toBe(false);
+    expect(custom!.entry).toBe("dist/judges/test-pass.js");
+    expect(custom!.schema).toBe("schemas/test-pass.json");
+  });
+
+  test("defaults limits when omitted", () => {
+    const yaml = `
+name: minimal
+workflow: solve-issue
+prompt: do something
+judges:
+  - name: check
+    builtin: true
+`;
+    const manifest = parseTaskManifest(yaml);
+    expect(manifest.limits).toEqual({ maxSteps: 20, timeoutMinutes: 30 });
+  });
+
+  test("defaults description to empty string", () => {
+    const yaml = `
+name: no-desc
+workflow: solve-issue
+prompt: do something
+judges:
+  - name: check
+    builtin: true
+`;
+    const manifest = parseTaskManifest(yaml);
+    expect(manifest.description).toBe("");
+  });
+
+  test("rejects missing name", () => {
+    const yaml = `
+workflow: solve-issue
+prompt: do something
+judges:
+  - name: check
+    builtin: true
+`;
+    expect(() => parseTaskManifest(yaml)).toThrow("name is required");
+  });
+
+  test("rejects missing workflow", () => {
+    const yaml = `
+name: test
+prompt: do something
+judges:
+  - name: check
+    builtin: true
+`;
+    expect(() => parseTaskManifest(yaml)).toThrow("workflow is required");
+  });
+
+  test("rejects missing prompt", () => {
+    const yaml = `
+name: test
+workflow: solve-issue
+judges:
+  - name: check
+    builtin: true
+`;
+    expect(() => parseTaskManifest(yaml)).toThrow("prompt is required");
+  });
+
+  test("rejects empty judges array", () => {
+    const yaml = `
+name: test
+workflow: solve-issue
+prompt: do something
+judges: []
+`;
+    expect(() => parseTaskManifest(yaml)).toThrow("at least one judge");
+  });
+
+  test("rejects non-builtin judge without entry", () => {
+    const yaml = `
+name: test
+workflow: solve-issue
+prompt: do something
+judges:
+  - name: custom-check
+    weight: 0.5
+`;
+    expect(() => parseTaskManifest(yaml)).toThrow("non-builtin judge must have entry");
+  });
+
+  test("rejects non-object YAML root", () => {
+    expect(() => parseTaskManifest("just a string")).toThrow("must be a YAML mapping");
+  });
+
+  test("rejects judge without name", () => {
+    const yaml = `
+name: test
+workflow: solve-issue
+prompt: do something
+judges:
+  - weight: 0.5
+    builtin: true
+`;
+    expect(() => parseTaskManifest(yaml)).toThrow("name is required");
+  });
+
+  test("defaults weight to 0 when omitted", () => {
+    const yaml = `
+name: test
+workflow: solve-issue
+prompt: do something
+judges:
+  - name: token-stats
+    builtin: true
+`;
+    const manifest = parseTaskManifest(yaml);
+    expect(manifest.judges[0]!.weight).toBe(0);
+  });
+});
diff --git a/packages/eval/package.json b/packages/eval/package.json
new file mode 100644
index 0000000..696b2a2
--- /dev/null
+++ b/packages/eval/package.json
@@ -0,0 +1,46 @@
+{
+  "name": "@united-workforce/eval",
+  "version": "0.1.0",
+  "private": true,
+  "files": [
+    "src",
+    "dist",
+    "package.json"
+  ],
+  "type": "module",
+  "bin": {
+    "uwf-eval": "./dist/cli.js"
+  },
+  "exports": {
+    ".": {
+      "types": "./dist/index.d.ts",
+      "import": "./dist/index.js"
+    }
+  },
+  "scripts": {
+    "prepublishOnly": "echo 'Use pnpm run release from repo root' && exit 1",
+    "test": "vitest run __tests__/",
+    "test:ci": "vitest run __tests__/"
+  },
+  "dependencies": {
+    "@ocas/core": "^0.3.0",
+    "@ocas/fs": "^0.3.0",
+    "@united-workforce/protocol": "workspace:^",
+    "@united-workforce/util": "workspace:^",
+    "commander": "^14.0.3",
+    "yaml": "^2.9.0"
+  },
+  "devDependencies": {
+    "typescript": "^5.8.3"
+  },
+  "repository": {
+    "type": "git",
+    "url": "https://git.shazhou.work/shazhou/united-workforce.git",
+    "directory": "packages/eval"
+  },
+  "homepage": "https://git.shazhou.work/shazhou/united-workforce#readme",
+  "bugs": {
+    "url": "https://git.shazhou.work/shazhou/united-workforce/issues"
+  },
+  "license": "MIT"
+}
diff --git a/packages/eval/src/cli.ts b/packages/eval/src/cli.ts
new file mode 100644
index 0000000..00c89b5
--- /dev/null
+++ b/packages/eval/src/cli.ts
@@ -0,0 +1,28 @@
+#!/usr/bin/env node
+import { Command } from "commander";
+import {
+  registerDiffCommand,
+  registerListCommand,
+  registerReportCommand,
+  registerRunCommand,
+} from "./commands/index.js";
+
+const program = new Command();
+
+program
+  .name("uwf-eval")
+  .description("Evaluate uwf workflow quality with real agents")
+  .version("0.1.0");
+
+registerRunCommand(program);
+registerReportCommand(program);
+registerDiffCommand(program);
+registerListCommand(program);
+
+// Every action handler is async, so parse with parseAsync and report a
+// rejection instead of leaving it as an unhandled promise rejection.
+program.parseAsync().catch((error: unknown) => {
+  const message = error instanceof Error ? error.message : String(error);
+  process.stderr.write(`uwf-eval: ${message}\n`);
+  process.exitCode = 1;
+});
diff --git a/packages/eval/src/commands/diff.ts b/packages/eval/src/commands/diff.ts
new file mode 100644
index 0000000..fa443d8
--- /dev/null
+++ b/packages/eval/src/commands/diff.ts
@@ -0,0 +1,12 @@
+import type { Command } from "commander";
+
+/** Register the `diff` subcommand (stub: prints "not yet implemented", sets exit code 1). */
+export function registerDiffCommand(program: Command): void {
+  program
+    .command("diff <hash1> <hash2>")
+    .description("Compare two eval runs side-by-side")
+    .action(async (_hash1: string, _hash2: string) => {
+      process.stderr.write("uwf-eval diff: not yet implemented\n");
+      process.exitCode = 1;
+    });
+}
diff --git a/packages/eval/src/commands/index.ts b/packages/eval/src/commands/index.ts
new file mode 100644
index 0000000..0dded07
--- /dev/null
+++ b/packages/eval/src/commands/index.ts
@@ -0,0 +1,4 @@
+export { registerDiffCommand } from "./diff.js";
+export { registerListCommand } from "./list.js";
+export { registerReportCommand } from "./report.js";
+export { registerRunCommand } from "./run.js";
diff --git a/packages/eval/src/commands/list.ts b/packages/eval/src/commands/list.ts
new file mode 100644
index 0000000..9c4bd0c
--- /dev/null
+++ b/packages/eval/src/commands/list.ts
@@ -0,0 +1,14 @@
+import type { Command } from "commander";
+
+/** Register the `list` subcommand (stub: prints "not yet implemented", sets exit code 1). */
+export function registerListCommand(program: Command): void {
+  program
+    .command("list")
+    .description("List past eval runs")
+    .option("--task <name>", "filter by task name")
+    .option("--limit <n>", "max results", "20")
+    .action(async (_opts: Record<string, unknown>) => {
+      process.stderr.write("uwf-eval list: not yet implemented\n");
+      process.exitCode = 1;
+    });
+}
diff --git a/packages/eval/src/commands/report.ts b/packages/eval/src/commands/report.ts
new file mode 100644
index 0000000..db3e8d6
--- /dev/null
+++ b/packages/eval/src/commands/report.ts
@@ -0,0 +1,12 @@
+import type { Command } from "commander";
+
+/** Register the `report` subcommand (stub: prints "not yet implemented", sets exit code 1). */
+export function registerReportCommand(program: Command): void {
+  program
+    .command("report <hash>")
+    .description("Show eval run results")
+    .action(async (_hash: string) => {
+      process.stderr.write("uwf-eval report: not yet implemented\n");
+      process.exitCode = 1;
+    });
+}
diff --git a/packages/eval/src/commands/run.ts b/packages/eval/src/commands/run.ts
new file mode 100644
index 0000000..4bc6b08
--- /dev/null
+++ b/packages/eval/src/commands/run.ts
@@ -0,0 +1,15 @@
+import type { Command } from "commander";
+
+/** Register the `run` subcommand (stub: prints "not yet implemented", sets exit code 1). */
+export function registerRunCommand(program: Command): void {
+  program
+    .command("run <task>")
+    .description("Run eval on a task directory or tarball")
+    .option("--agent <name>", "agent adapter to use", "hermes")
+    .option("--model <name>", "model override")
+    .option("--count <n>", "number of eval runs", "1")
+    .action(async (_task: string, _opts: Record<string, unknown>) => {
+      process.stderr.write("uwf-eval run: not yet implemented\n");
+      process.exitCode = 1;
+    });
+}
diff --git a/packages/eval/src/index.ts b/packages/eval/src/index.ts
new file mode 100644
index 0000000..69c6e69
--- /dev/null
+++ b/packages/eval/src/index.ts
@@ -0,0 +1,15 @@
+// Judge types
+export type { JudgeInput, JudgeOutput } from "./judge/index.js";
+// Storage types
+export type { EvalJudgeRecord, EvalRunConfig, EvalRunPayload } from "./storage/index.js";
+// Storage schemas
+export {
+  EVAL_JUDGE_FRONTMATTER_SCHEMA,
+  EVAL_JUDGE_HALLUCINATION_SCHEMA,
+  EVAL_JUDGE_TOKEN_STATS_SCHEMA,
+  EVAL_JUDGE_UPSTREAM_SCHEMA,
+  EVAL_RUN_SCHEMA,
+} from "./storage/index.js";
+// Task manifest types and loader
+export type { JudgeEntry, TaskLimits, TaskManifest } from "./task/index.js";
+export { loadTaskManifest, parseTaskManifest } from "./task/index.js";
diff --git a/packages/eval/src/judge/index.ts b/packages/eval/src/judge/index.ts
new file mode 100644
index 0000000..ebb4dfb
--- /dev/null
+++ b/packages/eval/src/judge/index.ts
@@ -0,0 +1 @@
+export type { JudgeInput, JudgeOutput } from "./types.js";
diff --git a/packages/eval/src/judge/types.ts b/packages/eval/src/judge/types.ts
new file mode 100644
index 0000000..93b506f
--- /dev/null
+++ b/packages/eval/src/judge/types.ts
@@ -0,0 +1,15 @@
+/** Output shape every judge must produce on stdout (JSON). */
+export type JudgeOutput<T = unknown> = {
+  /** Score between 0.0 and 1.0. */
+  score: number;
+  /** Judge-specific structured data, stored in CAS with its own schema. */
+  data: T;
+};
+
+/** Input context passed to judge scripts via argv. */
+export type JudgeInput = {
+  /** Working directory where the task was executed. */
+  cwd: string;
+  /** Thread ID of the eval run. */
+  threadId: string;
+};
diff --git a/packages/eval/src/storage/index.ts b/packages/eval/src/storage/index.ts
new file mode 100644
index 0000000..8b0d554
--- /dev/null
+++ b/packages/eval/src/storage/index.ts
@@ -0,0 +1,8 @@
+export {
+  EVAL_JUDGE_FRONTMATTER_SCHEMA,
+  EVAL_JUDGE_HALLUCINATION_SCHEMA,
+  EVAL_JUDGE_TOKEN_STATS_SCHEMA,
+  EVAL_JUDGE_UPSTREAM_SCHEMA,
+  EVAL_RUN_SCHEMA,
+} from "./schemas.js";
+export type { EvalJudgeRecord, EvalRunConfig, EvalRunPayload } from "./types.js";
diff --git a/packages/eval/src/storage/schemas.ts b/packages/eval/src/storage/schemas.ts
new file mode 100644
index 0000000..a7809c6
--- /dev/null
+++ b/packages/eval/src/storage/schemas.ts
@@ -0,0 +1,128 @@
+import type { JSONSchema } from "@ocas/core";
+
+/** JSON Schema for a stored eval run; its fields mirror EvalRunPayload in ./types.ts. */
+export const EVAL_RUN_SCHEMA: JSONSchema = {
+  title: "@uwf/eval-run",
+  type: "object",
+  required: ["task", "config", "threadId", "judges", "overall", "timestamp"],
+  properties: {
+    task: { type: "string" },
+    config: {
+      type: "object",
+      required: ["agent", "model", "engineVersion"],
+      properties: {
+        agent: { type: "string" },
+        model: { type: "string" },
+        engineVersion: { type: "string" },
+      },
+    },
+    threadId: { type: "string" },
+    judges: {
+      type: "array",
+      items: {
+        type: "object",
+        required: ["name", "score", "weight", "dataHash"],
+        properties: {
+          name: { type: "string" },
+          score: { type: "number" },
+          weight: { type: "number" },
+          dataHash: { type: "string" },
+        },
+      },
+    },
+    overall: { type: "number" },
+    timestamp: { type: "integer" },
+  },
+};
+
+/** JSON Schema for frontmatter-judge data: step counts plus the errors of each invalid step. */
+export const EVAL_JUDGE_FRONTMATTER_SCHEMA: JSONSchema = {
+  title: "@uwf/eval-judge-frontmatter",
+  type: "object",
+  required: ["stepsTotal", "stepsValid", "invalidSteps"],
+  properties: {
+    stepsTotal: { type: "integer" },
+    stepsValid: { type: "integer" },
+    invalidSteps: {
+      type: "array",
+      items: {
+        type: "object",
+        required: ["stepIndex", "role", "errors"],
+        properties: {
+          stepIndex: { type: "integer" },
+          role: { type: "string" },
+          errors: { type: "array", items: { type: "string" } },
+        },
+      },
+    },
+  },
+};
+
+/** JSON Schema for upstream-judge data: per-step consumed / missed lists and a score. */
+export const EVAL_JUDGE_UPSTREAM_SCHEMA: JSONSchema = {
+  title: "@uwf/eval-judge-upstream",
+  type: "object",
+  required: ["perStep"],
+  properties: {
+    perStep: {
+      type: "array",
+      items: {
+        type: "object",
+        required: ["role", "consumed", "missed", "score"],
+        properties: {
+          role: { type: "string" },
+          consumed: { type: "array", items: { type: "string" } },
+          missed: { type: "array", items: { type: "string" } },
+          score: { type: "number" },
+        },
+      },
+    },
+  },
+};
+
+/** JSON Schema for hallucination-judge data: a per-step hallucination list and score. */
+export const EVAL_JUDGE_HALLUCINATION_SCHEMA: JSONSchema = {
+  title: "@uwf/eval-judge-hallucination",
+  type: "object",
+  required: ["perStep"],
+  properties: {
+    perStep: {
+      type: "array",
+      items: {
+        type: "object",
+        required: ["role", "hallucinations", "score"],
+        properties: {
+          role: { type: "string" },
+          hallucinations: { type: "array", items: { type: "string" } },
+          score: { type: "number" },
+        },
+      },
+    },
+  },
+};
+
+/** JSON Schema for token-stats-judge data: input/output/turn totals and a per-step breakdown. */
+export const EVAL_JUDGE_TOKEN_STATS_SCHEMA: JSONSchema = {
+  title: "@uwf/eval-judge-token-stats",
+  type: "object",
+  required: ["totalInput", "totalOutput", "totalTurns", "perStep"],
+  properties: {
+    totalInput: { type: "integer" },
+    totalOutput: { type: "integer" },
+    totalTurns: { type: "integer" },
+    perStep: {
+      type: "array",
+      items: {
+        type: "object",
+        required: ["role", "inputTokens", "outputTokens", "turns", "duration"],
+        properties: {
+          role: { type: "string" },
+          inputTokens: { type: "integer" },
+          outputTokens: { type: "integer" },
+          turns: { type: "integer" },
+          duration: { type: "number" },
+        },
+      },
+    },
+  },
+};
diff --git a/packages/eval/src/storage/types.ts b/packages/eval/src/storage/types.ts
new file mode 100644
index 0000000..862e45c
--- /dev/null
+++ b/packages/eval/src/storage/types.ts
@@ -0,0 +1,26 @@
+import type { CasRef } from "@united-workforce/protocol";
+
+/** A single judge result within an eval run. */
+export type EvalJudgeRecord = {
+  name: string;
+  score: number;
+  weight: number;
+  dataHash: CasRef;
+};
+
+/** Config snapshot for an eval run. */
+export type EvalRunConfig = {
+  agent: string;
+  model: string;
+  engineVersion: string;
+};
+
+/** Full eval run record stored in CAS. */
+export type EvalRunPayload = {
+  task: string;
+  config: EvalRunConfig;
+  threadId: string;
+  judges: EvalJudgeRecord[];
+  overall: number;
+  timestamp: number;
+};
diff --git a/packages/eval/src/task/index.ts b/packages/eval/src/task/index.ts
new file mode 100644
index 0000000..67009e3
--- /dev/null
+++ b/packages/eval/src/task/index.ts
@@ -0,0 +1,2 @@
+export { loadTaskManifest, parseTaskManifest } from "./loader.js";
+export type { JudgeEntry, TaskLimits, TaskManifest } from "./types.js";
diff --git a/packages/eval/src/task/loader.ts b/packages/eval/src/task/loader.ts
new file mode 100644
index 0000000..1257c3b
--- /dev/null
+++ b/packages/eval/src/task/loader.ts
@@ -0,0 +1,95 @@
+import { readFile } from "node:fs/promises";
+import { join } from "node:path";
+import { parse as parseYaml } from "yaml";
+import type { JudgeEntry, TaskLimits, TaskManifest } from "./types.js";
+
+/** Narrow an unknown value to a plain key/value object (non-null and not an array). */
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === "object" && value !== null && !Array.isArray(value);
+}
+
+/**
+ * Validate one item of `judges` and normalise it into a JudgeEntry.
+ *
+ * `weight` defaults to 0 when it is not a number, `builtin` is true only for a
+ * literal `true`, and `entry` / `schema` become null unless they are strings.
+ *
+ * @param raw - The parsed YAML value of the list item.
+ * @param index - Position in `judges`, used only to label error messages.
+ * @throws Error if the item is not a mapping, has no non-empty string name, or
+ * is non-builtin and has no `entry`.
+ */
+function parseJudgeEntry(raw: unknown, index: number): JudgeEntry {
+  if (!isRecord(raw)) {
+    throw new Error(`judges[${index}]: expected object`);
+  }
+  const name = raw.name;
+  if (typeof name !== "string" || name === "") {
+    throw new Error(`judges[${index}]: name is required`);
+  }
+  const weight = typeof raw.weight === "number" ? raw.weight : 0;
+  const builtin = raw.builtin === true;
+  const entry = typeof raw.entry === "string" ? raw.entry : null;
+  const schema = typeof raw.schema === "string" ? raw.schema : null;
+  if (!builtin && entry === null) {
+    throw new Error(`judges[${index}] "${name}": non-builtin judge must have entry`);
+  }
+  return { name, weight, builtin, entry, schema };
+}
+
+/** Read `limits`, using 20 steps / 30 minutes for any missing or non-numeric value. */
+function parseLimits(raw: unknown): TaskLimits {
+  if (!isRecord(raw)) {
+    return { maxSteps: 20, timeoutMinutes: 30 };
+  }
+  return {
+    maxSteps: typeof raw.maxSteps === "number" ? raw.maxSteps : 20,
+    timeoutMinutes: typeof raw.timeoutMinutes === "number" ? raw.timeoutMinutes : 30,
+  };
+}
+
+/**
+ * Parse and validate the text of a task.yaml file into a TaskManifest.
+ *
+ * `description` defaults to "" and `limits` falls back to 20 steps / 30 minutes.
+ *
+ * @throws Error if the root is not a mapping, if `name`, `workflow` or `prompt`
+ * is missing or empty, if `judges` is not a non-empty array, or if any judge
+ * entry is invalid.
+ */
+export function parseTaskManifest(yamlText: string): TaskManifest {
+  const raw = parseYaml(yamlText) as unknown;
+  if (!isRecord(raw)) {
+    throw new Error("task.yaml must be a YAML mapping");
+  }
+  const name = raw.name;
+  if (typeof name !== "string" || name === "") {
+    throw new Error("task.yaml: name is required");
+  }
+  const description = typeof raw.description === "string" ? raw.description : "";
+  const workflow = raw.workflow;
+  if (typeof workflow !== "string" || workflow === "") {
+    throw new Error("task.yaml: workflow is required");
+  }
+  const prompt = raw.prompt;
+  if (typeof prompt !== "string" || prompt === "") {
+    throw new Error("task.yaml: prompt is required");
+  }
+  const limits = parseLimits(raw.limits);
+  const judgesRaw = raw.judges;
+  if (!Array.isArray(judgesRaw) || judgesRaw.length === 0) {
+    throw new Error("task.yaml: at least one judge is required");
+  }
+  const judges = judgesRaw.map((item: unknown, i: number) => parseJudgeEntry(item, i));
+  return { name, description, workflow, prompt, limits, judges };
+}
+
+/**
+ * Read `task.yaml` from `taskDir` and parse it with parseTaskManifest.
+ * The returned promise rejects if the file cannot be read or fails validation.
+ */
+export async function loadTaskManifest(taskDir: string): Promise<TaskManifest> {
+  const yamlPath = join(taskDir, "task.yaml");
+  const text = await readFile(yamlPath, "utf8");
+  return parseTaskManifest(text);
+}
diff --git a/packages/eval/src/task/types.ts b/packages/eval/src/task/types.ts
new file mode 100644
index 0000000..80f27e5
--- /dev/null
+++ b/packages/eval/src/task/types.ts
@@ -0,0 +1,28 @@
+/** Judge entry in task.yaml */
+export type JudgeEntry = {
+  name: string;
+  weight: number;
+  builtin: boolean;
+  /** Path to judge entry script (relative to task root). Required for non-builtin judges. */
+  entry: string | null;
+  /** Path to OCAS schema JSON for judge data. Expected for non-builtin judges; not enforced by the loader. */
+  schema: string | null;
+};
+
+/** Limits for eval execution. */
+export type TaskLimits = {
+  maxSteps: number;
+  timeoutMinutes: number;
+};
+
+/** Parsed task.yaml manifest. */
+export type TaskManifest = {
+  name: string;
+  description: string;
+  /** Workflow name or relative path to .yaml file. */
+  workflow: string;
+  /** Initial prompt for thread start. */
+  prompt: string;
+  limits: TaskLimits;
+  judges: JudgeEntry[];
+};
diff --git a/packages/eval/tsconfig.json b/packages/eval/tsconfig.json
new file mode 100644
index 0000000..e8bf6d5
--- /dev/null
+++ b/packages/eval/tsconfig.json
@@ -0,0 +1,9 @@
+{
+  "extends": "../../tsconfig.json",
+  "compilerOptions": {
+    "rootDir": "src",
+    "outDir": "dist"
+  },
+  "include": ["src"],
+  "references": [{ "path": "../protocol" }, { "path": "../util" }]
+}
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 11093aa..299e83d 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -228,6 +228,31 @@ importers:
         specifier: ^8.0.13
         version: 8.0.16(@types/node@25.9.1)(esbuild@0.27.7)(jiti@2.7.0)(yaml@2.9.0)
 
+  packages/eval:
+    dependencies:
+      '@ocas/core':
+        specifier: ^0.3.0
+        version: 0.3.0
+      '@ocas/fs':
+        specifier: ^0.3.0
+        version: 0.3.0
+      '@united-workforce/protocol':
+        specifier: workspace:^
+        version: link:../protocol
+      '@united-workforce/util':
+        specifier: workspace:^
+        version: link:../util
+      commander:
+        specifier: ^14.0.3
+        version: 14.0.3
+      yaml:
+        specifier: ^2.9.0
+        version: 2.9.0
+    devDependencies:
+      typescript:
+        specifier: ^5.8.3
+        version: 5.9.3
+
   packages/protocol:
     dependencies:
       '@ocas/core':
diff --git a/tsconfig.json b/tsconfig.json
index 76e1129..7cba641 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -25,6 +25,7 @@
     { "path": "packages/agent-builtin" },
     { "path": "packages/agent-mock" },
     { "path": "packages/agent-claude-code" },
-    { "path": "packages/cli" }
+    { "path": "packages/cli" },
+    { "path": "packages/eval" }
   ]
 }