Files
united-workforce/packages/eval/src/storage/schemas.ts
T
xiaoju 99619d85db
CI / check (pull_request) Successful in 1m42s
feat: eval package scaffold with CLI, schemas, types, task loader
New package @united-workforce/eval (uwf-eval CLI):

- CLI skeleton: run/report/diff/list subcommands (stubs)
- 5 OCAS schemas: eval-run, judge-frontmatter, judge-upstream,
  judge-hallucination, judge-token-stats
- TaskManifest type + parser/validator for task.yaml
- JudgeOutput/JudgeInput types for judge contract
- EvalRunPayload/EvalRunConfig/EvalJudgeRecord storage types
- 19 unit tests: task loader validation + schema definitions

Refs #69
2026-06-04 23:42:16 +00:00

124 lines
3.2 KiB
TypeScript

import type { JSONSchema } from "@ocas/core";
/**
 * Sub-schema for the `config` property of {@link EVAL_RUN_SCHEMA}:
 * an object whose `agent`, `model` and `engineVersion` strings are all required.
 */
const evalRunConfigSchema: JSONSchema = {
  type: "object",
  required: ["agent", "model", "engineVersion"],
  properties: {
    agent: { type: "string" },
    model: { type: "string" },
    engineVersion: { type: "string" },
  },
};

/**
 * Sub-schema for a single element of the `judges` array of
 * {@link EVAL_RUN_SCHEMA}: string `name` and `dataHash`, numeric `score`
 * and `weight`, all required.
 */
const evalRunJudgeEntrySchema: JSONSchema = {
  type: "object",
  required: ["name", "score", "weight", "dataHash"],
  properties: {
    name: { type: "string" },
    score: { type: "number" },
    weight: { type: "number" },
    dataHash: { type: "string" },
  },
};

/**
 * JSON Schema titled `@uwf/eval-run`.
 *
 * Describes an object with six required properties:
 * - `task` and `threadId`: strings
 * - `config`: object with required string fields `agent`, `model`, `engineVersion`
 * - `judges`: array of per-judge objects (`name`, `score`, `weight`, `dataHash`)
 * - `overall`: number
 * - `timestamp`: integer
 */
export const EVAL_RUN_SCHEMA: JSONSchema = {
  title: "@uwf/eval-run",
  type: "object",
  required: ["task", "config", "threadId", "judges", "overall", "timestamp"],
  properties: {
    task: { type: "string" },
    config: evalRunConfigSchema,
    threadId: { type: "string" },
    judges: { type: "array", items: evalRunJudgeEntrySchema },
    overall: { type: "number" },
    timestamp: { type: "integer" },
  },
};
/**
 * Sub-schema for a single element of `invalidSteps` in
 * {@link EVAL_JUDGE_FRONTMATTER_SCHEMA}: integer `stepIndex`, string `role`
 * and an `errors` array of strings, all required.
 */
const frontmatterInvalidStepSchema: JSONSchema = {
  type: "object",
  required: ["stepIndex", "role", "errors"],
  properties: {
    stepIndex: { type: "integer" },
    role: { type: "string" },
    errors: { type: "array", items: { type: "string" } },
  },
};

/**
 * JSON Schema titled `@uwf/eval-judge-frontmatter`.
 *
 * Describes an object with three required properties: the integers
 * `stepsTotal` and `stepsValid`, and `invalidSteps`, an array of objects
 * each carrying `stepIndex`, `role` and a list of `errors` strings.
 */
export const EVAL_JUDGE_FRONTMATTER_SCHEMA: JSONSchema = {
  title: "@uwf/eval-judge-frontmatter",
  type: "object",
  required: ["stepsTotal", "stepsValid", "invalidSteps"],
  properties: {
    stepsTotal: { type: "integer" },
    stepsValid: { type: "integer" },
    invalidSteps: { type: "array", items: frontmatterInvalidStepSchema },
  },
};
/**
 * Sub-schema for a single element of `perStep` in
 * {@link EVAL_JUDGE_UPSTREAM_SCHEMA}: string `role`, string arrays
 * `consumed` and `missed`, and numeric `score`, all required.
 */
const upstreamStepSchema: JSONSchema = {
  type: "object",
  required: ["role", "consumed", "missed", "score"],
  properties: {
    role: { type: "string" },
    consumed: { type: "array", items: { type: "string" } },
    missed: { type: "array", items: { type: "string" } },
    score: { type: "number" },
  },
};

/**
 * JSON Schema titled `@uwf/eval-judge-upstream`.
 *
 * Describes an object whose only (and required) property is `perStep`,
 * an array of per-step objects with `role`, `consumed`, `missed` and `score`.
 */
export const EVAL_JUDGE_UPSTREAM_SCHEMA: JSONSchema = {
  title: "@uwf/eval-judge-upstream",
  type: "object",
  required: ["perStep"],
  properties: {
    perStep: { type: "array", items: upstreamStepSchema },
  },
};
/**
 * Sub-schema for a single element of `perStep` in
 * {@link EVAL_JUDGE_HALLUCINATION_SCHEMA}: string `role`, a `hallucinations`
 * array of strings, and numeric `score`, all required.
 */
const hallucinationStepSchema: JSONSchema = {
  type: "object",
  required: ["role", "hallucinations", "score"],
  properties: {
    role: { type: "string" },
    hallucinations: { type: "array", items: { type: "string" } },
    score: { type: "number" },
  },
};

/**
 * JSON Schema titled `@uwf/eval-judge-hallucination`.
 *
 * Describes an object whose only (and required) property is `perStep`,
 * an array of per-step objects with `role`, `hallucinations` and `score`.
 */
export const EVAL_JUDGE_HALLUCINATION_SCHEMA: JSONSchema = {
  title: "@uwf/eval-judge-hallucination",
  type: "object",
  required: ["perStep"],
  properties: {
    perStep: { type: "array", items: hallucinationStepSchema },
  },
};
/**
 * Sub-schema for a single element of `perStep` in
 * {@link EVAL_JUDGE_TOKEN_STATS_SCHEMA}: string `role`, integer
 * `inputTokens`, `outputTokens` and `turns`, and numeric `duration`,
 * all required.
 */
const tokenStatsStepSchema: JSONSchema = {
  type: "object",
  required: ["role", "inputTokens", "outputTokens", "turns", "duration"],
  properties: {
    role: { type: "string" },
    inputTokens: { type: "integer" },
    outputTokens: { type: "integer" },
    turns: { type: "integer" },
    duration: { type: "number" },
  },
};

/**
 * JSON Schema titled `@uwf/eval-judge-token-stats`.
 *
 * Describes an object with four required properties: the integer totals
 * `totalInput`, `totalOutput` and `totalTurns`, plus `perStep`, an array of
 * per-step objects with `role`, `inputTokens`, `outputTokens`, `turns`
 * and `duration`.
 */
export const EVAL_JUDGE_TOKEN_STATS_SCHEMA: JSONSchema = {
  title: "@uwf/eval-judge-token-stats",
  type: "object",
  required: ["totalInput", "totalOutput", "totalTurns", "perStep"],
  properties: {
    totalInput: { type: "integer" },
    totalOutput: { type: "integer" },
    totalTurns: { type: "integer" },
    perStep: { type: "array", items: tokenStatsStepSchema },
  },
};