fae9e9ed3a
CI / check (pull_request) Successful in 1m45s
Implement the uwf-eval run <task-dir> command with 3-phase pipeline:

- prepare: read task.yaml, copy fixture/ to temp workdir
- execute: shell out to uwf thread start + exec
- collect: run judges, compute weighted score, store CAS node, set @uwf/eval/<task>/latest variable

Changes:

- src/runner/ — types, prepare, execute, collect, index
- src/storage/store.ts — createEvalStore(), setEvalLatest()
- src/commands/run.ts — full pipeline wiring with --agent/--model/--count
- 9 new tests (prepare + collect + weighted scoring)

Builtin judges return placeholder score 0 (Phase 1c).

Refs #70
35 lines
933 B
TypeScript
35 lines
933 B
TypeScript
// Judge types (type-only re-export, erased from the emitted JavaScript).
export type {
  JudgeInput,
  JudgeOutput,
} from "./judge/index.js";
// Runner types (type-only re-export from the runner module).
export type {
  CollectInput,
  CollectResult,
  ExecuteInput,
  ExecuteResult,
  JudgeRunner,
  JudgeRunOutput,
  JudgeSummary,
  PrepareResult,
  RunOptions,
  RunResult,
} from "./runner/index.js";
// Runner pipeline (prepare → execute → collect) — value re-exports.
export {
  collect,
  computeOverall,
  execute,
  getEngineVersion,
  prepare,
} from "./runner/index.js";
// Storage types (type-only re-export from the storage module).
export type {
  EvalJudgeRecord,
  EvalRunConfig,
  EvalRunPayload,
  EvalStore,
} from "./storage/index.js";
// Storage schemas and store helpers — value re-exports from the storage module.
export {
  createEvalStore,
  EVAL_JUDGE_FRONTMATTER_SCHEMA,
  EVAL_JUDGE_HALLUCINATION_SCHEMA,
  EVAL_JUDGE_TOKEN_STATS_SCHEMA,
  EVAL_JUDGE_UPSTREAM_SCHEMA,
  EVAL_RUN_SCHEMA,
  setEvalLatest,
} from "./storage/index.js";
// Task manifest types (type-only re-export from the task module).
export type {
  JudgeEntry,
  TaskLimits,
  TaskManifest,
} from "./task/index.js";

// Task manifest loading/parsing — value re-exports from the task module.
export {
  loadTaskManifest,
  parseTaskManifest,
} from "./task/index.js";