From 4659258693d17135b81fd41444652fd03d00dc47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= Date: Sun, 7 Jun 2026 13:06:59 +0000 Subject: [PATCH] fix: gc must traverse oneOf and preserve template content collectRefs silently skipped oneOf even though it is in the meta-schema's allowed keys. uwf step nodes use the standard JSON-Schema idiom oneOf: [{type:"null"}, {type:"string", format:"ocas_ref"}] for nullable prev/detail/start refs, so walk() never reached the chain and gc swept the intermediate steps as false orphans. Mirror the anyOf branch in collectRefs so every oneOf variant contributes refs. Also align gc with closure.ts Phase 3: walk @ocas/template/text/ content for every reachable schema so rendered template nodes survive when their schema is reachable, and are still collected when the schema itself is unreachable. Fixes #93 --- .changeset/93-gc-collectrefs-oneof.md | 17 ++ packages/cli/tests/gc.test.ts | 97 ++++++++++++ packages/core/src/gc.test.ts | 216 ++++++++++++++++++++++++++ packages/core/src/gc.ts | 42 ++++- packages/core/src/schema.test.ts | 79 ++++++++++ packages/core/src/schema.ts | 11 ++ 6 files changed, 460 insertions(+), 2 deletions(-) create mode 100644 .changeset/93-gc-collectrefs-oneof.md diff --git a/.changeset/93-gc-collectrefs-oneof.md b/.changeset/93-gc-collectrefs-oneof.md new file mode 100644 index 0000000..e0daa0e --- /dev/null +++ b/.changeset/93-gc-collectrefs-oneof.md @@ -0,0 +1,17 @@ +--- +"@ocas/core": patch +--- + +Fix `gc` false-orphan deletion of `oneOf`-linked CAS chains and template content nodes. + +`collectRefs` now traverses `oneOf` sub-schemas (previously skipped even though +the meta-schema accepted the keyword), so `walk`/`refs` correctly follow +`ocas_ref` fields nested inside `oneOf` combinators (e.g. nullable `prev` / +`detail` / `workflow` refs in uwf step chains). + +`gc` no longer treats `@ocas/template/text/*` variables as roots; instead it +walks template content only when its referenced schema is itself reachable +from non-template roots, mirroring `computeClosure` Phase 3. This prevents +orphan template nodes from surviving GC when their schema is unreachable. + +Fixes #93 diff --git a/packages/cli/tests/gc.test.ts b/packages/cli/tests/gc.test.ts index ebcc884..d8d49e6 100644 --- a/packages/cli/tests/gc.test.ts +++ b/packages/cli/tests/gc.test.ts @@ -123,3 +123,100 @@ describe("Phase 6: GC", () => { expect(envValue(afterGc)).toBe(false); }); }); + +// ────────────────────────────────────────────────────────────────────────────── +// Issue #93: gc must not collect uwf-style step chains joined by oneOf prev +// ────────────────────────────────────────────────────────────────────────────── +describe("GC #93 - oneOf step chain CLI integration", () => { + test("6.5 gc preserves step chain joined by oneOf prev", async () => { + const subStore = mkdtempSync(join(tmpdir(), "ocas-e2e-93-")); + try { + const { openStore: openFsStore } = await import("@ocas/fs"); + const { putSchema } = await import("@ocas/core"); + const store = await openFsStore(subStore); + + const stepSchemaHash = putSchema(store, { + type: "object", + properties: { + payload: { type: "string" }, + prev: { + oneOf: [{ type: "null" }, { type: "string", format: "ocas_ref" }], + }, + }, + }); + + const step1File = join(subStore, "step1.json"); + writeFileSync(step1File, JSON.stringify({ payload: "a", prev: null })); + const runCliSub = ( + args: string[], + ): { stdout: string; stderr: string; exitCode: number } => { + try { + const stdout = execFileSync( + "node", + [entrypoint, "--home", subStore, ...args], + { encoding: "utf-8", timeout: 10000 }, + ); + return { stdout: stdout.trim(), stderr: "", exitCode: 0 }; + } catch (e: unknown) { + const err = e as { + stdout?: string; + stderr?: string; + status?: number; + }; + return { + stdout: (err.stdout ?? "").trim(), + stderr: (err.stderr ?? "").trim(), + exitCode: err.status ?? 1, + }; + } + }; + + const { stdout: s1Out } = runCliSub(["put", stepSchemaHash, step1File]); + const step1Hash = envValue(s1Out) as string; + + const step2File = join(subStore, "step2.json"); + writeFileSync( + step2File, + JSON.stringify({ payload: "b", prev: step1Hash }), + ); + const { stdout: s2Out } = runCliSub(["put", stepSchemaHash, step2File]); + const step2Hash = envValue(s2Out) as string; + + const step3File = join(subStore, "step3.json"); + writeFileSync( + step3File, + JSON.stringify({ payload: "c", prev: step2Hash }), + ); + const { stdout: s3Out } = runCliSub(["put", stepSchemaHash, step3File]); + const step3Hash = envValue(s3Out) as string; + + const orphanFile = join(subStore, "orphan-step.json"); + writeFileSync( + orphanFile, + JSON.stringify({ payload: "orphan", prev: null }), + ); + const { stdout: orphanOut } = runCliSub([ + "put", + stepSchemaHash, + orphanFile, + ]); + const orphanHash = envValue(orphanOut) as string; + + runCliSub(["var", "set", "@test/thread/head", step3Hash]); + + const { exitCode } = runCliSub(["gc"]); + expect(exitCode).toBe(0); + + const { stdout: has1 } = runCliSub(["has", step1Hash]); + expect(envValue(has1)).toBe(true); + const { stdout: has2 } = runCliSub(["has", step2Hash]); + expect(envValue(has2)).toBe(true); + const { stdout: has3 } = runCliSub(["has", step3Hash]); + expect(envValue(has3)).toBe(true); + const { stdout: hasOrphan } = runCliSub(["has", orphanHash]); + expect(envValue(hasOrphan)).toBe(false); + } finally { + rmSync(subStore, { recursive: true, force: true }); + } + }); +}); diff --git a/packages/core/src/gc.test.ts b/packages/core/src/gc.test.ts index 78c8a41..42748a3 100644 --- a/packages/core/src/gc.test.ts +++ b/packages/core/src/gc.test.ts @@ -119,3 +119,219 @@ describe("GC - Variable Model Refactoring", () => { expect(store.cas.has(hashB)).toBe(true); }); }); + +// ────────────────────────────────────────────────────────────────────────────── +// Suite B: gc end-to-end with oneOf step chains (issue #93) +// ────────────────────────────────────────────────────────────────────────────── +describe("GC - oneOf step chain preservation (#93)", () => { + test("B.1 preserves a 3-step chain joined by oneOf nullable prev", async () => { + const store = createMemoryStore(); + bootstrap(store); + + const stepSchema = putSchema(store, { + type: "object", + properties: { + payload: { type: "string" }, + prev: { + oneOf: [{ type: "null" }, { type: "string", format: "ocas_ref" }], + }, + }, + }); + + const step1 = store.cas.put(stepSchema, { payload: "a", prev: null }); + const step2 = store.cas.put(stepSchema, { payload: "b", prev: step1 }); + const step3 = store.cas.put(stepSchema, { payload: "c", prev: step2 }); + const orphanStep = store.cas.put(stepSchema, { + payload: "orphan", + prev: null, + }); + + store.var.set("@test/thread/head", step3); + + gc(store); + + expect(store.cas.has(step1)).toBe(true); + expect(store.cas.has(step2)).toBe(true); + expect(store.cas.has(step3)).toBe(true); + expect(store.cas.has(stepSchema)).toBe(true); + expect(store.cas.has(orphanStep)).toBe(false); + }); + + test("B.2 preserves a chain that mixes oneOf detail refs", async () => { + const store = createMemoryStore(); + bootstrap(store); + + const detailSchema = putSchema(store, { + type: "object", + properties: { info: { type: "string" } }, + }); + const stepSchema = putSchema(store, { + type: "object", + properties: { + prev: { + oneOf: [{ type: "null" }, { type: "string", format: "ocas_ref" }], + }, + detail: { + oneOf: [{ type: "null" }, { type: "string", format: "ocas_ref" }], + }, + }, + }); + + const detail1 = store.cas.put(detailSchema, { info: "d1" }); + const detail2 = store.cas.put(detailSchema, { info: "d2" }); + const step1 = store.cas.put(stepSchema, { + prev: null, + detail: detail1, + }); + const step2 = store.cas.put(stepSchema, { + prev: step1, + detail: detail2, + }); + + store.var.set("@test/thread/head", step2); + + gc(store); + + expect(store.cas.has(step1)).toBe(true); + expect(store.cas.has(step2)).toBe(true); + expect(store.cas.has(detail1)).toBe(true); + expect(store.cas.has(detail2)).toBe(true); + }); + + test("B.3 preserves a workflow node referenced via oneOf from a step", async () => { + const store = createMemoryStore(); + bootstrap(store); + + const workflowSchema = putSchema(store, { + type: "object", + properties: { name: { type: "string" } }, + }); + const stepSchema = putSchema(store, { + type: "object", + properties: { + workflow: { + oneOf: [{ type: "null" }, { type: "string", format: "ocas_ref" }], + }, + }, + }); + + const workflowNode = store.cas.put(workflowSchema, { + name: "solve-issue", + }); + const step = store.cas.put(stepSchema, { workflow: workflowNode }); + + store.var.set("@test/thread/head", step); + store.var.set("@uwf/registry/solve-issue", workflowNode); + + gc(store); + + expect(store.cas.has(step)).toBe(true); + expect(store.cas.has(workflowNode)).toBe(true); + }); + + test("B.4 regression: existing anyOf traversal still works", async () => { + const store = createMemoryStore(); + bootstrap(store); + + const childSchema = putSchema(store, { type: "string" }); + const parentSchema = putSchema(store, { + type: "object", + properties: { + child: { + anyOf: [{ type: "null" }, { type: "string", format: "ocas_ref" }], + }, + }, + }); + + const child = store.cas.put(childSchema, "child-value"); + const parent = store.cas.put(parentSchema, { child }); + + store.var.set("@test/parent", parent); + + gc(store); + + expect(store.cas.has(parent)).toBe(true); + expect(store.cas.has(child)).toBe(true); + }); + + test("B.5 reports correct stats with oneOf chains", async () => { + const store = createMemoryStore(); + bootstrap(store); + + const stepSchema = putSchema(store, { + type: "object", + properties: { + payload: { type: "string" }, + prev: { + oneOf: [{ type: "null" }, { type: "string", format: "ocas_ref" }], + }, + }, + }); + + const step1 = store.cas.put(stepSchema, { payload: "a", prev: null }); + const step2 = store.cas.put(stepSchema, { payload: "b", prev: step1 }); + const step3 = store.cas.put(stepSchema, { payload: "c", prev: step2 }); + store.cas.put(stepSchema, { payload: "orphan", prev: null }); + + store.var.set("@test/thread/head", step3); + + const stats = gc(store); + + expect(stats.collected).toBe(1); + expect(stats.reachable).toBeGreaterThanOrEqual(4); + expect(stats.scanned).toBeGreaterThanOrEqual(1); + }); +}); + +// ────────────────────────────────────────────────────────────────────────────── +// Suite C: gc preserves template content for reachable schemas (issue #93) +// ────────────────────────────────────────────────────────────────────────────── +describe("GC - template content preservation (#93)", () => { + test("C.1 preserves @ocas/template/text/ content when schema is reachable", async () => { + const store = createMemoryStore(); + const aliases = bootstrap(store); + const stringHash = aliases["@ocas/string"] as string; + + const schemaA = putSchema(store, { + type: "object", + properties: { x: { type: "number" } }, + }); + const nodeA = store.cas.put(schemaA, { x: 42 }); + store.var.set("@test/a", nodeA); + + const tplA = store.cas.put(stringHash, "rendered: {{x}}"); + store.var.set(`@ocas/template/text/${schemaA}`, tplA); + + gc(store); + + expect(store.cas.has(tplA)).toBe(true); + + const tplVar = store.var.get(`@ocas/template/text/${schemaA}`); + expect(tplVar).not.toBeNull(); + expect(tplVar?.value).toBe(tplA); + }); + + test("C.2 removes orphan template content for an unreachable schema", async () => { + const store = createMemoryStore(); + const aliases = bootstrap(store); + const stringHash = aliases["@ocas/string"] as string; + + const schemaA = putSchema(store, { + type: "object", + properties: { x: { type: "number" } }, + }); + // Note: do NOT bind any variable to a node typed by schemaA — schemaA is + // unreachable as a typeHash of any reachable node. + + const otherSchema = putSchema(store, { type: "string" }); + const otherNode = store.cas.put(otherSchema, "other"); + store.var.set("@test/other", otherNode); + + const tplA = store.cas.put(stringHash, "rendered: {{x}}"); + store.var.set(`@ocas/template/text/${schemaA}`, tplA); + + gc(store); + + expect(store.cas.has(tplA)).toBe(false); + }); +}); diff --git a/packages/core/src/gc.ts b/packages/core/src/gc.ts index 18e6abe..0d0885c 100644 --- a/packages/core/src/gc.ts +++ b/packages/core/src/gc.ts @@ -8,10 +8,17 @@ export interface GcStats { scanned: number; // Variables scanned as roots } +const TEMPLATE_VAR_PREFIX = "@ocas/template/text/"; + /** * Garbage collection: mark-and-sweep algorithm - * - Roots: all variable values (global, not scoped) + * - Roots: all variable values (global, not scoped), excluding + * `@ocas/template/text/*` variables — those are added in a follow-up + * phase only when their referenced schema is itself reachable. * - Mark: recursively walk refs from roots + * - Template phase: for every reachable schema, walk the contents of its + * `@ocas/template/text/` template variable (mirrors + * `computeClosure` Phase 3). * - Sweep: delete unmarked nodes * - Schema preservation: schemas of reachable nodes are also marked */ @@ -21,9 +28,13 @@ export function gc(store: Store): GcStats { const variables = store.var.list(); const scanned = variables.length; - // Collect unique root hashes from all variables + // Collect unique root hashes from all variables, except template + // variables (`@ocas/template/text/*`). Template variables are processed + // in a follow-up phase so their content is preserved only when the + // referenced schema is itself reachable from non-template roots. const roots = new Set(); for (const variable of variables) { + if (variable.name.startsWith(TEMPLATE_VAR_PREFIX)) continue; roots.add(variable.value); } @@ -63,6 +74,33 @@ export function gc(store: Store): GcStats { } } + // Template phase: include `@ocas/template/text/` content nodes + // when their schema is in the reachable set (mirrors closure.ts Phase 3). + // Snapshot the current reachable set before walking template content so + // that template-only nodes do not transitively pull in further templates. + const reachableSnapshot = [...reachable]; + for (const hash of reachableSnapshot) { + const templateName = `${TEMPLATE_VAR_PREFIX}${hash}`; + const variants = store.var.list({ exactName: templateName }); + for (const variant of variants) { + walk(store, variant.value, (h, n) => { + reachable.add(h); + reachable.add(n.type); + }); + // Walk the template content's schema chain too + const tNode = store.cas.get(variant.value); + if (tNode) { + let current: Hash | null = tNode.type; + while (current !== null && !reachable.has(current)) { + reachable.add(current); + const node = store.cas.get(current); + if (!node || node.type === current) break; + current = node.type; + } + } + } + } + // Preserve all self-referencing nodes (bootstrap meta-schema) // These are nodes where type === hash const allHashes = store.cas.listAll(); diff --git a/packages/core/src/schema.test.ts b/packages/core/src/schema.test.ts index 2c5ad4d..7f34b00 100644 --- a/packages/core/src/schema.test.ts +++ b/packages/core/src/schema.test.ts @@ -743,3 +743,82 @@ describe("bootstrap meta-schema self-reference", () => { expect(refList).toContain(targetHash); }); }); + +// ────────────────────────────────────────────────────────────────────────────── +// Suite A: collectRefs() oneOf traversal (issue #93) +// ────────────────────────────────────────────────────────────────────────────── +describe("collectRefs oneOf traversal", () => { + test("A.1 returns the ref hash from the chosen oneOf branch (string variant)", async () => { + const store = createMemoryStore(); + const innerSchema = putSchema(store, { type: "string" }); + const schema = putSchema(store, { + type: "object", + properties: { + prev: { + oneOf: [{ type: "null" }, { type: "string", format: "ocas_ref" }], + }, + }, + }); + + const targetHash = store.cas.put(innerSchema, "target"); + const nodeHash = store.cas.put(schema, { prev: targetHash }); + const node = store.cas.get(nodeHash) as CasNode; + + expect(refs(store, node)).toContain(targetHash); + }); + + test("A.2 returns no ref when oneOf matches the null variant", async () => { + const store = createMemoryStore(); + const schema = putSchema(store, { + type: "object", + properties: { + prev: { + oneOf: [{ type: "null" }, { type: "string", format: "ocas_ref" }], + }, + }, + }); + + const nodeHash = store.cas.put(schema, { prev: null }); + const node = store.cas.get(nodeHash) as CasNode; + + expect(refs(store, node)).toEqual([]); + }); + + test("A.3 traverses nested combinators inside oneOf", async () => { + const store = createMemoryStore(); + const innerSchema = putSchema(store, { type: "string" }); + const schema = putSchema(store, { + oneOf: [ + { type: "null" }, + { + type: "object", + properties: { ref: { type: "string", format: "ocas_ref" } }, + }, + ], + }); + + const targetHash = store.cas.put(innerSchema, "target"); + const nodeHash = store.cas.put(schema, { ref: targetHash }); + const node = store.cas.get(nodeHash) as CasNode; + + expect(refs(store, node)).toContain(targetHash); + }); + + test("A.4 multiple ref branches in oneOf all surface", async () => { + const store = createMemoryStore(); + const innerSchema = putSchema(store, { type: "string" }); + const schema = putSchema(store, { + oneOf: [ + { type: "string", format: "ocas_ref" }, + { type: "string", format: "ocas_ref" }, + ], + }); + + const targetHash = store.cas.put(innerSchema, "target"); + const nodeHash = store.cas.put(schema, targetHash); + const node = store.cas.get(nodeHash) as CasNode; + + const result = refs(store, node); + expect(result).toContain(targetHash); + }); +}); diff --git a/packages/core/src/schema.ts b/packages/core/src/schema.ts index 63d69d3..ed6a468 100644 --- a/packages/core/src/schema.ts +++ b/packages/core/src/schema.ts @@ -311,6 +311,17 @@ export function collectRefs(schema: JSONSchema, value: unknown): Hash[] { return result; } + // oneOf — JSON Schema requires exactly one branch to validate, but for + // ref collection we conservatively traverse every branch (the meta-schema + // accepts oneOf alongside anyOf, and we cannot statically know which + // branch the value will match). Mirrors anyOf handling. + if (Array.isArray(schema.oneOf)) { + for (const sub of schema.oneOf as JSONSchema[]) { + result.push(...collectRefs(sub, value)); + } + return result; + } + // P2: allOf — each sub-schema applies to the same value if (Array.isArray(schema.allOf)) { for (const sub of schema.allOf as JSONSchema[]) {