diff --git a/.changeset/93-gc-collectrefs-oneof.md b/.changeset/93-gc-collectrefs-oneof.md new file mode 100644 index 0000000..e0daa0e --- /dev/null +++ b/.changeset/93-gc-collectrefs-oneof.md @@ -0,0 +1,17 @@ +--- +"@ocas/core": patch +--- + +Fix `gc` false-orphan deletion of `oneOf`-linked CAS chains and template content nodes. + +`collectRefs` now traverses `oneOf` sub-schemas (previously skipped even though +the meta-schema accepted the keyword), so `walk`/`refs` correctly follow +`ocas_ref` fields nested inside `oneOf` combinators (e.g. nullable `prev` / +`detail` / `workflow` refs in uwf step chains). + +`gc` no longer treats `@ocas/template/text/*` variables as roots; instead it +walks template content only when its referenced schema is itself reachable +from non-template roots, mirroring `computeClosure` Phase 3. This prevents +orphan template nodes from surviving GC when their schema is unreachable. + +Fixes #93 diff --git a/packages/cli/tests/gc.test.ts b/packages/cli/tests/gc.test.ts index ebcc884..d8d49e6 100644 --- a/packages/cli/tests/gc.test.ts +++ b/packages/cli/tests/gc.test.ts @@ -123,3 +123,100 @@ describe("Phase 6: GC", () => { expect(envValue(afterGc)).toBe(false); }); }); + +// ────────────────────────────────────────────────────────────────────────────── +// Issue #93: gc must not collect uwf-style step chains joined by oneOf prev +// ────────────────────────────────────────────────────────────────────────────── +describe("GC #93 - oneOf step chain CLI integration", () => { + test("6.5 gc preserves step chain joined by oneOf prev", async () => { + const subStore = mkdtempSync(join(tmpdir(), "ocas-e2e-93-")); + try { + const { openStore: openFsStore } = await import("@ocas/fs"); + const { putSchema } = await import("@ocas/core"); + const store = await openFsStore(subStore); + + const stepSchemaHash = putSchema(store, { + type: "object", + properties: { + payload: { type: "string" }, + prev: { + oneOf: [{ type: "null" }, { type: "string", format: "ocas_ref" }], + }, + }, + }); + + const step1File = join(subStore, "step1.json"); + writeFileSync(step1File, JSON.stringify({ payload: "a", prev: null })); + const runCliSub = ( + args: string[], + ): { stdout: string; stderr: string; exitCode: number } => { + try { + const stdout = execFileSync( + "node", + [entrypoint, "--home", subStore, ...args], + { encoding: "utf-8", timeout: 10000 }, + ); + return { stdout: stdout.trim(), stderr: "", exitCode: 0 }; + } catch (e: unknown) { + const err = e as { + stdout?: string; + stderr?: string; + status?: number; + }; + return { + stdout: (err.stdout ?? "").trim(), + stderr: (err.stderr ?? "").trim(), + exitCode: err.status ?? 1, + }; + } + }; + + const { stdout: s1Out } = runCliSub(["put", stepSchemaHash, step1File]); + const step1Hash = envValue(s1Out) as string; + + const step2File = join(subStore, "step2.json"); + writeFileSync( + step2File, + JSON.stringify({ payload: "b", prev: step1Hash }), + ); + const { stdout: s2Out } = runCliSub(["put", stepSchemaHash, step2File]); + const step2Hash = envValue(s2Out) as string; + + const step3File = join(subStore, "step3.json"); + writeFileSync( + step3File, + JSON.stringify({ payload: "c", prev: step2Hash }), + ); + const { stdout: s3Out } = runCliSub(["put", stepSchemaHash, step3File]); + const step3Hash = envValue(s3Out) as string; + + const orphanFile = join(subStore, "orphan-step.json"); + writeFileSync( + orphanFile, + JSON.stringify({ payload: "orphan", prev: null }), + ); + const { stdout: orphanOut } = runCliSub([ + "put", + stepSchemaHash, + orphanFile, + ]); + const orphanHash = envValue(orphanOut) as string; + + runCliSub(["var", "set", "@test/thread/head", step3Hash]); + + const { exitCode } = runCliSub(["gc"]); + expect(exitCode).toBe(0); + + const { stdout: has1 } = runCliSub(["has", step1Hash]); + expect(envValue(has1)).toBe(true); + const { stdout: has2 } = runCliSub(["has", step2Hash]); + expect(envValue(has2)).toBe(true); + const { stdout: has3 } = runCliSub(["has", step3Hash]); + expect(envValue(has3)).toBe(true); + const { stdout: hasOrphan } = runCliSub(["has", orphanHash]); + expect(envValue(hasOrphan)).toBe(false); + } finally { + rmSync(subStore, { recursive: true, force: true }); + } + }); +}); diff --git a/packages/core/src/gc.test.ts b/packages/core/src/gc.test.ts index 78c8a41..42748a3 100644 --- a/packages/core/src/gc.test.ts +++ b/packages/core/src/gc.test.ts @@ -119,3 +119,219 @@ describe("GC - Variable Model Refactoring", () => { expect(store.cas.has(hashB)).toBe(true); }); }); + +// ────────────────────────────────────────────────────────────────────────────── +// Suite B: gc end-to-end with oneOf step chains (issue #93) +// ────────────────────────────────────────────────────────────────────────────── +describe("GC - oneOf step chain preservation (#93)", () => { + test("B.1 preserves a 3-step chain joined by oneOf nullable prev", async () => { + const store = createMemoryStore(); + bootstrap(store); + + const stepSchema = putSchema(store, { + type: "object", + properties: { + payload: { type: "string" }, + prev: { + oneOf: [{ type: "null" }, { type: "string", format: "ocas_ref" }], + }, + }, + }); + + const step1 = store.cas.put(stepSchema, { payload: "a", prev: null }); + const step2 = store.cas.put(stepSchema, { payload: "b", prev: step1 }); + const step3 = store.cas.put(stepSchema, { payload: "c", prev: step2 }); + const orphanStep = store.cas.put(stepSchema, { + payload: "orphan", + prev: null, + }); + + store.var.set("@test/thread/head", step3); + + gc(store); + + expect(store.cas.has(step1)).toBe(true); + expect(store.cas.has(step2)).toBe(true); + expect(store.cas.has(step3)).toBe(true); + expect(store.cas.has(stepSchema)).toBe(true); + expect(store.cas.has(orphanStep)).toBe(false); + }); + + test("B.2 preserves a chain that mixes oneOf detail refs", async () => { + const store = createMemoryStore(); + bootstrap(store); + + const detailSchema = putSchema(store, { + type: "object", + properties: { info: { type: "string" } }, + }); + const stepSchema = putSchema(store, { + type: "object", + properties: { + prev: { + oneOf: [{ type: "null" }, { type: "string", format: "ocas_ref" }], + }, + detail: { + oneOf: [{ type: "null" }, { type: "string", format: "ocas_ref" }], + }, + }, + }); + + const detail1 = store.cas.put(detailSchema, { info: "d1" }); + const detail2 = store.cas.put(detailSchema, { info: "d2" }); + const step1 = store.cas.put(stepSchema, { + prev: null, + detail: detail1, + }); + const step2 = store.cas.put(stepSchema, { + prev: step1, + detail: detail2, + }); + + store.var.set("@test/thread/head", step2); + + gc(store); + + expect(store.cas.has(step1)).toBe(true); + expect(store.cas.has(step2)).toBe(true); + expect(store.cas.has(detail1)).toBe(true); + expect(store.cas.has(detail2)).toBe(true); + }); + + test("B.3 preserves a workflow node referenced via oneOf from a step", async () => { + const store = createMemoryStore(); + bootstrap(store); + + const workflowSchema = putSchema(store, { + type: "object", + properties: { name: { type: "string" } }, + }); + const stepSchema = putSchema(store, { + type: "object", + properties: { + workflow: { + oneOf: [{ type: "null" }, { type: "string", format: "ocas_ref" }], + }, + }, + }); + + const workflowNode = store.cas.put(workflowSchema, { + name: "solve-issue", + }); + const step = store.cas.put(stepSchema, { workflow: workflowNode }); + + store.var.set("@test/thread/head", step); + store.var.set("@uwf/registry/solve-issue", workflowNode); + + gc(store); + + expect(store.cas.has(step)).toBe(true); + expect(store.cas.has(workflowNode)).toBe(true); + }); + + test("B.4 regression: existing anyOf traversal still works", async () => { + const store = createMemoryStore(); + bootstrap(store); + + const childSchema = putSchema(store, { type: "string" }); + const parentSchema = putSchema(store, { + type: "object", + properties: { + child: { + anyOf: [{ type: "null" }, { type: "string", format: "ocas_ref" }], + }, + }, + }); + + const child = store.cas.put(childSchema, "child-value"); + const parent = store.cas.put(parentSchema, { child }); + + store.var.set("@test/parent", parent); + + gc(store); + + expect(store.cas.has(parent)).toBe(true); + expect(store.cas.has(child)).toBe(true); + }); + + test("B.5 reports correct stats with oneOf chains", async () => { + const store = createMemoryStore(); + bootstrap(store); + + const stepSchema = putSchema(store, { + type: "object", + properties: { + payload: { type: "string" }, + prev: { + oneOf: [{ type: "null" }, { type: "string", format: "ocas_ref" }], + }, + }, + }); + + const step1 = store.cas.put(stepSchema, { payload: "a", prev: null }); + const step2 = store.cas.put(stepSchema, { payload: "b", prev: step1 }); + const step3 = store.cas.put(stepSchema, { payload: "c", prev: step2 }); + store.cas.put(stepSchema, { payload: "orphan", prev: null }); + + store.var.set("@test/thread/head", step3); + + const stats = gc(store); + + expect(stats.collected).toBe(1); + expect(stats.reachable).toBeGreaterThanOrEqual(4); + expect(stats.scanned).toBeGreaterThanOrEqual(1); + }); +}); + +// ────────────────────────────────────────────────────────────────────────────── +// Suite C: gc preserves template content for reachable schemas (issue #93) +// ────────────────────────────────────────────────────────────────────────────── +describe("GC - template content preservation (#93)", () => { + test("C.1 preserves @ocas/template/text/ content when schema is reachable", async () => { + const store = createMemoryStore(); + const aliases = bootstrap(store); + const stringHash = aliases["@ocas/string"] as string; + + const schemaA = putSchema(store, { + type: "object", + properties: { x: { type: "number" } }, + }); + const nodeA = store.cas.put(schemaA, { x: 42 }); + store.var.set("@test/a", nodeA); + + const tplA = store.cas.put(stringHash, "rendered: {{x}}"); + store.var.set(`@ocas/template/text/${schemaA}`, tplA); + + gc(store); + + expect(store.cas.has(tplA)).toBe(true); + + const tplVar = store.var.get(`@ocas/template/text/${schemaA}`); + expect(tplVar).not.toBeNull(); + expect(tplVar?.value).toBe(tplA); + }); + + test("C.2 removes orphan template content for an unreachable schema", async () => { + const store = createMemoryStore(); + const aliases = bootstrap(store); + const stringHash = aliases["@ocas/string"] as string; + + const schemaA = putSchema(store, { + type: "object", + properties: { x: { type: "number" } }, + }); + // Note: do NOT bind any variable to a node typed by schemaA — schemaA is + // unreachable as a typeHash of any reachable node. + + const otherSchema = putSchema(store, { type: "string" }); + const otherNode = store.cas.put(otherSchema, "other"); + store.var.set("@test/other", otherNode); + + const tplA = store.cas.put(stringHash, "rendered: {{x}}"); + store.var.set(`@ocas/template/text/${schemaA}`, tplA); + + gc(store); + + expect(store.cas.has(tplA)).toBe(false); + }); +}); diff --git a/packages/core/src/gc.ts b/packages/core/src/gc.ts index 18e6abe..0d0885c 100644 --- a/packages/core/src/gc.ts +++ b/packages/core/src/gc.ts @@ -8,10 +8,17 @@ export interface GcStats { scanned: number; // Variables scanned as roots } +const TEMPLATE_VAR_PREFIX = "@ocas/template/text/"; + /** * Garbage collection: mark-and-sweep algorithm - * - Roots: all variable values (global, not scoped) + * - Roots: all variable values (global, not scoped), excluding + * `@ocas/template/text/*` variables — those are added in a follow-up + * phase only when their referenced schema is itself reachable. * - Mark: recursively walk refs from roots + * - Template phase: for every reachable schema, walk the contents of its + * `@ocas/template/text/` template variable (mirrors + * `computeClosure` Phase 3). * - Sweep: delete unmarked nodes * - Schema preservation: schemas of reachable nodes are also marked */ @@ -21,9 +28,13 @@ export function gc(store: Store): GcStats { const variables = store.var.list(); const scanned = variables.length; - // Collect unique root hashes from all variables + // Collect unique root hashes from all variables, except template + // variables (`@ocas/template/text/*`). Template variables are processed + // in a follow-up phase so their content is preserved only when the + // referenced schema is itself reachable from non-template roots. const roots = new Set(); for (const variable of variables) { + if (variable.name.startsWith(TEMPLATE_VAR_PREFIX)) continue; roots.add(variable.value); } @@ -63,6 +74,33 @@ export function gc(store: Store): GcStats { } } + // Template phase: include `@ocas/template/text/` content nodes + // when their schema is in the reachable set (mirrors closure.ts Phase 3). + // Snapshot the current reachable set before walking template content so + // that template-only nodes do not transitively pull in further templates. + const reachableSnapshot = [...reachable]; + for (const hash of reachableSnapshot) { + const templateName = `${TEMPLATE_VAR_PREFIX}${hash}`; + const variants = store.var.list({ exactName: templateName }); + for (const variant of variants) { + walk(store, variant.value, (h, n) => { + reachable.add(h); + reachable.add(n.type); + }); + // Walk the template content's schema chain too + const tNode = store.cas.get(variant.value); + if (tNode) { + let current: Hash | null = tNode.type; + while (current !== null && !reachable.has(current)) { + reachable.add(current); + const node = store.cas.get(current); + if (!node || node.type === current) break; + current = node.type; + } + } + } + } + // Preserve all self-referencing nodes (bootstrap meta-schema) // These are nodes where type === hash const allHashes = store.cas.listAll(); diff --git a/packages/core/src/schema.test.ts b/packages/core/src/schema.test.ts index 2c5ad4d..7f34b00 100644 --- a/packages/core/src/schema.test.ts +++ b/packages/core/src/schema.test.ts @@ -743,3 +743,82 @@ describe("bootstrap meta-schema self-reference", () => { expect(refList).toContain(targetHash); }); }); + +// ────────────────────────────────────────────────────────────────────────────── +// Suite A: collectRefs() oneOf traversal (issue #93) +// ────────────────────────────────────────────────────────────────────────────── +describe("collectRefs oneOf traversal", () => { + test("A.1 returns the ref hash from the chosen oneOf branch (string variant)", async () => { + const store = createMemoryStore(); + const innerSchema = putSchema(store, { type: "string" }); + const schema = putSchema(store, { + type: "object", + properties: { + prev: { + oneOf: [{ type: "null" }, { type: "string", format: "ocas_ref" }], + }, + }, + }); + + const targetHash = store.cas.put(innerSchema, "target"); + const nodeHash = store.cas.put(schema, { prev: targetHash }); + const node = store.cas.get(nodeHash) as CasNode; + + expect(refs(store, node)).toContain(targetHash); + }); + + test("A.2 returns no ref when oneOf matches the null variant", async () => { + const store = createMemoryStore(); + const schema = putSchema(store, { + type: "object", + properties: { + prev: { + oneOf: [{ type: "null" }, { type: "string", format: "ocas_ref" }], + }, + }, + }); + + const nodeHash = store.cas.put(schema, { prev: null }); + const node = store.cas.get(nodeHash) as CasNode; + + expect(refs(store, node)).toEqual([]); + }); + + test("A.3 traverses nested combinators inside oneOf", async () => { + const store = createMemoryStore(); + const innerSchema = putSchema(store, { type: "string" }); + const schema = putSchema(store, { + oneOf: [ + { type: "null" }, + { + type: "object", + properties: { ref: { type: "string", format: "ocas_ref" } }, + }, + ], + }); + + const targetHash = store.cas.put(innerSchema, "target"); + const nodeHash = store.cas.put(schema, { ref: targetHash }); + const node = store.cas.get(nodeHash) as CasNode; + + expect(refs(store, node)).toContain(targetHash); + }); + + test("A.4 multiple ref branches in oneOf all surface", async () => { + const store = createMemoryStore(); + const innerSchema = putSchema(store, { type: "string" }); + const schema = putSchema(store, { + oneOf: [ + { type: "string", format: "ocas_ref" }, + { type: "string", format: "ocas_ref" }, + ], + }); + + const targetHash = store.cas.put(innerSchema, "target"); + const nodeHash = store.cas.put(schema, targetHash); + const node = store.cas.get(nodeHash) as CasNode; + + const result = refs(store, node); + expect(result).toContain(targetHash); + }); +}); diff --git a/packages/core/src/schema.ts b/packages/core/src/schema.ts index 63d69d3..ed6a468 100644 --- a/packages/core/src/schema.ts +++ b/packages/core/src/schema.ts @@ -311,6 +311,17 @@ export function collectRefs(schema: JSONSchema, value: unknown): Hash[] { return result; } + // oneOf — JSON Schema requires exactly one branch to validate, but for + // ref collection we conservatively traverse every branch (the meta-schema + // accepts oneOf alongside anyOf, and we cannot statically know which + // branch the value will match). Mirrors anyOf handling. + if (Array.isArray(schema.oneOf)) { + for (const sub of schema.oneOf as JSONSchema[]) { + result.push(...collectRefs(sub, value)); + } + return result; + } + // P2: allOf — each sub-schema applies to the same value if (Array.isArray(schema.allOf)) { for (const sub of schema.allOf as JSONSchema[]) {