Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 7242588dd9 | |||
| c34a8b3c58 | |||
| 08b143ea0b |
@@ -0,0 +1,220 @@
|
||||
name: "retrospect-workflow"
|
||||
description: "Post-execution retrospective: analyze a completed thread, find inefficiencies, and improve the workflow definition."
|
||||
roles:
|
||||
analyst:
|
||||
description: "Scans thread execution for anomalies and produces a findings report"
|
||||
goal: "You are a workflow execution analyst. You review completed thread data to find inefficiencies, wasted effort, and procedure gaps."
|
||||
capabilities:
|
||||
- data-analysis
|
||||
procedure: |
|
||||
You receive a completed thread ID in your task prompt.
|
||||
|
||||
Phase 0 — Validation (must pass before any analysis):
|
||||
1. Run `uwf step list <thread-id>` to get thread metadata including the workflow hash
|
||||
2. Run `uwf workflow show <workflow-hash>` to get the workflow name
|
||||
3. Verify the workflow exists locally: check `.workflows/<name>.yaml` in the current repo
|
||||
- If NOT found: output $status=wrong_project with the workflow name. Do NOT proceed.
|
||||
4. Compare the thread's workflow hash against the current registered version:
|
||||
- Run `uwf workflow show <name>` to get the current hash
|
||||
- If hashes differ: the thread ran on an older version. Note this — you will need to diff versions after analysis.
|
||||
|
||||
Phase 1 — Overview scan:
|
||||
5. From the step list, compute a health signal for each step:
|
||||
- Duration: flag if >2x the median of other steps
|
||||
- Output tokens: flag if >2x the median
|
||||
- Status flow: flag non-happy-path transitions (rejected, fix_code, fix_spec, hook_failed)
|
||||
- Step count: flag if the same role appears more than expected (indicates loops)
|
||||
6. If no anomalies found AND versions match: output $status=clean
|
||||
7. If no anomalies found BUT versions differ:
|
||||
- Diff the two workflow versions to check if any procedure changes are relevant
|
||||
- If the current version already addresses potential concerns: output $status=clean with a note
|
||||
- Otherwise: proceed to Phase 2
|
||||
|
||||
Phase 2 — Targeted deep-dive (only for flagged steps):
|
||||
8. For each flagged step, run `uwf step show <hash>` to get the detail with turns
|
||||
9. Analyze the turn sequence for:
|
||||
- Repeated tool calls with the same or similar input (blind retries)
|
||||
- Tool errors followed by no strategy change (same approach retried)
|
||||
- Unnecessary exploration (reading files or running commands unrelated to the task)
|
||||
- Hallucinated commands or flags (commands that don't exist or wrong syntax)
|
||||
- Excessive turns before reaching the goal
|
||||
10. For each finding, record:
|
||||
- Which role and step hash
|
||||
- What happened (specific turn indices and commands)
|
||||
- Root cause hypothesis (procedure gap, missing pitfall, unclear instruction)
|
||||
- Suggested fix (what to add/change in the procedure)
|
||||
11. If versions differ: compare findings against the version diff.
|
||||
Mark any finding that is already fixed in the current version as "resolved_in_current".
|
||||
Only report findings that are NOT yet addressed.
|
||||
|
||||
Output a structured findings report. Set $status=clean if nothing actionable, $status=findings if unresolved issues exist, or $status=wrong_project if the workflow doesn't belong here.
|
||||
output: "A findings report with per-issue root cause and suggested procedure fixes. Set $status to clean (with summary), findings (with report hash and targetWorkflow), or wrong_project (with workflowName)."
|
||||
frontmatter:
|
||||
oneOf:
|
||||
- properties:
|
||||
$status: { const: "clean" }
|
||||
summary: { type: string }
|
||||
required: [$status, summary]
|
||||
- properties:
|
||||
$status: { const: "findings" }
|
||||
report: { type: string }
|
||||
targetWorkflow: { type: string }
|
||||
required: [$status, report, targetWorkflow]
|
||||
- properties:
|
||||
$status: { const: "wrong_project" }
|
||||
workflowName: { type: string }
|
||||
required: [$status, workflowName]
|
||||
proposer:
|
||||
description: "Translates findings into concrete workflow edits"
|
||||
goal: "You are a workflow improvement proposer. You read the analyst's findings and produce specific, minimal edits to the workflow YAML."
|
||||
capabilities:
|
||||
- planning
|
||||
procedure: |
|
||||
1. Read the analyst's findings report from your task prompt
|
||||
2. Locate the target workflow YAML:
|
||||
- Workflow definitions live in the WORKFLOW ENGINE repo (where `uwf` is developed), NOT in the repo that was analyzed.
|
||||
- Find it via: `uwf workflow show <targetWorkflow> --format yaml` to read the current definition
|
||||
- The physical file is `.workflows/<targetWorkflow>.yaml` in the workflow engine repo
|
||||
- Use `git rev-parse --show-toplevel` in the current directory to find the workflow engine repo root
|
||||
3. Read the current workflow YAML to understand existing procedures
|
||||
4. For each finding, draft a minimal edit:
|
||||
- Prefer adding a pitfall note or clarifying instruction over restructuring
|
||||
- If a procedure step is ambiguous, make it explicit
|
||||
- If a tool usage pattern is wrong, add a "Do NOT" or "IMPORTANT" note
|
||||
- Keep edits surgical — don't rewrite procedures that work fine
|
||||
5. Check if existing tests need updating (search for test files referencing the workflow)
|
||||
6. Produce a change plan as CAS text node via `uwf cas put-text "<plan>"`
|
||||
|
||||
The plan should list each edit with:
|
||||
- File path
|
||||
- What to change (old text → new text, or addition)
|
||||
- Why (linked to which finding)
|
||||
- Any test updates needed
|
||||
output: "A change plan stored in CAS. Set $status to ready (with plan hash and repoPath) or no_action (if findings don't warrant changes)."
|
||||
frontmatter:
|
||||
oneOf:
|
||||
- properties:
|
||||
$status: { const: "ready" }
|
||||
plan: { type: string }
|
||||
repoPath: { type: string }
|
||||
required: [$status, plan, repoPath]
|
||||
- properties:
|
||||
$status: { const: "no_action" }
|
||||
reason: { type: string }
|
||||
required: [$status, reason]
|
||||
developer:
|
||||
description: "Applies the proposed workflow edits"
|
||||
goal: "You are a developer agent. You apply workflow YAML edits and update related tests."
|
||||
capabilities:
|
||||
- coding
|
||||
procedure: |
|
||||
IMPORTANT: Always work in a git worktree, NEVER modify the main working directory directly.
|
||||
The workflow definitions live in THIS repo (the workflow engine), not the repo that was analyzed.
|
||||
|
||||
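Before starting any work, set up an isolated worktree (if a worktree from a previous attempt already exists — e.g. you are fixing a reviewer rejection — cd into it and skip steps 2–4, because `git worktree add -b` fails when the branch already exists):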
|
||||
1. Use `git rev-parse --show-toplevel` to find the repo root (do NOT use repoPath from proposer — that's the analyzed repo)
|
||||
2. `git fetch origin` to get latest refs
|
||||
3. `git worktree add .worktrees/retrospect/<short-slug> -b retrospect/<short-slug> origin/main`
|
||||
4. `cd .worktrees/retrospect/<short-slug> && bun install`
|
||||
5. ALL subsequent work must happen inside the worktree directory.
|
||||
|
||||
Then apply changes:
|
||||
6. Read the change plan from CAS: `uwf cas get <plan hash>`
|
||||
7. Apply each edit from the plan to the workflow YAML
|
||||
8. Update or add tests as specified in the plan
|
||||
9. Run `bun run build` and `bun test` to verify
|
||||
10. Run `bun run check` for lint
|
||||
11. Commit with message: `improve: <workflow-name> — <brief summary>`
|
||||
output: "List all files changed and provide a summary. Set $status to done (with branch/worktree), or failed (with reason)."
|
||||
frontmatter:
|
||||
oneOf:
|
||||
- properties:
|
||||
$status: { const: "done" }
|
||||
branch: { type: string }
|
||||
worktree: { type: string }
|
||||
required: [$status, branch, worktree]
|
||||
- properties:
|
||||
$status: { const: "failed" }
|
||||
reason: { type: string }
|
||||
required: [$status, reason]
|
||||
reviewer:
|
||||
description: "Reviews the workflow edits for correctness"
|
||||
goal: "You are a reviewer. You verify that workflow edits are minimal, correct, and actually address the findings."
|
||||
capabilities:
|
||||
- code-review
|
||||
procedure: |
|
||||
The worktree path is provided in your task prompt. cd into it first.
|
||||
|
||||
Review criteria:
|
||||
1. Each edit must trace back to a specific finding — no drive-by changes
|
||||
2. Edits should be minimal — don't rewrite working procedures
|
||||
3. New pitfall notes or instructions must be clear and actionable
|
||||
4. Tests must be updated if assertions changed
|
||||
5. `bun run build` and `bun test` must pass
|
||||
6. `bunx biome check` must pass
|
||||
|
||||
IMPORTANT: `tea pr create` must run from the MAIN repo directory (not a worktree), because tea cannot detect the repo from worktree `.git` files.
|
||||
output: "Explain your decision. Set $status to approved (with branch/worktree) or rejected (with comments and worktree)."
|
||||
frontmatter:
|
||||
oneOf:
|
||||
- properties:
|
||||
$status: { const: "approved" }
|
||||
branch: { type: string }
|
||||
worktree: { type: string }
|
||||
required: [$status, branch, worktree]
|
||||
- properties:
|
||||
$status: { const: "rejected" }
|
||||
comments: { type: string }
|
||||
worktree: { type: string }
|
||||
required: [$status, comments, worktree]
|
||||
committer:
|
||||
description: "Commits and creates PR"
|
||||
goal: "You are a committer agent. You create a clean commit and push a PR."
|
||||
capabilities: []
|
||||
procedure: |
|
||||
The worktree path, branch name, and repo info are provided in your task prompt.
|
||||
cd into the worktree first.
|
||||
|
||||
Note: You inherit the developer's worktree and branch. Do NOT create a new branch.
|
||||
1. Stage all changes: `git add -A`
|
||||
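2. Commit with a descriptive message: `git commit -m "improve: <workflow> — <summary>"` (if the developer already committed everything and nothing is staged, skip this step rather than treating the empty commit as a failure)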
|
||||
3. Push the branch: `git push -u origin <branch-name>`
|
||||
- If push hook fails: capture the error log in your output, mark hook_failed
|
||||
4. On push success: create a PR via `tea pr create --title "..." --description "..."`
|
||||
- IMPORTANT: `tea pr create` must run from the MAIN repo directory (not a worktree), because tea cannot detect the repo from worktree `.git` files. cd to the repo root first.
|
||||
- Do NOT pass `--repo` — let tea auto-detect from the main repo's git remote.
|
||||
- PR description must include: What / Why / Findings / Changes sections
|
||||
- On tea failure: capture stderr/stdout, include PR details for manual creation, mark hook_failed
|
||||
5. After PR creation, clean up the worktree:
|
||||
- cd to the repo root (parent of .worktrees)
|
||||
- `git worktree remove <worktree-path>`
|
||||
output: "Include PR URL on success or error log on failure. Set $status to committed (with prUrl) or hook_failed (with error)."
|
||||
frontmatter:
|
||||
oneOf:
|
||||
- properties:
|
||||
$status: { const: "committed" }
|
||||
prUrl: { type: string }
|
||||
required: [$status, prUrl]
|
||||
- properties:
|
||||
$status: { const: "hook_failed" }
|
||||
error: { type: string }
|
||||
required: [$status, error]
|
||||
graph:
|
||||
$START:
|
||||
_: { role: "analyst", prompt: "Analyze completed thread {{{threadId}}} for execution anomalies." }
|
||||
analyst:
|
||||
clean: { role: "$END", prompt: "No issues found. Thread executed cleanly." }
|
||||
findings: { role: "proposer", prompt: "Findings report: {{{report}}}. Target workflow: {{{targetWorkflow}}}. Propose minimal edits." }
|
||||
wrong_project: { role: "$END", prompt: "Thread uses workflow '{{{workflowName}}}' which does not exist in this project. Run retrospect from the correct repo." }
|
||||
proposer:
|
||||
no_action: { role: "$END", prompt: "No actionable changes needed: {{{reason}}}." }
|
||||
ready: { role: "developer", prompt: "Apply the change plan (CAS hash: {{{plan}}}) to the workflow definitions in this repo." }
|
||||
developer:
|
||||
done: { role: "reviewer", prompt: "Review workflow edits on branch {{{branch}}} at {{{worktree}}}." }
|
||||
failed: { role: "$END", prompt: "Developer failed: {{{reason}}}. Ending workflow." }
|
||||
reviewer:
|
||||
rejected: { role: "developer", prompt: "Reviewer rejected: {{{comments}}}. Fix the issues in {{{worktree}}}." }
|
||||
approved: { role: "committer", prompt: "Approved. Commit and push branch {{{branch}}} from {{{worktree}}}." }
|
||||
committer:
|
||||
hook_failed: { role: "developer", prompt: "Push hook failed: {{{error}}}. Fix and re-submit." }
|
||||
committed: { role: "$END", prompt: "PR created: {{{prUrl}}}. Workflow improved." }
|
||||
@@ -9,6 +9,7 @@ import {
|
||||
CasNodeNotFoundError,
|
||||
computeHash,
|
||||
createVariableStore,
|
||||
gc,
|
||||
getSchema,
|
||||
InvalidScopeError,
|
||||
InvalidTagFormatError,
|
||||
@@ -552,6 +553,18 @@ async function cmdVarList(_args: string[]): Promise<void> {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * `gc` subcommand: opens the filesystem CAS store and the variable store,
 * runs garbage collection over them, and prints the resulting statistics.
 *
 * The variable store is closed even when collection throws.
 *
 * @param _args Remaining command-line arguments; currently unused.
 */
async function cmdGc(_args: string[]): Promise<void> {
  const casStore = createFsStore(storePath);
  const variables = createVariableStore(varDbPath, casStore);

  try {
    out(gc(casStore, variables));
  } finally {
    variables.close();
  }
}
|
||||
|
||||
function printUsage(): void {
|
||||
console.log(`\
|
||||
Usage: json-cas [--store <path>] [--json] <command> [args]
|
||||
@@ -577,6 +590,7 @@ Commands:
|
||||
var delete <id> Delete a variable
|
||||
var tag <id> <tag>... Add/update/delete tags and labels
|
||||
var list [--scope <prefix>] [--tag <tag>...] List variables (filter by scope/tags/labels)
|
||||
gc Run garbage collection
|
||||
|
||||
Flags:
|
||||
--store <path> Store directory (default: ~/.uncaged/json-cas)
|
||||
@@ -683,6 +697,10 @@ switch (cmd) {
|
||||
break;
|
||||
}
|
||||
|
||||
case "gc":
|
||||
await cmdGc(rest);
|
||||
break;
|
||||
|
||||
default:
|
||||
die(`Unknown command: ${cmd}`);
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import {
|
||||
readdirSync,
|
||||
readFileSync,
|
||||
renameSync,
|
||||
unlinkSync,
|
||||
writeFileSync,
|
||||
} from "node:fs";
|
||||
import { join } from "node:path";
|
||||
@@ -175,6 +176,44 @@ export function createFsStore(dir: string): BootstrapCapableStore {
|
||||
return typeIndex.get(typeHash) ?? [];
|
||||
},
|
||||
|
||||
/** Returns a fresh array containing the hash of every node currently loaded in `data`. */
listAll(): Hash[] {
  return [...data.keys()];
},
|
||||
|
||||
/**
 * Removes a node from the store: its `<hash>.bin` file, its in-memory entry,
 * and its entry in the per-type index (both in memory and on disk).
 *
 * Deleting a hash that is not in the store is a no-op. A data or index file
 * that is already absent is tolerated; any other filesystem error is
 * rethrown so that the caller never believes a node was removed while its
 * file is still on disk.
 *
 * @param hash Hash of the node to remove.
 * @throws Whatever `unlinkSync` / `writeFileSync` throw, except `ENOENT` on unlink.
 */
delete(hash: Hash): void {
  const node = data.get(hash);
  if (!node) {
    return;
  }

  // Only "file not found" is safe to ignore: the goal state (no file) already
  // holds. Permission or I/O errors must surface instead of being swallowed.
  const removeFile = (path: string): void => {
    try {
      unlinkSync(path);
    } catch (err: unknown) {
      const code = (err as { code?: unknown } | null | undefined)?.code;
      if (code !== "ENOENT") {
        throw err;
      }
    }
  };

  // Touch the disk first: if the unlink fails, the in-memory maps are left
  // untouched and therefore still agree with what is stored on disk.
  removeFile(join(dir, `${hash}.bin`));
  data.delete(hash);

  const list = typeIndex.get(node.type);
  if (!list) {
    return;
  }

  const idx = list.indexOf(hash);
  if (idx !== -1) {
    list.splice(idx, 1);
  }

  const indexPath = join(indexDir, node.type);
  if (list.length === 0) {
    // Last node of this type: drop the index entry and its file.
    typeIndex.delete(node.type);
    removeFile(indexPath);
  } else {
    // Persist the shortened index, one hash per line.
    writeFileSync(indexPath, `${list.join("\n")}\n`, "utf8");
  }
},
|
||||
|
||||
[BOOTSTRAP_STORE]: putSelfReferencing,
|
||||
};
|
||||
|
||||
|
||||
@@ -0,0 +1,451 @@
|
||||
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
||||
import { unlinkSync } from "node:fs";
|
||||
import { bootstrap } from "./bootstrap.js";
|
||||
import { gc } from "./gc.js";
|
||||
import { putSchema } from "./schema.js";
|
||||
import { createMemoryStore } from "./store.js";
|
||||
import type { Store } from "./types.js";
|
||||
import { createVariableStore, type VariableStore } from "./variable-store.js";
|
||||
|
||||
/**
 * Builds a path under `/tmp` for a throwaway variable-store database.
 * The name combines the current timestamp with a random base-36 suffix.
 */
function tmpDbPath(): string {
  const stamp = Date.now();
  const nonce = Math.random().toString(36).slice(2);
  return "/tmp/test-gc-" + stamp + "-" + nonce + ".db";
}
|
||||
|
||||
describe("gc()", () => {
  let store: Store;
  let varStore: VariableStore;
  let dbPath: string;

  beforeEach(() => {
    store = createMemoryStore();
    dbPath = tmpDbPath();
    varStore = createVariableStore(dbPath, store);
  });

  afterEach(() => {
    varStore.close();
    try {
      unlinkSync(dbPath);
    } catch {
      // The database file may never have been created; nothing to clean up.
    }
  });

  /**
   * Shared setup: bootstraps the store and registers the simple
   * `{ name: string }` object schema used by most tests.
   */
  async function seedNameSchema() {
    const metaHash = await bootstrap(store);
    const schema = { type: "object", properties: { name: { type: "string" } } };
    const schemaHash = await putSchema(store, schema);
    return { metaHash, schemaHash };
  }

  test("preserves variable-referenced nodes", async () => {
    const { schemaHash } = await seedNameSchema();

    const hashRef = await store.put(schemaHash, { name: "referenced" });
    const hashOrphan = await store.put(schemaHash, { name: "orphan" });

    // Only hashRef is rooted by a variable.
    varStore.create("test/", hashRef);

    const stats = gc(store, varStore);

    expect(store.has(hashRef)).toBe(true);
    expect(store.get(hashRef)).not.toBe(null);
    expect(store.has(hashOrphan)).toBe(false);
    expect(stats.scanned).toBe(1);
    expect(stats.collected).toBeGreaterThanOrEqual(1);
  });

  test("removes orphaned nodes", async () => {
    const { schemaHash } = await seedNameSchema();

    const hashRef = await store.put(schemaHash, { name: "referenced" });
    const hashOrphan = await store.put(schemaHash, { name: "orphan" });

    varStore.create("test/", hashRef);

    gc(store, varStore);

    expect(store.has(hashOrphan)).toBe(false);
  });

  test("removes nodes after variable deletion", async () => {
    const { schemaHash } = await seedNameSchema();

    const hashRef = await store.put(schemaHash, { name: "referenced" });

    // Root the node, then remove the only root again.
    const variable = varStore.create("test/", hashRef);
    varStore.delete(variable.id);

    gc(store, varStore);

    expect(store.has(hashRef)).toBe(false);
  });

  test("preserves schema nodes of reachable nodes", async () => {
    const { schemaHash } = await seedNameSchema();

    const hashData = await store.put(schemaHash, { name: "data" });
    varStore.create("test/", hashData);

    gc(store, varStore);

    expect(store.has(schemaHash)).toBe(true);
    expect(store.get(schemaHash)).not.toBe(null);
  });

  test("collects unused schemas", async () => {
    await bootstrap(store);

    const schemaUsed = {
      type: "object",
      properties: { name: { type: "string" } },
    };
    const schemaOrphan = {
      type: "object",
      properties: { age: { type: "number" } },
    };

    const schemaUsedHash = await putSchema(store, schemaUsed);
    const schemaOrphanHash = await putSchema(store, schemaOrphan);

    // Only schemaUsed has a rooted instance.
    const hashData = await store.put(schemaUsedHash, { name: "data" });
    varStore.create("test/", hashData);

    gc(store, varStore);

    expect(store.has(schemaUsedHash)).toBe(true);
    expect(store.has(schemaOrphanHash)).toBe(false);
  });

  test("preserves bootstrap meta-schema", async () => {
    const { metaHash, schemaHash } = await seedNameSchema();

    // The data node does not reference the meta-schema directly.
    const hashData = await store.put(schemaHash, { name: "data" });
    varStore.create("test/", hashData);

    gc(store, varStore);

    expect(store.has(metaHash)).toBe(true);
  });

  test("handles multiple variables with shared references", async () => {
    const { schemaHash } = await seedNameSchema();

    const hashShared = await store.put(schemaHash, { name: "shared" });

    // Two variables, one value.
    varStore.create("test/", hashShared);
    varStore.create("test/", hashShared);

    const stats = gc(store, varStore);

    expect(store.has(hashShared)).toBe(true);
    expect(stats.scanned).toBe(2);
  });

  test("deleting one variable doesn't remove shared node", async () => {
    const { schemaHash } = await seedNameSchema();

    const hashShared = await store.put(schemaHash, { name: "shared" });

    const var1 = varStore.create("test/", hashShared);
    varStore.create("test/", hashShared);

    // One root remains after this deletion.
    varStore.delete(var1.id);

    gc(store, varStore);

    expect(store.has(hashShared)).toBe(true);
  });

  test("deleting all variables removes shared node", async () => {
    const { schemaHash } = await seedNameSchema();

    const hashShared = await store.put(schemaHash, { name: "shared" });

    const var1 = varStore.create("test/", hashShared);
    const var2 = varStore.create("test/", hashShared);

    varStore.delete(var1.id);
    varStore.delete(var2.id);

    gc(store, varStore);

    expect(store.has(hashShared)).toBe(false);
  });

  test("walks deep reference chains", async () => {
    await bootstrap(store);

    // `child` is an optional cas_ref; `name` keeps the node payloads distinct.
    const schemaTree = {
      type: "object",
      properties: {
        name: { type: "string" },
        child: {
          anyOf: [{ type: "null" }, { type: "string", format: "cas_ref" }],
        },
      },
    };
    const schemaTreeHash = await putSchema(store, schemaTree);

    // Chain: A -> B -> C (built leaf-first because parents embed child hashes).
    const hashC = await store.put(schemaTreeHash, { name: "C", child: null });
    const hashB = await store.put(schemaTreeHash, {
      name: "B",
      child: hashC,
    });
    const hashA = await store.put(schemaTreeHash, {
      name: "A",
      child: hashB,
    });

    // Different payload, so a different hash from C.
    const hashOrphan = await store.put(schemaTreeHash, {
      name: "orphan",
      child: null,
    });

    varStore.create("test/", hashA);

    const stats = gc(store, varStore);

    expect(store.has(hashA)).toBe(true);
    expect(store.has(hashB)).toBe(true);
    expect(store.has(hashC)).toBe(true);
    expect(store.has(hashOrphan)).toBe(false);
    expect(stats.reachable).toBeGreaterThanOrEqual(4); // A, B, C, schemaTree
  });

  test("handles cycles without hanging", async () => {
    await bootstrap(store);

    const schema = {
      type: "object",
      properties: {
        child: { type: "string", format: "cas_ref" },
      },
    };
    const schemaHash = await putSchema(store, schema);

    // NOTE(review): despite the title, no reference cycle is constructed here.
    // The node's `child` is the literal string "placeholder", which is never
    // stored as a node in this test, so this only checks that gc() terminates
    // and keeps the rooted node. TODO: build a real cycle if the store ever
    // offers a way to do so.
    const hashX = await store.put(schemaHash, { child: "placeholder" });

    varStore.create("test/", hashX);

    const stats = gc(store, varStore);

    expect(store.has(hashX)).toBe(true);
    expect(stats.scanned).toBe(1);
  });

  test("handles empty variable store", async () => {
    const { metaHash, schemaHash } = await seedNameSchema();

    const hash1 = await store.put(schemaHash, { name: "node1" });
    const hash2 = await store.put(schemaHash, { name: "node2" });

    // Deliberately no variables: nothing is rooted.
    const stats = gc(store, varStore);

    expect(stats.scanned).toBe(0);
    expect(stats.collected).toBeGreaterThan(0);
    expect(store.has(hash1)).toBe(false);
    expect(store.has(hash2)).toBe(false);
    // The bootstrap meta-schema must survive even without roots.
    expect(store.has(metaHash)).toBe(true);
  });

  test("handles empty CAS store", () => {
    // Fresh store: no bootstrap, no nodes, no variables.
    const stats = gc(store, varStore);

    expect(stats.total).toBe(0);
    expect(stats.reachable).toBe(0);
    expect(stats.collected).toBe(0);
    expect(stats.scanned).toBe(0);
  });

  test("is global across all scopes", async () => {
    const { schemaHash } = await seedNameSchema();

    const hashA = await store.put(schemaHash, { name: "A" });
    const hashB = await store.put(schemaHash, { name: "B" });
    const hashC = await store.put(schemaHash, { name: "C" });
    const hashOrphan = await store.put(schemaHash, { name: "orphan" });

    // Roots live in three unrelated scopes.
    varStore.create("uwf/thread/", hashA);
    varStore.create("uwf/workflow/", hashB);
    varStore.create("app/config/", hashC);

    const stats = gc(store, varStore);

    expect(store.has(hashA)).toBe(true);
    expect(store.has(hashB)).toBe(true);
    expect(store.has(hashC)).toBe(true);
    expect(store.has(hashOrphan)).toBe(false);
    expect(stats.scanned).toBe(3);
  });

  test("returns accurate stats", async () => {
    await bootstrap(store);

    const schema1 = {
      type: "object",
      properties: { name: { type: "string" } },
    };
    const schema2 = {
      type: "object",
      properties: { age: { type: "number" } },
    };

    const schema1Hash = await putSchema(store, schema1);
    const schema2Hash = await putSchema(store, schema2);

    // Two rooted nodes.
    const hash1 = await store.put(schema1Hash, { name: "node1" });
    const hash2 = await store.put(schema2Hash, { age: 42 });

    // Three orphans.
    await store.put(schema1Hash, { name: "orphan1" });
    await store.put(schema1Hash, { name: "orphan2" });
    await store.put(schema2Hash, { age: 99 });

    varStore.create("test/", hash1);
    varStore.create("test/", hash2);

    // metaHash, schema1Hash, schema2Hash, hash1, hash2, orphan1, orphan2, orphan3
    const totalBefore = 8;

    const stats = gc(store, varStore);

    expect(stats.total).toBe(totalBefore);
    expect(stats.scanned).toBe(2);
    expect(stats.reachable).toBe(5); // metaHash, schema1Hash, schema2Hash, hash1, hash2
    expect(stats.collected).toBe(3); // orphan1, orphan2, orphan3
  });

  test("handles missing CAS nodes gracefully", async () => {
    const { schemaHash } = await seedNameSchema();

    const hashValid = await store.put(schemaHash, { name: "valid" });
    varStore.create("test/", hashValid);

    // NOTE(review): the dangling-variable case named in the title is not
    // exercised — every variable here points at a node that exists. TODO: add
    // a variable whose value is absent from the store and assert gc() copes.
    const stats = gc(store, varStore);

    expect(stats.scanned).toBeGreaterThanOrEqual(1);
  });
});
|
||||
@@ -0,0 +1,94 @@
|
||||
import { walk } from "./schema.js";
|
||||
import type { Hash, Store } from "./types.js";
|
||||
import type { VariableStore } from "./variable-store.js";
|
||||
|
||||
/** Counters describing the outcome of one {@link gc} run. */
export interface GcStats {
  /** Number of nodes in the CAS store before the sweep. */
  total: number;
  /** Number of stored nodes that were kept (always `total - collected`). */
  reachable: number;
  /** Number of nodes deleted by the sweep. */
  collected: number;
  /** Number of variables inspected as roots (duplicate values included). */
  scanned: number;
}

/**
 * Marks `start` and every schema above it (its type, its type's type, ...)
 * until an already-marked hash, a self-typed node, or a missing node is hit.
 */
function markTypeChain(store: Store, start: Hash, marked: Set<Hash>): void {
  let current = start;
  while (!marked.has(current)) {
    marked.add(current);
    const node = store.get(current);
    if (!node) {
      return;
    }
    // For a self-typed node this yields `current` again, which is now marked,
    // so the loop ends on the next check.
    current = node.type;
  }
}

/**
 * Mark-and-sweep garbage collection over a CAS store.
 *
 * - Roots: the value of every variable in `varStore` (all scopes).
 * - Mark: every node `walk` visits from a root, plus the full schema (type)
 *   chain of each visited node.
 * - Always kept: self-typed nodes (`node.type === hash`), i.e. bootstrap
 *   meta-schemas, even when nothing references them.
 * - Sweep: every other node returned by `store.listAll()` is deleted.
 *
 * A variable whose value is not present in the store is counted in `scanned`
 * but skipped as a root, so one dangling variable cannot abort collection.
 *
 * @param store CAS store to collect; unreachable nodes are deleted from it.
 * @param varStore Variable store whose values act as GC roots.
 * @returns Counters for this run; see {@link GcStats}.
 */
export function gc(store: Store, varStore: VariableStore): GcStats {
  const variables = varStore.list();

  // Several variables may share a value; walk each distinct root only once.
  const roots = new Set<Hash>();
  for (const variable of variables) {
    roots.add(variable.value);
  }

  // Mark phase.
  const marked = new Set<Hash>();
  for (const root of roots) {
    if (!store.has(root)) {
      // Dangling variable: nothing to mark from here.
      continue;
    }
    walk(store, root, (hash, node) => {
      marked.add(hash);
      markTypeChain(store, node.type, marked);
    });
  }

  // Sweep phase. `listAll()` is read once up front so deletions below do not
  // disturb the iteration.
  const allHashes = store.listAll();
  let collected = 0;
  for (const hash of allHashes) {
    if (marked.has(hash)) {
      continue;
    }
    const node = store.get(hash);
    if (node && node.type === hash) {
      // Self-typed bootstrap meta-schema: never collected.
      continue;
    }
    store.delete(hash);
    collected++;
  }

  const total = allHashes.length;
  return {
    total,
    // Count kept *stored* nodes rather than the size of the mark set, which
    // may contain hashes that are not in the store.
    reachable: total - collected,
    collected,
    scanned: variables.length,
  };
}
|
||||
@@ -2,6 +2,7 @@ export { bootstrap } from "./bootstrap.js";
|
||||
export type { BootstrapCapableStore } from "./bootstrap-capable.js";
|
||||
export { BOOTSTRAP_STORE } from "./bootstrap-capable.js";
|
||||
export { cborEncode } from "./cbor.js";
|
||||
export { type GcStats, gc } from "./gc.js";
|
||||
export { computeHash, computeSelfHash } from "./hash.js";
|
||||
export type { JSONSchema } from "./schema.js";
|
||||
export {
|
||||
|
||||
@@ -27,6 +27,14 @@ export class MemStore implements BootstrapCapableStore {
|
||||
return this.#inner.listByType(typeHash);
|
||||
}
|
||||
|
||||
/** Returns all hashes by delegating to the wrapped `#inner` store's `listAll()`. */
listAll(): Hash[] {
|
||||
return this.#inner.listAll();
|
||||
}
|
||||
|
||||
/** Deletes `hash` by delegating to the wrapped `#inner` store's `delete()`. */
delete(hash: Hash): void {
|
||||
this.#inner.delete(hash);
|
||||
}
|
||||
|
||||
/** Forwards `payload` to the wrapped `#inner` store's `BOOTSTRAP_STORE` method and returns its promised hash. */
[BOOTSTRAP_STORE](payload: unknown): Promise<Hash> {
|
||||
return this.#inner[BOOTSTRAP_STORE](payload);
|
||||
}
|
||||
|
||||
@@ -52,6 +52,25 @@ export function createMemoryStore(): BootstrapCapableStore {
|
||||
return set ? [...set] : [];
|
||||
},
|
||||
|
||||
/** Returns a fresh array containing the hash of every node currently held in `data`. */
listAll(): Hash[] {
  return [...data.keys()];
},
|
||||
|
||||
/**
 * Removes a node from the in-memory store and from the by-type index.
 * Deleting a hash that is not stored does nothing.
 *
 * @param hash Hash of the node to remove.
 */
delete(hash: Hash): void {
  const existing = data.get(hash);
  if (!existing) {
    return;
  }
  data.delete(hash);

  // Keep the by-type index in step with `data`.
  const bucket = byType.get(existing.type);
  if (!bucket) {
    return;
  }
  bucket.delete(hash);
  if (bucket.size === 0) {
    // No nodes of this type remain; drop the empty bucket.
    byType.delete(existing.type);
  }
},
|
||||
|
||||
[BOOTSTRAP_STORE]: putSelfReferencing,
|
||||
};
|
||||
|
||||
|
||||
@@ -24,4 +24,6 @@ export type Store = {
|
||||
get(hash: Hash): CasNode | null;
|
||||
has(hash: Hash): boolean;
|
||||
listByType(typeHash: Hash): Hash[];
|
||||
listAll(): Hash[];
|
||||
delete(hash: Hash): void;
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user