feat: Phase 1 — core primitives (hash + CBOR + memory store)

- CBOR deterministic encoding (cborg, RFC 8949 §4.2)
- XXH64 → 13-char Crockford Base32 hashing
- createMemoryStore() with idempotent put
- verify() integrity check
- bootstrap() self-referencing meta-schema seed
- 23 tests passing, biome clean

Closes #3
小橘 <xiaoju@shazhou.work>
This commit is contained in:
2026-05-17 09:23:05 +00:00
parent 9645e37ab5
commit 9aac38238a
15 changed files with 560 additions and 0 deletions
+3
View File
@@ -0,0 +1,3 @@
node_modules/
dist/
*.d.ts.map
+26
View File
@@ -0,0 +1,26 @@
{
"$schema": "https://biomejs.dev/schemas/2.4.15/schema.json",
"vcs": {
"enabled": true,
"clientKind": "git",
"useIgnoreFile": true
},
"files": {
"includes": ["**", "!**/node_modules", "!**/dist", "!**/*.d.ts"]
},
"formatter": {
"enabled": true,
"indentStyle": "space",
"indentWidth": 2
},
"linter": {
"enabled": true,
"rules": {
"recommended": true,
"suspicious": {
"noConsole": "error"
}
}
},
"assist": { "actions": { "source": { "organizeImports": "on" } } }
}
+48
View File
@@ -0,0 +1,48 @@
{
"lockfileVersion": 1,
"configVersion": 1,
"workspaces": {
"": {
"name": "@uncaged/json-cas-workspace",
"devDependencies": {
"@biomejs/biome": "^2.0.0",
"typescript": "^5.8.0",
},
},
"packages/json-cas": {
"name": "@uncaged/json-cas",
"version": "0.1.0",
"dependencies": {
"cborg": "^4.2.3",
"xxhash-wasm": "^1.1.0",
},
},
},
"packages": {
"@biomejs/biome": ["@biomejs/biome@2.4.15", "", { "optionalDependencies": { "@biomejs/cli-darwin-arm64": "2.4.15", "@biomejs/cli-darwin-x64": "2.4.15", "@biomejs/cli-linux-arm64": "2.4.15", "@biomejs/cli-linux-arm64-musl": "2.4.15", "@biomejs/cli-linux-x64": "2.4.15", "@biomejs/cli-linux-x64-musl": "2.4.15", "@biomejs/cli-win32-arm64": "2.4.15", "@biomejs/cli-win32-x64": "2.4.15" }, "bin": { "biome": "bin/biome" } }, "sha512-j5VH3a/h/HXTKBM50MDMxRCzkeLv9S2XJcW2WgnZT1+xyisi+0bISrXR82gCX+8S9lvK0skEvHJRN+3Ktr2hlw=="],
"@biomejs/cli-darwin-arm64": ["@biomejs/cli-darwin-arm64@2.4.15", "", { "os": "darwin", "cpu": "arm64" }, "sha512-rF3PPqLq1yoST79zaQbDjVJwsuIeci/O+9bgNmC5QpgOqz6aqYuzA4abyAGx+mgyiDXn4A049xAN8gijbuR1Qg=="],
"@biomejs/cli-darwin-x64": ["@biomejs/cli-darwin-x64@2.4.15", "", { "os": "darwin", "cpu": "x64" }, "sha512-/5KHXYMfSJs1fNXiX30xFtI8JcCFV6zaVVLxOa0M2sfqBKHkpQhRTv94yxQWxeTY2lzo2OuTlNvPC+hDQt2wcQ=="],
"@biomejs/cli-linux-arm64": ["@biomejs/cli-linux-arm64@2.4.15", "", { "os": "linux", "cpu": "arm64" }, "sha512-owaAMZD/T4LrD0ELNCk0Km3qrRHuM0X6EAyVE1FSqGY0rbLoiDLrO4Us2tllm6cAeB2Ioa9C2C08NZPdr8+0Ug=="],
"@biomejs/cli-linux-arm64-musl": ["@biomejs/cli-linux-arm64-musl@2.4.15", "", { "os": "linux", "cpu": "arm64" }, "sha512-ZPcxznxm0pogHBLZhYntyR3sR+MrZjqJIKEr7ZqVen0Rl+P/4upVmfYXjftizi9RoqZntg33fv/1fbdhbYXpEQ=="],
"@biomejs/cli-linux-x64": ["@biomejs/cli-linux-x64@2.4.15", "", { "os": "linux", "cpu": "x64" }, "sha512-0jj7THz12GbUOLmMibktK6DZjqz2zV64KFxyBtcFTKPiiOIY0a7vns1elpO1dERvxpsZ5ik0oFfz0oGwFde1+g=="],
"@biomejs/cli-linux-x64-musl": ["@biomejs/cli-linux-x64-musl@2.4.15", "", { "os": "linux", "cpu": "x64" }, "sha512-CNq/9W38SYSH023lfcQ4KKU8K0YX8T//FZUhcgtMMRABDojx5XsMV7jlweAvGSl389wJQB29Qo6Zb/a+jdvt+w=="],
"@biomejs/cli-win32-arm64": ["@biomejs/cli-win32-arm64@2.4.15", "", { "os": "win32", "cpu": "arm64" }, "sha512-ouhkYdlhp/1GghEJPdWwD/Vi3gQ1nFxuSpMolWsbq3Lsq3QUR4jl6UdhhscdCugKU5vOEuMiJhvKj66O0OCq+w=="],
"@biomejs/cli-win32-x64": ["@biomejs/cli-win32-x64@2.4.15", "", { "os": "win32", "cpu": "x64" }, "sha512-zBrGq5mx5wwpnow4+2BxUvleDM+GNd4sLbPaMapsSLQLD0NGRCquqPBTgN+7XkUteHvj7M+BstuI8tmnV7+HgQ=="],
"@uncaged/json-cas": ["@uncaged/json-cas@workspace:packages/json-cas"],
"cborg": ["cborg@4.5.8", "", { "bin": { "cborg": "lib/bin.js" } }, "sha512-6/viltD51JklRhq4L7jC3zgy6gryuG5xfZ3kzpE+PravtyeQLeQmCYLREhQH7pWENg5pY4Yu/XCd6a7dKScVlw=="],
"typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],
"xxhash-wasm": ["xxhash-wasm@1.1.0", "", {}, "sha512-147y/6YNh+tlp6nd/2pWq38i9h6mz/EuQ6njIrmW8D1BS5nCqs0P6DG+m6zTGnNz5I+uhZ0SHxBs9BsPrwcKDA=="],
}
}
+16
View File
@@ -0,0 +1,16 @@
{
"name": "@uncaged/json-cas-workspace",
"private": true,
"workspaces": [
"packages/*"
],
"devDependencies": {
"@biomejs/biome": "^2.0.0",
"typescript": "^5.8.0"
},
"scripts": {
"test": "bun test",
"check": "biome check .",
"format": "biome format --write ."
}
}
+16
View File
@@ -0,0 +1,16 @@
{
"name": "@uncaged/json-cas",
"version": "0.1.0",
"type": "module",
"main": "./src/index.ts",
"exports": {
".": "./src/index.ts"
},
"scripts": {
"test": "bun test"
},
"dependencies": {
"cborg": "^4.2.3",
"xxhash-wasm": "^1.1.0"
}
}
+27
View File
@@ -0,0 +1,27 @@
import type { Hash, Store } from "./types.js";
/**
 * The meta-schema seed payload: describes the structure of every CAS node.
 * This is the root type from which all other type nodes derive.
 *
 * The object is deep-frozen at runtime. `as const` only makes it read-only
 * for the type checker, while this very object is handed by reference to
 * `store.put`; freezing guarantees that nobody who later obtains the payload
 * can mutate the seed and thereby change the bootstrap hash.
 *
 * Keys are kept in alphabetical order at both nesting levels.
 */
const BOOTSTRAP_PAYLOAD = Object.freeze({
  description: "json-cas meta-schema seed",
  hashAlgorithm: "xxh64",
  hashEncoding: "crockford-base32-13",
  // Frozen separately: Object.freeze is shallow.
  nodeSchema: Object.freeze({
    payload: "any",
    timestamp: "number",
    type: "Hash",
  } as const),
  payloadEncoding: "cbor-rfc8949-deterministic",
  version: "1",
} as const);
/**
 * Seed `store` with the meta-schema node and report the hash it was stored under.
 *
 * The payload is written with a `null` type hash, which the `Store` contract
 * defines as "create a self-referencing node" (the node's `type` is its own hash).
 * Because `put` is content-addressed, repeated calls yield the same hash.
 *
 * @param store - Store that receives the seed node.
 * @returns Hash of the meta-schema seed node.
 */
export async function bootstrap(store: Store): Promise<Hash> {
  const seedHash = await store.put(null, BOOTSTRAP_PAYLOAD);
  return seedHash;
}
+9
View File
@@ -0,0 +1,9 @@
import { encode, rfc8949EncodeOptions } from "cborg";
/**
 * Serialize `value` to CBOR using cborg's RFC 8949 encode options, so that
 * logically equal values (e.g. objects differing only in key insertion order)
 * produce the same bytes.
 *
 * @param value - Any value cborg can encode.
 * @returns The encoded bytes.
 */
export function cborEncode(value: unknown): Uint8Array {
  const encoded = encode(value, rfc8949EncodeOptions);
  return encoded;
}
+71
View File
@@ -0,0 +1,71 @@
import type { XXHashAPI } from "xxhash-wasm";
import xxhashFactory from "xxhash-wasm";
import { cborEncode } from "./cbor.js";
import type { Hash } from "./types.js";
/** Crockford Base32 alphabet: index 0–31 maps to its digit (I, L, O and U are absent). */
const CROCKFORD = "0123456789ABCDEFGHJKMNPQRSTVWXYZ";

/**
 * Render a 64-bit value as exactly 13 Crockford Base32 digits, most
 * significant digit first.
 *
 * Each digit carries 5 bits, so 13 digits span 65 bits; for a u64 input the
 * leading digit is therefore always in the range `0`–`F`.
 *
 * @param n - Value to encode (expected to be an unsigned 64-bit integer).
 * @returns The 13-character encoded string.
 */
function u64ToCrockford(n: bigint): Hash {
  const digits = Array.from({ length: 13 }, (_unused, position) => {
    // Position 0 is the most significant 5-bit group.
    const shift = BigInt(5 * (12 - position));
    return CROCKFORD.charAt(Number((n >> shift) & 31n));
  });
  return digits.join("");
}
/**
 * Encode an ASCII string as bytes without TextEncoder (all hashes are ASCII).
 *
 * Each UTF-16 code unit becomes one byte. Input containing anything outside
 * the 7-bit ASCII range is rejected: storing such a code unit in a
 * `Uint8Array` would silently truncate it (e.g. U+0141 would become the byte
 * for "A"), letting two different strings produce identical hash input.
 *
 * @param s - ASCII-only string.
 * @returns One byte per character of `s`.
 * @throws RangeError if `s` contains a non-ASCII character.
 */
function asciiToBytes(s: string): Uint8Array {
  const bytes = new Uint8Array(s.length);
  for (let i = 0; i < s.length; i++) {
    const code = s.charCodeAt(i);
    if (code > 0x7f) {
      const hex = code.toString(16).toUpperCase().padStart(4, "0");
      throw new RangeError(
        `asciiToBytes: non-ASCII character U+${hex} at index ${i}`,
      );
    }
    bytes[i] = code;
  }
  return bytes;
}
/**
 * Join two byte arrays into a freshly allocated one: the bytes of `a`
 * followed by the bytes of `b`. Neither input is modified.
 */
function concatBytes(a: Uint8Array, b: Uint8Array): Uint8Array {
  const joined = new Uint8Array(a.length + b.length);
  let offset = 0;
  for (const part of [a, b]) {
    joined.set(part, offset);
    offset += part.length;
  }
  return joined;
}
/** Resolved hashing API, once initialisation has succeeded. */
let _instance: XXHashAPI | null = null;
/** In-flight initialisation, shared so concurrent callers trigger only one factory call. */
let _pending: Promise<XXHashAPI> | null = null;

/**
 * Lazily obtain the shared xxhash API.
 *
 * The first call starts `xxhashFactory()`; callers arriving while it is in
 * flight share the same promise, and later callers get the cached instance.
 * If initialisation fails, the cached promise is discarded so that the next
 * call retries instead of replaying the same rejection forever.
 *
 * @returns The initialised xxhash API.
 * @throws Whatever `xxhashFactory()` rejects with.
 */
async function getInstance(): Promise<XXHashAPI> {
  if (_instance !== null) return _instance;
  if (_pending === null) {
    _pending = xxhashFactory().then(
      (api) => {
        _instance = api;
        return api;
      },
      (error: unknown) => {
        // Do not keep a rejected promise cached: allow a later retry.
        _pending = null;
        throw error;
      },
    );
  }
  return _pending;
}
/**
 * Hash a normal (typed) node:
 *
 *   hash = XXH64(ascii(typeHash) ++ CBOR_deterministic(payload))
 *
 * The 64-bit digest is returned as a 13-character Crockford Base32 string.
 *
 * @param typeHash - Hash of the node's type descriptor.
 * @param payload - Node payload; encoded with `cborEncode`.
 * @returns The node's content hash.
 */
export async function computeHash(
  typeHash: Hash,
  payload: unknown,
): Promise<Hash> {
  const hasher = await getInstance();
  const typeBytes = asciiToBytes(typeHash);
  const payloadBytes = cborEncode(payload);
  const digest = hasher.h64Raw(concatBytes(typeBytes, payloadBytes));
  return u64ToCrockford(digest);
}
/**
 * Hash a self-referencing (bootstrap) node, whose type is its own hash and
 * therefore cannot be part of the hashed input:
 *
 *   hash = XXH64(CBOR_deterministic(payload))
 *
 * @param payload - Node payload; encoded with `cborEncode`.
 * @returns The node's content hash as a 13-character Crockford Base32 string.
 */
export async function computeSelfHash(payload: unknown): Promise<Hash> {
  const hasher = await getInstance();
  const payloadBytes = cborEncode(payload);
  const digest = hasher.h64Raw(payloadBytes);
  return u64ToCrockford(digest);
}
+234
View File
@@ -0,0 +1,234 @@
import { describe, expect, test } from "bun:test";
import { bootstrap } from "./bootstrap.js";
import { cborEncode } from "./cbor.js";
import { computeHash, computeSelfHash } from "./hash.js";
import { createMemoryStore } from "./store.js";
import type { CasNode } from "./types.js";
import { verify } from "./verify.js";
// ──────────────────────────────────────────────────────────────────────────────
// Step 1: CBOR deterministic encoding
// ──────────────────────────────────────────────────────────────────────────────
describe("cborEncode", () => {
  // Encoding the same literal twice must give byte-identical output.
  test("produces identical bytes for the same value", () => {
    const first = cborEncode({ x: 1, y: 2 });
    const second = cborEncode({ x: 1, y: 2 });
    expect(first).toEqual(second);
  });

  // Map keys are sorted by the encoder, so insertion order must not matter.
  test("is deterministic regardless of insertion order", () => {
    const insertedBThenA = cborEncode({ b: 2, a: 1 });
    const insertedAThenB = cborEncode({ a: 1, b: 2 });
    expect(insertedBThenA).toEqual(insertedAThenB);
  });

  test("encodes primitives consistently", () => {
    for (const primitive of [42, "hello", null]) {
      expect(cborEncode(primitive)).toEqual(cborEncode(primitive));
    }
  });
});
// ──────────────────────────────────────────────────────────────────────────────
// Step 2: XXH64 → 13-char Crockford Base32
// ──────────────────────────────────────────────────────────────────────────────
describe("computeHash", () => {
  test("returns a 13-character uppercase string", async () => {
    const hash = await computeHash("SOMETYPE00000", { value: 1 });
    expect(hash).toHaveLength(13);
    expect(hash).toMatch(/^[0-9A-HJKMNP-TV-Z]{13}$/);
  });

  test("is deterministic: same inputs → same hash", async () => {
    const h1 = await computeHash("SOMETYPE00000", { value: 1 });
    const h2 = await computeHash("SOMETYPE00000", { value: 1 });
    expect(h1).toBe(h2);
  });

  test("differs for different type hashes", async () => {
    const h1 = await computeHash("AAAAAAAAAAAAA", { value: 1 });
    const h2 = await computeHash("BBBBBBBBBBBBB", { value: 1 });
    expect(h1).not.toBe(h2);
  });

  test("differs for different payloads", async () => {
    const h1 = await computeHash("SOMETYPE00000", { value: 1 });
    const h2 = await computeHash("SOMETYPE00000", { value: 2 });
    expect(h1).not.toBe(h2);
  });

  test("computeSelfHash matches payload-only hash", async () => {
    const payload = { foo: "bar" };
    const h1 = await computeSelfHash(payload);
    const h2 = await computeSelfHash(payload);
    // Deterministic and well-formed...
    expect(h1).toBe(h2);
    expect(h1).toHaveLength(13);
    expect(h1).toMatch(/^[0-9A-HJKMNP-TV-Z]{13}$/);
    // ...and genuinely payload-only: mixing in a type hash must change it.
    const typed = await computeHash("SOMETYPE00000", payload);
    expect(h1).not.toBe(typed);
  });
});
// ──────────────────────────────────────────────────────────────────────────────
// Step 3: store.put() and store.get()
// ──────────────────────────────────────────────────────────────────────────────
describe("createMemoryStore – put and get", () => {
  test("put returns a hash and get retrieves the node", async () => {
    const store = createMemoryStore();
    const typeHash = await computeSelfHash({ name: "my-type" });

    const storedHash = await store.put(typeHash, { greeting: "hello" });
    expect(storedHash).toHaveLength(13);

    const retrieved = store.get(storedHash);
    expect(retrieved).not.toBeNull();
    expect(retrieved?.type).toBe(typeHash);
    expect(retrieved?.payload).toEqual({ greeting: "hello" });
    expect(typeof retrieved?.timestamp).toBe("number");
  });

  test("get returns null for unknown hash", () => {
    const emptyStore = createMemoryStore();
    expect(emptyStore.get("0000000000000")).toBeNull();
  });

  test("put is idempotent: same type+payload → same hash, no duplicate", async () => {
    const store = createMemoryStore();
    const typeHash = await computeSelfHash({ name: "my-type" });

    const firstHash = await store.put(typeHash, { n: 42 });
    const secondHash = await store.put(typeHash, { n: 42 });

    expect(firstHash).toBe(secondHash);
    expect(store.list()).toHaveLength(1);
  });

  test("timestamp is preserved on second put (idempotency)", async () => {
    const store = createMemoryStore();
    const typeHash = await computeSelfHash({ name: "my-type" });

    const hash = await store.put(typeHash, { v: 1 });
    const timestampBefore = store.get(hash)?.timestamp;

    // Let the clock advance so an overwritten timestamp would be detectable.
    await new Promise((resolve) => setTimeout(resolve, 5));
    await store.put(typeHash, { v: 1 });

    const timestampAfter = store.get(hash)?.timestamp;
    expect(timestampBefore).toBe(timestampAfter);
  });
});
// ──────────────────────────────────────────────────────────────────────────────
// Step 4: store.has() and store.list()
// ──────────────────────────────────────────────────────────────────────────────
describe("createMemoryStore – has and list", () => {
  test("has returns false before put, true after", async () => {
    const store = createMemoryStore();
    const typeHash = await computeSelfHash({ name: "t" });
    // Predict the hash up front so it can be probed before the node exists.
    const expectedHash = await computeHash(typeHash, { x: 1 });

    expect(store.has(expectedHash)).toBe(false);
    await store.put(typeHash, { x: 1 });
    expect(store.has(expectedHash)).toBe(true);
  });

  test("list returns all stored hashes", async () => {
    const store = createMemoryStore();
    const typeHash = await computeSelfHash({ name: "t" });

    const hashOne = await store.put(typeHash, { a: 1 });
    const hashTwo = await store.put(typeHash, { a: 2 });
    const hashThree = await store.put(typeHash, { a: 3 });

    const listed = store.list();
    expect(listed).toHaveLength(3);
    for (const expected of [hashOne, hashTwo, hashThree]) {
      expect(listed).toContain(expected);
    }
  });

  test("list returns empty array on fresh store", () => {
    const freshStore = createMemoryStore();
    expect(freshStore.list()).toEqual([]);
  });
});
// ──────────────────────────────────────────────────────────────────────────────
// Step 5: verify()
// ──────────────────────────────────────────────────────────────────────────────
describe("verify", () => {
  test("returns true for a correctly stored node", async () => {
    const store = createMemoryStore();
    const typeHash = await computeSelfHash({ name: "my-type" });
    const hash = await store.put(typeHash, { data: 123 });

    const stored = store.get(hash) as CasNode;
    const intact = await verify(hash, stored);
    expect(intact).toBe(true);
  });

  test("returns false when payload is tampered", async () => {
    const store = createMemoryStore();
    const typeHash = await computeSelfHash({ name: "my-type" });
    const hash = await store.put(typeHash, { data: 123 });

    // Same type, different payload: must no longer match the original hash.
    const forged: CasNode = {
      type: typeHash,
      payload: { data: 999 },
      timestamp: Date.now(),
    };
    const intact = await verify(hash, forged);
    expect(intact).toBe(false);
  });

  test("returns false when type is tampered", async () => {
    const store = createMemoryStore();
    const typeHash = await computeSelfHash({ name: "my-type" });
    const hash = await store.put(typeHash, { data: 123 });

    // Same payload, different type: must no longer match the original hash.
    const stored = store.get(hash) as CasNode;
    const forged: CasNode = { ...stored, type: "AAAAAAAAAAAAA" };
    const intact = await verify(hash, forged);
    expect(intact).toBe(false);
  });
});
// ──────────────────────────────────────────────────────────────────────────────
// Step 6: bootstrap()
// ──────────────────────────────────────────────────────────────────────────────
describe("bootstrap", () => {
  test("returns a valid 13-char hash", async () => {
    const seedHash = await bootstrap(createMemoryStore());
    expect(seedHash).toHaveLength(13);
    expect(seedHash).toMatch(/^[0-9A-HJKMNP-TV-Z]{13}$/);
  });

  test("node is stored and retrievable", async () => {
    const store = createMemoryStore();
    const seedHash = await bootstrap(store);

    expect(store.has(seedHash)).toBe(true);
    const seedNode = store.get(seedHash);
    expect(seedNode).not.toBeNull();
  });

  test("node is self-referencing: type === hash", async () => {
    const store = createMemoryStore();
    const seedHash = await bootstrap(store);

    const seedNode = store.get(seedHash) as CasNode;
    expect(seedNode.type).toBe(seedHash);
  });

  test("bootstrap node passes verify()", async () => {
    const store = createMemoryStore();
    const seedHash = await bootstrap(store);

    const seedNode = store.get(seedHash) as CasNode;
    const intact = await verify(seedHash, seedNode);
    expect(intact).toBe(true);
  });

  test("bootstrap is idempotent: same hash on repeated calls", async () => {
    const store = createMemoryStore();

    const firstCall = await bootstrap(store);
    const secondCall = await bootstrap(store);

    expect(firstCall).toBe(secondCall);
    expect(store.list()).toHaveLength(1);
  });
});
+6
View File
@@ -0,0 +1,6 @@
export { bootstrap } from "./bootstrap.js";
export { cborEncode } from "./cbor.js";
export { computeHash, computeSelfHash } from "./hash.js";
export { createMemoryStore } from "./store.js";
export type { CasNode, Hash, Store } from "./types.js";
export { verify } from "./verify.js";
+34
View File
@@ -0,0 +1,34 @@
import { computeHash, computeSelfHash } from "./hash.js";
import type { CasNode, Hash, Store } from "./types.js";
/**
 * Create an in-memory content-addressable store backed by a `Map`.
 *
 * - `put` hashes the node (payload-only hash when `typeHash` is `null`,
 *   otherwise type + payload) and inserts it only if that hash is not yet
 *   present, so an existing node — including its timestamp — is never replaced.
 * - `get` returns the stored node or `null`; `has` tests for presence;
 *   `list` returns a snapshot array of all stored hashes.
 *
 * Note: payloads are stored and returned by reference, not copied.
 */
export function createMemoryStore(): Store {
  const nodes = new Map<Hash, CasNode>();

  async function put(typeHash: Hash | null, payload: unknown): Promise<Hash> {
    let hash: Hash;
    let type: Hash;
    if (typeHash === null) {
      // Self-referencing node: its type is its own hash.
      hash = await computeSelfHash(payload);
      type = hash;
    } else {
      hash = await computeHash(typeHash, payload);
      type = typeHash;
    }

    const alreadyStored = nodes.has(hash);
    if (!alreadyStored) {
      nodes.set(hash, { type, payload, timestamp: Date.now() });
    }
    return hash;
  }

  function get(hash: Hash): CasNode | null {
    const found = nodes.get(hash);
    return found === undefined ? null : found;
  }

  function has(hash: Hash): boolean {
    return nodes.has(hash);
  }

  function list(): Hash[] {
    return Array.from(nodes.keys());
  }

  return { put, get, has, list };
}
+27
View File
@@ -0,0 +1,27 @@
/**
 * Identifier of a stored node: a 13-character uppercase Crockford Base32
 * string produced from an XXH64 digest.
 *
 * This is a plain alias of `string`; the format is a convention and is not
 * enforced by the type system.
 */
export type Hash = string;
/**
 * A content-addressed node with a typed payload.
 * - type: Hash of the type descriptor node (or the node's own hash for a
 *   self-referencing bootstrap node)
 * - payload: arbitrary data; `T` defaults to `unknown`
 * - timestamp: Unix epoch ms when the node was first stored
 *
 * Only `type` and `payload` contribute to a node's hash; `timestamp` is
 * metadata and is not part of the hashed input.
 */
export type CasNode<T = unknown> = {
type: Hash;
payload: T;
timestamp: number;
};
/**
 * Content-addressable store interface.
 *
 * - put: stores a node and resolves to its hash. put(null, payload) creates a
 *   self-referencing (bootstrap) node; any other typeHash creates a normal
 *   node of that type.
 * - get: returns the node stored under `hash`, or null when there is none.
 * - has: reports whether a node is stored under `hash`.
 * - list: returns the hashes of all stored nodes.
 *
 * Only `put` is asynchronous; the read operations are synchronous.
 */
export type Store = {
put(typeHash: Hash | null, payload: unknown): Promise<Hash>;
get(hash: Hash): CasNode | null;
has(hash: Hash): boolean;
list(): Hash[];
};
+16
View File
@@ -0,0 +1,16 @@
import { computeHash, computeSelfHash } from "./hash.js";
import type { CasNode, Hash } from "./types.js";
/**
 * Check that `node` really hashes to `hash`.
 *
 * The hash is recomputed from the node's content and compared with `hash`:
 * - a self-referencing node (`node.type === hash`) is hashed from its CBOR
 *   payload alone;
 * - any other node is hashed as XXH64(type_bytes ++ CBOR(payload)).
 *
 * `node.timestamp` plays no part in the check.
 *
 * @param hash - Hash the node is claimed to be stored under.
 * @param node - Node to check.
 * @returns `true` when the recomputed hash equals `hash`.
 */
export async function verify(hash: Hash, node: CasNode): Promise<boolean> {
  const selfReferencing = node.type === hash;
  const recomputed = selfReferencing
    ? await computeSelfHash(node.payload)
    : await computeHash(node.type, node.payload);
  return recomputed === hash;
}
+8
View File
@@ -0,0 +1,8 @@
{
"extends": "../../tsconfig.json",
"compilerOptions": {
"rootDir": "src",
"outDir": "dist"
},
"include": ["src"]
}
+19
View File
@@ -0,0 +1,19 @@
{
"compilerOptions": {
"target": "ES2022",
"module": "ESNext",
"moduleResolution": "bundler",
"strict": true,
"exactOptionalPropertyTypes": true,
"noUncheckedIndexedAccess": true,
"noImplicitOverride": true,
"verbatimModuleSyntax": true,
"skipLibCheck": true,
"composite": true,
"declaration": true,
"declarationMap": true,
"sourceMap": true,
"outDir": "dist"
},
"exclude": ["node_modules", "dist"]
}