feat: embedding service — Cloudflare Worker + KV cache + Dashscope

- POST /embed: batch text → vector (max 100)
- KV cache: sha256(model+text), content-addressable, no expiry
- Dashscope text-embedding-v3 upstream (1024 dims)
- Bearer token auth
- Health endpoint

Deployed: https://embed.shazhou.workers.dev
This commit is contained in:
2026-04-29 07:21:46 +00:00
parent 1d225ed171
commit 396e005686
9 changed files with 2254 additions and 2 deletions
+4
View File
@@ -0,0 +1,4 @@
node_modules/
dist/
.wrangler/
.dev.vars
+43 -2
View File
@@ -1,3 +1,44 @@
# Embedding Service
Embedding computation service — text → vector + cache (Cloudflare Worker). Text → vector 计算服务,nerve knowledge 的 embedding 后端。
## 接口
```
POST /embed
Authorization: Bearer <token>
{
"texts": ["chunk 1", "chunk 2"],
"model": "text-embedding-v3"
}
→ 200
{
"embeddings": [[0.012, ...], [0.056, ...]],
"model": "text-embedding-v3",
"dimensions": 1024,
"cached": [false, true]
}
```
## 特性
- **缓存**: KV 存储,key = sha256(model + text),永不过期
- **上游**: Dashscope text-embedding-v3(可配置)
- **批量**: 单次最多 100 条
- **鉴权**: Bearer token
## 开发
```bash
pnpm install
pnpm dev # 本地开发
pnpm deploy # 部署到 Cloudflare Workers
```
## 环境变量(Secrets)
- `AUTH_TOKEN` — 鉴权 token
- `DASHSCOPE_API_KEY` — Dashscope API key
- `DEFAULT_MODEL` — 默认模型(可选,默认 text-embedding-v3)
+17
View File
@@ -0,0 +1,17 @@
{
"name": "embed",
"version": "1.0.0",
"private": true,
"scripts": {
"dev": "wrangler dev",
"deploy": "wrangler deploy",
"test": "vitest run",
"check": "tsc --noEmit"
},
"devDependencies": {
"@cloudflare/workers-types": "^4.20241205.0",
"typescript": "^5.7.0",
"vitest": "^3.0.0",
"wrangler": "^3.99.0"
}
}
+1939
View File
File diff suppressed because it is too large Load Diff
+164
View File
@@ -0,0 +1,164 @@
import { createHash } from "./hash.js";
import type { Env } from "./index.js";
/** Maximum number of texts accepted in one request; larger batches are rejected with HTTP 400. */
const MAX_BATCH = 100;
/** Model name used when neither the request body nor `env.DEFAULT_MODEL` provides one. */
const DEFAULT_MODEL = "text-embedding-v3";
/** JSON body accepted by the embed handler. */
type EmbedRequest = {
/** Texts to embed; the handler requires a non-empty array of non-empty strings. */
texts: string[];
/** Optional model name; a default is substituted when it is omitted. */
model?: string;
};
/**
 * Shape of the Dashscope embedding response as far as this module declares it.
 * Every level is optional because the payload comes from an external service.
 */
type DashscopeResponse = {
output?: {
embeddings?: Array<{
/** The embedding vector for one input text. */
embedding: number[];
/** Index used to sort the results back into input order. */
text_index: number;
}>;
};
// Declared for completeness; no code in this module reads `usage`.
usage?: {
total_tokens: number;
};
};
/**
 * Handles an embedding request: validates the JSON body, serves vectors from
 * the KV cache where possible, asks Dashscope for the remainder, and returns
 * every vector in input order.
 *
 * Responses:
 * - 200 `{ embeddings, model, dimensions, cached }`, where `cached[i]` is true
 *   when `texts[i]` was served from the cache.
 * - 400 when the body is not valid JSON, when `texts` is not a non-empty array
 *   of non-empty strings within `MAX_BATCH`, or when `model` is present but is
 *   not a non-empty string.
 * - 502 when the upstream provider fails or returns the wrong number of vectors.
 *
 * @param request Incoming request whose body carries the JSON payload.
 * @param env Worker bindings: KV cache, Dashscope API key, optional default model.
 * @returns A JSON response; CORS is open (`Access-Control-Allow-Origin: *`).
 */
export async function handleEmbed(request: Request, env: Env): Promise<Response> {
  let parsed: unknown;
  try {
    parsed = await request.json();
  } catch {
    return jsonError("Invalid JSON body", 400);
  }

  // Valid JSON can still be `null` or a primitive; reading `.texts` off `null`
  // would throw, so fall back to an empty object and let validation reject it.
  const fields = (typeof parsed === "object" && parsed !== null ? parsed : {}) as Partial<
    Record<keyof EmbedRequest, unknown>
  >;

  const texts = fields.texts;
  if (!Array.isArray(texts) || texts.length === 0) {
    return jsonError("'texts' must be a non-empty array of strings", 400);
  }
  if (texts.length > MAX_BATCH) {
    return jsonError(`Batch size ${texts.length} exceeds limit of ${MAX_BATCH}`, 400);
  }
  for (let i = 0; i < texts.length; i++) {
    const item: unknown = texts[i];
    if (typeof item !== "string" || item.length === 0) {
      return jsonError(`texts[${i}] must be a non-empty string`, 400);
    }
  }
  const inputs = texts as string[];

  // `model` ends up in the cache key and the upstream payload, so it must be a
  // real string; `null`/absent means "use the default".
  let model: string;
  const requestedModel = fields.model;
  if (requestedModel === undefined || requestedModel === null) {
    model = env.DEFAULT_MODEL ?? DEFAULT_MODEL;
  } else if (typeof requestedModel === "string" && requestedModel.length > 0) {
    model = requestedModel;
  } else {
    return jsonError("'model' must be a non-empty string", 400);
  }

  // Cache lookups are independent of each other, so issue them concurrently.
  const cacheKeys = await Promise.all(inputs.map((text) => cacheKey(model, text)));
  const vectors: Array<number[] | null> = await Promise.all(
    cacheKeys.map((key) => readCachedVector(env, key)),
  );
  // Snapshot the hit/miss flags before the misses are filled in below.
  const hitFlags = vectors.map((vec) => vec !== null);

  // Group missing positions by cache key so a text repeated within the batch
  // is embedded (and written to KV) only once.
  const missing = new Map<string, number[]>();
  vectors.forEach((vec, i) => {
    if (vec !== null) {
      return;
    }
    const key = cacheKeys[i]!;
    const positions = missing.get(key);
    if (positions) {
      positions.push(i);
    } else {
      missing.set(key, [i]);
    }
  });

  if (missing.size > 0) {
    const groups = [...missing.entries()];
    const uniqueTexts = groups.map(([, positions]) => inputs[positions[0]!]!);

    let upstream: number[][] | null;
    try {
      upstream = await callDashscope(env.DASHSCOPE_API_KEY, model, uniqueTexts);
    } catch (err: unknown) {
      // A rejected upstream call (e.g. network failure) is a gateway error too.
      console.error("Dashscope request threw", err);
      upstream = null;
    }
    if (upstream === null) {
      return jsonError("Upstream embedding provider failed", 502);
    }
    if (upstream.length !== uniqueTexts.length) {
      return jsonError("Upstream returned mismatched embedding count", 502);
    }
    const fresh = upstream;

    const writes = groups.map(([key, positions], j) => {
      const vec = fresh[j]!;
      for (const idx of positions) {
        vectors[idx] = vec;
      }
      return env.EMBED_CACHE.put(key, JSON.stringify(vec));
    });

    // Caching is best-effort: the vectors are already computed, so a failed
    // write is logged rather than turned into a failed request.
    const outcomes = await Promise.allSettled(writes);
    for (const outcome of outcomes) {
      if (outcome.status === "rejected") {
        console.error("KV cache write failed", outcome.reason);
      }
    }
  }

  const dimensions = vectors[0]?.length ?? 0;
  return new Response(
    JSON.stringify({
      embeddings: vectors,
      model,
      dimensions,
      cached: hitFlags,
    }),
    {
      status: 200,
      headers: {
        "Content-Type": "application/json",
        "Access-Control-Allow-Origin": "*",
      },
    },
  );
}

/**
 * Reads one cached embedding from KV.
 * Returns `null` on a miss, and also when the stored value is not a JSON array,
 * so a corrupt entry is recomputed instead of crashing the request.
 */
async function readCachedVector(env: Env, key: string): Promise<number[] | null> {
  const hit = await env.EMBED_CACHE.get(key, "text");
  if (hit === null) {
    return null;
  }
  try {
    const value: unknown = JSON.parse(hit);
    return Array.isArray(value) ? (value as number[]) : null;
  } catch {
    return null;
  }
}
/**
 * Derives the KV key under which an embedding is stored.
 *
 * The digest covers the model and the text joined by a NUL character, and the
 * resulting key has the form `emb:<model>:<hex digest>`.
 *
 * @param model Embedding model name.
 * @param text Input text being embedded.
 * @returns The cache key string.
 */
async function cacheKey(model: string, text: string): Promise<string> {
  const digest = await createHash([model, text].join("\0"));
  return ["emb", model, digest].join(":");
}
/**
 * Largest number of texts sent to Dashscope in a single request.
 * NOTE(review): Alibaba Cloud Model Studio documents a per-request limit of 10
 * inputs for text-embedding-v3; confirm before raising this for other models.
 */
const DASHSCOPE_BATCH_LIMIT = 10;

/**
 * Requests embeddings for `texts` from Dashscope, splitting the input into
 * batches of at most `DASHSCOPE_BATCH_LIMIT` texts.
 *
 * Batches are sent one after another so the first failure stops further
 * upstream calls. The caller is expected to verify that the number of returned
 * vectors matches the number of texts.
 *
 * @param apiKey Dashscope API key, sent as a Bearer token.
 * @param model Embedding model name forwarded to Dashscope.
 * @param texts Texts to embed, in the order the vectors should be returned.
 * @returns One vector per text in input order, or `null` if any batch failed.
 *   An empty `texts` yields an empty array without contacting Dashscope.
 */
async function callDashscope(
  apiKey: string,
  model: string,
  texts: string[],
): Promise<number[][] | null> {
  const vectors: number[][] = [];
  for (let start = 0; start < texts.length; start += DASHSCOPE_BATCH_LIMIT) {
    const batch = texts.slice(start, start + DASHSCOPE_BATCH_LIMIT);
    const batchVectors = await requestDashscopeBatch(apiKey, model, batch);
    if (batchVectors === null) {
      return null;
    }
    vectors.push(...batchVectors);
  }
  return vectors;
}

/**
 * Performs one Dashscope embedding request.
 * Returns the vectors ordered by `text_index`, or `null` (after logging) on a
 * network error, a non-2xx status, an unparsable body, or an empty result.
 */
async function requestDashscopeBatch(
  apiKey: string,
  model: string,
  texts: string[],
): Promise<number[][] | null> {
  const url = "https://dashscope.aliyuncs.com/api/v1/services/embeddings/text-embedding/text-embedding";

  let resp: Response;
  try {
    resp = await fetch(url, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model,
        input: { texts },
        parameters: {
          text_type: "document",
        },
      }),
    });
  } catch (err: unknown) {
    // fetch rejects on network-level failures; report them like any other upstream error.
    console.error("Dashscope request failed", err);
    return null;
  }

  if (!resp.ok) {
    console.error(`Dashscope error: ${resp.status} ${await resp.text()}`);
    return null;
  }

  let data: DashscopeResponse;
  try {
    data = (await resp.json()) as DashscopeResponse;
  } catch (err: unknown) {
    console.error("Dashscope returned a non-JSON body", err);
    return null;
  }

  const rawEmbeddings = data.output?.embeddings;
  if (!rawEmbeddings || rawEmbeddings.length === 0) {
    console.error("Dashscope returned no embeddings", JSON.stringify(data));
    return null;
  }

  // Sort a copy by text_index so the output follows the input order.
  const sorted = [...rawEmbeddings].sort((a, b) => a.text_index - b.text_index);
  return sorted.map((e) => e.embedding);
}
/**
 * Builds a JSON error response of the form `{ "error": <message> }`.
 *
 * @param message Human-readable error description placed in the `error` field.
 * @param status HTTP status code for the response.
 * @returns A response with JSON content type and an open CORS origin header.
 */
function jsonError(message: string, status: number): Response {
  const payload = JSON.stringify({ error: message });
  const headers = {
    "Content-Type": "application/json",
    "Access-Control-Allow-Origin": "*",
  };
  return new Response(payload, { status, headers });
}
+10
View File
@@ -0,0 +1,10 @@
/**
 * Computes the SHA-256 digest of a string with the Web Crypto API
 * (available in the Workers runtime).
 *
 * @param input Text to hash; it is encoded with `TextEncoder` before digesting.
 * @returns The digest as a lowercase hexadecimal string, two characters per byte.
 */
export async function createHash(input: string): Promise<string> {
  const digest = await crypto.subtle.digest("SHA-256", new TextEncoder().encode(input));
  let hex = "";
  for (const byte of new Uint8Array(digest)) {
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
+56
View File
@@ -0,0 +1,56 @@
/// <reference types="@cloudflare/workers-types" />
/** Bindings and secrets the Worker receives as its `env` argument. */
export interface Env {
/** KV namespace binding used by the embed handler to read and write cached embeddings. */
EMBED_CACHE: KVNamespace;
/** Bearer token for authentication; requests must send `Authorization: Bearer <token>`. */
AUTH_TOKEN: string;
/** Dashscope API key, sent upstream as a Bearer token. */
DASHSCOPE_API_KEY: string;
/** Default embedding model; optional, the embed handler falls back to a built-in default. */
DEFAULT_MODEL?: string;
}
import { handleEmbed } from "./embed.js";
export default {
  /**
   * Worker entry point.
   *
   * - `OPTIONS` on any path answers the CORS preflight without authentication.
   * - Every other request must carry `Authorization: Bearer <AUTH_TOKEN>`,
   *   otherwise it gets a 401.
   * - `POST /embed` is delegated to `handleEmbed`; `GET /health` returns
   *   `{ status: "ok" }`; anything else returns 404.
   *
   * @param request Incoming HTTP request.
   * @param env Worker bindings and secrets.
   */
  async fetch(request: Request, env: Env): Promise<Response> {
    // CORS preflight
    if (request.method === "OPTIONS") {
      return new Response(null, {
        headers: {
          "Access-Control-Allow-Origin": "*",
          "Access-Control-Allow-Methods": "POST, OPTIONS",
          "Access-Control-Allow-Headers": "Authorization, Content-Type",
        },
      });
    }

    // Auth check
    if (!isAuthorized(request.headers.get("Authorization"), env.AUTH_TOKEN)) {
      return jsonResponse({ error: "Unauthorized" }, 401);
    }

    const url = new URL(request.url);
    if (request.method === "POST" && url.pathname === "/embed") {
      return handleEmbed(request, env);
    }
    if (request.method === "GET" && url.pathname === "/health") {
      return jsonResponse({ status: "ok" });
    }
    return jsonResponse({ error: "Not found" }, 404);
  },
} satisfies ExportedHandler<Env>;

/**
 * Checks the `Authorization` header against the configured bearer token.
 *
 * Fails closed when the token secret is missing or empty: without this guard
 * the expected header would be the literal string "Bearer undefined", which
 * any client could send. The comparison visits every byte of the expected
 * value regardless of where a mismatch occurs, so response timing does not
 * reveal how much of the token was guessed correctly.
 */
function isAuthorized(authHeader: string | null, token: string | undefined): boolean {
  if (!token) {
    console.error("AUTH_TOKEN is not configured; rejecting request");
    return false;
  }
  if (authHeader === null) {
    return false;
  }
  const encoder = new TextEncoder();
  const expected = encoder.encode(`Bearer ${token}`);
  const actual = encoder.encode(authHeader);
  // Seed with the length difference so a longer header with a matching prefix still fails.
  let diff = expected.length ^ actual.length;
  for (let i = 0; i < expected.length; i++) {
    diff |= expected[i]! ^ (actual[i] ?? 0);
  }
  return diff === 0;
}
/**
 * Serializes `data` as JSON and wraps it in a response.
 *
 * @param data Value passed to `JSON.stringify` to form the body.
 * @param status HTTP status code; defaults to 200.
 * @returns A response with JSON content type and an open CORS origin header.
 */
function jsonResponse(data: unknown, status = 200): Response {
  const init = {
    status,
    headers: {
      "Content-Type": "application/json",
      "Access-Control-Allow-Origin": "*",
    },
  };
  const body = JSON.stringify(data);
  return new Response(body, init);
}
+14
View File
@@ -0,0 +1,14 @@
{
"compilerOptions": {
"target": "ES2022",
"module": "ES2022",
"moduleResolution": "bundler",
"strict": true,
"noUncheckedIndexedAccess": true,
"skipLibCheck": true,
"types": ["@cloudflare/workers-types"],
"outDir": "dist",
"rootDir": "src"
},
"include": ["src"]
}
+7
View File
@@ -0,0 +1,7 @@
name = "embed"
main = "src/index.ts"
compatibility_date = "2024-12-01"
[[kv_namespaces]]
binding = "EMBED_CACHE"
id = "80f3318667c04ade94fa53ba3e2285fe"