feat: embedding service — Cloudflare Worker + KV cache + Dashscope
- POST /embed: batch text → vector (max 100) - KV cache: sha256(model+text), content-addressable, no expiry - Dashscope text-embedding-v3 upstream (1024 dims) - Bearer token auth - Health endpoint Deployed: https://embed.shazhou.workers.dev
This commit is contained in:
@@ -0,0 +1,4 @@
|
||||
node_modules/
|
||||
dist/
|
||||
.wrangler/
|
||||
.dev.vars
|
||||
@@ -1,3 +1,44 @@
|
||||
# embed
|
||||
# Embedding Service
|
||||
|
||||
Embedding computation service — text → vector + cache (Cloudflare Worker)
|
||||
Text → vector 计算服务,nerve knowledge 的 embedding 后端。
|
||||
|
||||
## 接口
|
||||
|
||||
```
|
||||
POST /embed
|
||||
Authorization: Bearer <token>
|
||||
|
||||
{
|
||||
"texts": ["chunk 1", "chunk 2"],
|
||||
"model": "text-embedding-v3"
|
||||
}
|
||||
|
||||
→ 200
|
||||
{
|
||||
"embeddings": [[0.012, ...], [0.056, ...]],
|
||||
"model": "text-embedding-v3",
|
||||
"dimensions": 1024,
|
||||
"cached": [false, true]
|
||||
}
|
||||
```
|
||||
|
||||
## 特性
|
||||
|
||||
- **缓存**: KV 存储,key = `emb:<model>:<sha256(model + "\0" + text)>`,永不过期
|
||||
- **上游**: Dashscope text-embedding-v3(可配置)
|
||||
- **批量**: 单次最多 100 条
|
||||
- **鉴权**: Bearer token
|
||||
|
||||
## 开发
|
||||
|
||||
```bash
|
||||
pnpm install
|
||||
pnpm dev # 本地开发
|
||||
pnpm deploy # 部署到 Cloudflare Workers
|
||||
```
|
||||
|
||||
## 环境变量(Secrets)
|
||||
|
||||
- `AUTH_TOKEN` — 鉴权 token
|
||||
- `DASHSCOPE_API_KEY` — Dashscope API key
|
||||
- `DEFAULT_MODEL` — 默认模型(可选,默认 text-embedding-v3)
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"name": "embed",
|
||||
"version": "1.0.0",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"dev": "wrangler dev",
|
||||
"deploy": "wrangler deploy",
|
||||
"test": "vitest run",
|
||||
"check": "tsc --noEmit"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@cloudflare/workers-types": "^4.20241205.0",
|
||||
"typescript": "^5.7.0",
|
||||
"vitest": "^3.0.0",
|
||||
"wrangler": "^3.99.0"
|
||||
}
|
||||
}
|
||||
Generated
+1939
File diff suppressed because it is too large
Load Diff
+164
@@ -0,0 +1,164 @@
|
||||
import { createHash } from "./hash.js";
|
||||
import type { Env } from "./index.js";
|
||||
|
||||
/** Largest number of texts a single `/embed` request may carry. */
const MAX_BATCH = 100;

/** Model name used when neither the request body nor `env.DEFAULT_MODEL` provides one. */
const DEFAULT_MODEL = "text-embedding-v3";
|
||||
|
||||
/**
 * Shape of the JSON body that `handleEmbed` reads from a `POST /embed` request.
 */
type EmbedRequest = {
  /** Texts to embed; the handler requires a non-empty array of non-empty strings. */
  texts: Array<string>;
  /** Optional embedding model name; the handler falls back to a default when it is absent. */
  model?: string;
};
|
||||
|
||||
/**
 * The parts of the Dashscope embedding response that this module declares.
 * Every level is optional because the payload comes straight from
 * `resp.json()` and is only cast, not validated, to this type.
 */
type DashscopeResponse = {
  output?: {
    /** One entry per embedded text. */
    embeddings?: {
      /** The embedding vector. */
      embedding: number[];
      /** Index used to restore input order (results are sorted by it). */
      text_index: number;
    }[];
  };
  /** Usage block reported by the upstream; declared here but not read by the code shown. */
  usage?: {
    total_tokens: number;
  };
};
|
||||
|
||||
/**
 * Handles `POST /embed`.
 *
 * Flow: validate the JSON body, look every text up in the `EMBED_CACHE` KV
 * namespace, ask the upstream provider (via `callDashscope`) for the texts
 * that missed, write the fresh vectors back to KV, and return all vectors in
 * input order.
 *
 * Responses:
 * - `200` — `{ embeddings, model, dimensions, cached }`, where `cached[i]`
 *   tells whether `embeddings[i]` was served from KV and `dimensions` is the
 *   length of the first vector.
 * - `400` — the body is not valid JSON, `texts` is not a non-empty array of
 *   non-empty strings, the batch exceeds `MAX_BATCH`, or `model` is present
 *   but not a non-empty string.
 * - `502` — the upstream call failed or returned an unexpected number of
 *   vectors.
 *
 * @param request Incoming request; its body is consumed as JSON.
 * @param env Worker bindings (`EMBED_CACHE`, `DASHSCOPE_API_KEY`, `DEFAULT_MODEL`).
 * @returns The JSON response described above.
 */
export async function handleEmbed(request: Request, env: Env): Promise<Response> {
  let parsed: unknown;
  try {
    parsed = await request.json();
  } catch {
    return jsonError("Invalid JSON body", 400);
  }

  // Valid JSON is not necessarily an object: `null` parses fine but would
  // throw on the property reads below and surface as a 500.
  if (typeof parsed !== "object" || parsed === null) {
    return jsonError("'texts' must be a non-empty array of strings", 400);
  }
  const body = parsed as { texts?: unknown; model?: unknown };

  const rawTexts = body.texts;
  if (!Array.isArray(rawTexts) || rawTexts.length === 0) {
    return jsonError("'texts' must be a non-empty array of strings", 400);
  }

  if (rawTexts.length > MAX_BATCH) {
    return jsonError(`Batch size ${rawTexts.length} exceeds limit of ${MAX_BATCH}`, 400);
  }

  const invalidAt = rawTexts.findIndex(
    (item: unknown) => typeof item !== "string" || item.length === 0,
  );
  if (invalidAt !== -1) {
    return jsonError(`texts[${invalidAt}] must be a non-empty string`, 400);
  }
  const texts = rawTexts as string[];

  // An absent (or null) model falls back to the configured default; anything
  // else must be a usable string because it becomes part of the cache key and
  // is forwarded upstream.
  let model = env.DEFAULT_MODEL ?? DEFAULT_MODEL;
  if (body.model !== undefined && body.model !== null) {
    if (typeof body.model !== "string" || body.model.length === 0) {
      return jsonError("'model' must be a non-empty string", 400);
    }
    model = body.model;
  }

  // Cache lookups are independent of each other, so issue them concurrently.
  const cacheKeys = await Promise.all(texts.map((text) => cacheKey(model, text)));
  const hits = await Promise.all(cacheKeys.map((key) => env.EMBED_CACHE.get(key, "text")));
  const wasCached = hits.map((hit) => hit !== null);

  // Group misses by cache key: a text repeated within the batch is embedded
  // once and written once, which also avoids concurrent writes to one KV key.
  const vectors: Array<number[] | null> = [];
  const missesByKey = new Map<string, number[]>();
  hits.forEach((hit, position) => {
    if (hit !== null) {
      vectors.push(JSON.parse(hit) as number[]);
      return;
    }
    vectors.push(null);
    const key = cacheKeys[position]!;
    const positions = missesByKey.get(key);
    if (positions === undefined) {
      missesByKey.set(key, [position]);
    } else {
      positions.push(position);
    }
  });

  if (missesByKey.size > 0) {
    const groups = [...missesByKey.entries()];
    const uniqueTexts = groups.map(([, positions]) => texts[positions[0]!]!);
    const embeddings = await callDashscope(env.DASHSCOPE_API_KEY, model, uniqueTexts);

    if (embeddings === null) {
      return jsonError("Upstream embedding provider failed", 502);
    }

    if (embeddings.length !== uniqueTexts.length) {
      return jsonError("Upstream returned mismatched embedding count", 502);
    }

    // Fill every position that shares the key, then persist the vector once.
    const writes = groups.map(([key, positions], groupIndex) => {
      const vec = embeddings[groupIndex]!;
      for (const position of positions) {
        vectors[position] = vec;
      }
      return env.EMBED_CACHE.put(key, JSON.stringify(vec));
    });
    await Promise.all(writes);
  }

  const dimensions = vectors[0]?.length ?? 0;

  return new Response(
    JSON.stringify({
      embeddings: vectors,
      model,
      dimensions,
      cached: wasCached,
    }),
    {
      status: 200,
      headers: {
        "Content-Type": "application/json",
        "Access-Control-Allow-Origin": "*",
      },
    },
  );
}
|
||||
|
||||
/**
 * Derives the KV key for the embedding of `text` under `model`.
 *
 * The model and text are joined with a NUL character and hashed with
 * `createHash`; the key is `emb:<model>:<hash>`.
 *
 * @param model Embedding model name.
 * @param text Text whose embedding is being cached.
 * @returns The KV key string.
 */
async function cacheKey(model: string, text: string): Promise<string> {
  const digest = await createHash([model, text].join("\0"));
  return ["emb", model, digest].join(":");
}
|
||||
|
||||
/**
 * Maximum number of texts sent to Dashscope in one HTTP request.
 *
 * NOTE(review): Dashscope's API reference lists a per-request cap of 10 texts
 * for text-embedding-v3 — confirm against the current documentation. A value
 * lower than the real cap is still correct, it only costs extra requests.
 */
const DASHSCOPE_MAX_TEXTS_PER_REQUEST = 10;

/**
 * Embeds `texts` with the Dashscope text-embedding endpoint.
 *
 * The input is split into slices of at most
 * `DASHSCOPE_MAX_TEXTS_PER_REQUEST` texts, the slices are requested
 * concurrently, and the vectors are concatenated back in input order.
 *
 * @param apiKey Dashscope API key, sent as a Bearer token.
 * @param model Embedding model name forwarded to the upstream.
 * @param texts Texts to embed.
 * @returns One vector per input text, in input order (an empty array for an
 *   empty input), or `null` if any upstream request failed or returned a
 *   malformed payload. Failures are logged with `console.error`.
 */
async function callDashscope(
  apiKey: string,
  model: string,
  texts: string[],
): Promise<number[][] | null> {
  const slices: string[][] = [];
  for (let start = 0; start < texts.length; start += DASHSCOPE_MAX_TEXTS_PER_REQUEST) {
    slices.push(texts.slice(start, start + DASHSCOPE_MAX_TEXTS_PER_REQUEST));
  }

  const perSlice = await Promise.all(
    slices.map((slice) => requestDashscopeBatch(apiKey, model, slice)),
  );

  const merged: number[][] = [];
  for (const vectors of perSlice) {
    if (vectors === null) {
      return null;
    }
    merged.push(...vectors);
  }
  return merged;
}

/**
 * Sends one request to Dashscope and returns its vectors ordered by `text_index`.
 *
 * Returns `null` (after logging) on a network error, a non-2xx status, a body
 * that is not JSON, a vector count that differs from `texts.length`, or
 * `text_index` values that are missing, out of range or duplicated.
 */
async function requestDashscopeBatch(
  apiKey: string,
  model: string,
  texts: string[],
): Promise<number[][] | null> {
  const url = "https://dashscope.aliyuncs.com/api/v1/services/embeddings/text-embedding/text-embedding";

  let data: DashscopeResponse | null;
  try {
    const resp = await fetch(url, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model,
        input: { texts },
        parameters: {
          text_type: "document",
        },
      }),
    });

    if (!resp.ok) {
      console.error(`Dashscope error: ${resp.status} ${await resp.text()}`);
      return null;
    }

    data = (await resp.json()) as DashscopeResponse | null;
  } catch (err: unknown) {
    // A rejected fetch or an unparsable body must not escape: callers rely on
    // `null` to produce their upstream-failure response.
    console.error("Dashscope request failed", err instanceof Error ? err.message : String(err));
    return null;
  }

  const rawEmbeddings = data?.output?.embeddings;

  if (!Array.isArray(rawEmbeddings) || rawEmbeddings.length === 0) {
    console.error("Dashscope returned no embeddings", JSON.stringify(data));
    return null;
  }

  if (rawEmbeddings.length !== texts.length) {
    console.error(
      `Dashscope returned ${rawEmbeddings.length} embeddings for ${texts.length} texts`,
    );
    return null;
  }

  // Place each vector at its `text_index` instead of sorting, so duplicated or
  // out-of-range indices are detected rather than silently misaligned.
  const slots = new Array<number[] | undefined>(texts.length).fill(undefined);
  for (const item of rawEmbeddings) {
    const index = item == null ? NaN : item.text_index;
    if (
      !Number.isInteger(index) ||
      index < 0 ||
      index >= texts.length ||
      slots[index] !== undefined ||
      !Array.isArray(item.embedding)
    ) {
      console.error("Dashscope returned a malformed embedding entry", JSON.stringify(item));
      return null;
    }
    slots[index] = item.embedding;
  }

  // Count matches and indices are unique and in range, so every slot is filled.
  return slots as number[][];
}
|
||||
|
||||
/**
 * Builds a JSON error response of the form `{ "error": message }`.
 *
 * @param message Human-readable error text placed under the `error` key.
 * @param status HTTP status code of the response.
 * @returns A response with JSON content type and a wildcard CORS origin header.
 */
function jsonError(message: string, status: number): Response {
  const payload = JSON.stringify({ error: message });
  const headers = {
    "Content-Type": "application/json",
    "Access-Control-Allow-Origin": "*",
  };
  return new Response(payload, { status, headers });
}
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
/**
 * Returns the SHA-256 digest of `input` as a lowercase hexadecimal string.
 *
 * The string is encoded with `TextEncoder` and hashed through the Web Crypto
 * API (`crypto.subtle`), which is available in the Workers runtime.
 *
 * @param input Text to hash.
 * @returns 64 hexadecimal characters.
 */
export async function createHash(input: string): Promise<string> {
  const digest = await crypto.subtle.digest("SHA-256", new TextEncoder().encode(input));
  let hex = "";
  for (const byte of new Uint8Array(digest)) {
    // Each byte becomes exactly two hex digits.
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
|
||||
@@ -0,0 +1,56 @@
|
||||
/// <reference types="@cloudflare/workers-types" />
|
||||
|
||||
/**
 * Bindings and secrets the Worker receives as its `env` argument.
 */
export interface Env {
  /** KV namespace used as the embedding cache. */
  EMBED_CACHE: KVNamespace;

  /** Token that callers must present as `Authorization: Bearer <token>`. */
  AUTH_TOKEN: string;

  /** API key sent to Dashscope as a Bearer token. */
  DASHSCOPE_API_KEY: string;

  /** Optional embedding model used when a request does not name one. */
  DEFAULT_MODEL?: string;
}
|
||||
|
||||
import { handleEmbed } from "./embed.js";
|
||||
|
||||
export default {
  /**
   * Worker entry point.
   *
   * - `OPTIONS` (any path): answers the CORS preflight without authentication.
   * - Every other request must carry `Authorization: Bearer <AUTH_TOKEN>`,
   *   otherwise it gets `401 {"error":"Unauthorized"}`.
   * - `POST /embed` is delegated to `handleEmbed`.
   * - `GET /health` returns `{"status":"ok"}`.
   * - Anything else returns `404 {"error":"Not found"}`.
   *
   * @param request Incoming HTTP request.
   * @param env Worker bindings and secrets.
   * @returns The response for the matched route.
   */
  async fetch(request: Request, env: Env): Promise<Response> {
    // CORS preflight
    if (request.method === "OPTIONS") {
      return new Response(null, {
        headers: {
          "Access-Control-Allow-Origin": "*",
          "Access-Control-Allow-Methods": "POST, OPTIONS",
          "Access-Control-Allow-Headers": "Authorization, Content-Type",
        },
      });
    }

    // Auth check. Fail closed when the secret is not configured: otherwise the
    // expected header would be the literal "Bearer undefined", which any
    // caller could send.
    const expectedToken = env.AUTH_TOKEN;
    const authHeader = request.headers.get("Authorization");
    if (
      !expectedToken ||
      !authHeader ||
      !constantTimeEquals(authHeader, `Bearer ${expectedToken}`)
    ) {
      return jsonResponse({ error: "Unauthorized" }, 401);
    }

    const url = new URL(request.url);

    if (request.method === "POST" && url.pathname === "/embed") {
      return handleEmbed(request, env);
    }

    if (request.method === "GET" && url.pathname === "/health") {
      return jsonResponse({ status: "ok" });
    }

    return jsonResponse({ error: "Not found" }, 404);
  },
} satisfies ExportedHandler<Env>;

/**
 * Compares two strings without returning early at the first differing byte,
 * so the comparison time does not reveal how long a matching prefix is.
 * Best effort only: the lengths themselves are not hidden.
 */
function constantTimeEquals(a: string, b: string): boolean {
  const encoder = new TextEncoder();
  const left = encoder.encode(a);
  const right = encoder.encode(b);
  let diff = left.length ^ right.length;
  const span = Math.max(left.length, right.length);
  for (let i = 0; i < span; i++) {
    diff |= (left[i] ?? 0) ^ (right[i] ?? 0);
  }
  return diff === 0;
}
|
||||
|
||||
/**
 * Serialises `data` as JSON and wraps it in a response.
 *
 * @param data Value passed to `JSON.stringify` for the body.
 * @param status HTTP status code; defaults to 200.
 * @returns A response with JSON content type and a wildcard CORS origin header.
 */
function jsonResponse(data: unknown, status = 200): Response {
  const headers = {
    "Content-Type": "application/json",
    "Access-Control-Allow-Origin": "*",
  };
  const payload = JSON.stringify(data);
  return new Response(payload, { status, headers });
}
|
||||
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"module": "ES2022",
|
||||
"moduleResolution": "bundler",
|
||||
"strict": true,
|
||||
"noUncheckedIndexedAccess": true,
|
||||
"skipLibCheck": true,
|
||||
"types": ["@cloudflare/workers-types"],
|
||||
"outDir": "dist",
|
||||
"rootDir": "src"
|
||||
},
|
||||
"include": ["src"]
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
name = "embed"
|
||||
main = "src/index.ts"
|
||||
compatibility_date = "2024-12-01"
|
||||
|
||||
[[kv_namespaces]]
|
||||
binding = "EMBED_CACHE"
|
||||
id = "80f3318667c04ade94fa53ba3e2285fe"
|
||||
Reference in New Issue
Block a user