feat: embedding service — Cloudflare Worker + KV cache + Dashscope
- POST /embed: batch text → vector (max 100) - KV cache: sha256(model+text), content-addressable, no expiry - Dashscope text-embedding-v3 upstream (1024 dims) - Bearer token auth - Health endpoint Deployed: https://embed.shazhou.workers.dev
This commit is contained in:
@@ -0,0 +1,4 @@
|
|||||||
|
node_modules/
|
||||||
|
dist/
|
||||||
|
.wrangler/
|
||||||
|
.dev.vars
|
||||||
@@ -1,3 +1,44 @@
|
|||||||
# embed
|
# Embedding Service
|
||||||
|
|
||||||
Embedding computation service — text → vector + cache (Cloudflare Worker)
|
Text → vector 计算服务,nerve knowledge 的 embedding 后端。
|
||||||
|
|
||||||
|
## 接口
|
||||||
|
|
||||||
|
```
|
||||||
|
POST /embed
|
||||||
|
Authorization: Bearer <token>
|
||||||
|
|
||||||
|
{
|
||||||
|
"texts": ["chunk 1", "chunk 2"],
|
||||||
|
"model": "text-embedding-v3"
|
||||||
|
}
|
||||||
|
|
||||||
|
→ 200
|
||||||
|
{
|
||||||
|
"embeddings": [[0.012, ...], [0.056, ...]],
|
||||||
|
"model": "text-embedding-v3",
|
||||||
|
"dimensions": 1024,
|
||||||
|
"cached": [false, true]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## 特性
|
||||||
|
|
||||||
|
- **缓存**: KV 存储,key = `emb:<model>:<sha256(model + "\0" + text)>`,永不过期
|
||||||
|
- **上游**: Dashscope text-embedding-v3(可配置)
|
||||||
|
- **批量**: 单次最多 100 条
|
||||||
|
- **鉴权**: Bearer token
|
||||||
|
|
||||||
|
## 开发
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pnpm install
|
||||||
|
pnpm dev # 本地开发
|
||||||
|
pnpm run deploy # 部署到 Cloudflare Workers(注意:`pnpm deploy` 是 pnpm 内置命令,不会执行 package.json 里的 deploy 脚本)
|
||||||
|
```
|
||||||
|
|
||||||
|
## 环境变量(Secrets)
|
||||||
|
|
||||||
|
- `AUTH_TOKEN` — 鉴权 token
|
||||||
|
- `DASHSCOPE_API_KEY` — Dashscope API key
|
||||||
|
- `DEFAULT_MODEL` — 默认模型(可选,默认 text-embedding-v3)
|
||||||
|
|||||||
@@ -0,0 +1,17 @@
|
|||||||
|
{
|
||||||
|
"name": "embed",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"private": true,
|
||||||
|
"scripts": {
|
||||||
|
"dev": "wrangler dev",
|
||||||
|
"deploy": "wrangler deploy",
|
||||||
|
"test": "vitest run",
|
||||||
|
"check": "tsc --noEmit"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"@cloudflare/workers-types": "^4.20241205.0",
|
||||||
|
"typescript": "^5.7.0",
|
||||||
|
"vitest": "^3.0.0",
|
||||||
|
"wrangler": "^3.99.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
Generated
+1939
File diff suppressed because it is too large
Load Diff
+164
@@ -0,0 +1,164 @@
|
|||||||
|
import { createHash } from "./hash.js";
|
||||||
|
import type { Env } from "./index.js";
|
||||||
|
|
||||||
|
/** Upper bound on the number of texts accepted in a single `/embed` request. */
const MAX_BATCH = 100;

/** Model used when neither the request body nor `env.DEFAULT_MODEL` names one. */
const DEFAULT_MODEL = "text-embedding-v3";
|
||||||
|
|
||||||
|
/** JSON body accepted by `POST /embed`. */
type EmbedRequest = {
  /** Texts to embed; the handler requires a non-empty array of non-empty strings. */
  texts: Array<string>;
  /** Optional model name; the handler falls back to a default when omitted. */
  model?: string;
};
|
||||||
|
|
||||||
|
/**
 * Subset of the Dashscope embedding response that this module reads.
 * Every level is optional because the payload comes from an external service.
 */
type DashscopeResponse = {
  output?: {
    /** One entry per embedded text; `text_index` is used to restore input order. */
    embeddings?: { embedding: number[]; text_index: number }[];
  };
  /** Token accounting reported by the upstream; not read by this module. */
  usage?: { total_tokens: number };
};
|
||||||
|
|
||||||
|
/**
 * Handles `POST /embed`: validates the request, serves vectors from the KV
 * cache where possible, asks the upstream provider for the rest, and returns
 * all vectors in input order.
 *
 * Responses:
 * - 200 `{ embeddings, model, dimensions, cached }`, where `cached[i]` tells
 *   whether `embeddings[i]` was served from the cache.
 * - 400 for malformed JSON or an invalid `texts` / `model` field.
 * - 502 when the upstream provider fails or returns an unexpected count.
 *
 * @param request Incoming request whose body is the JSON payload.
 * @param env Worker bindings (KV cache, upstream API key, optional default model).
 * @returns A JSON response carrying permissive CORS headers.
 */
export async function handleEmbed(request: Request, env: Env): Promise<Response> {
  let payload: unknown;
  try {
    payload = await request.json();
  } catch {
    return jsonError("Invalid JSON body", 400);
  }

  // `request.json()` also yields null, numbers, strings, ... — reading a
  // property off `null` would throw, so reject non-objects up front.
  if (typeof payload !== "object" || payload === null) {
    return jsonError("'texts' must be a non-empty array of strings", 400);
  }
  const { texts, model: requestedModel } = payload as Partial<Record<keyof EmbedRequest, unknown>>;

  if (!Array.isArray(texts) || texts.length === 0) {
    return jsonError("'texts' must be a non-empty array of strings", 400);
  }

  if (texts.length > MAX_BATCH) {
    return jsonError(`Batch size ${texts.length} exceeds limit of ${MAX_BATCH}`, 400);
  }

  for (let i = 0; i < texts.length; i++) {
    const candidate: unknown = texts[i];
    if (typeof candidate !== "string" || candidate.length === 0) {
      return jsonError(`texts[${i}] must be a non-empty string`, 400);
    }
  }
  const inputs = texts as string[];

  // The model name ends up in the cache key and the upstream request, so it
  // must be a real string; null/undefined fall back to the configured default.
  let model: string;
  if (requestedModel === undefined || requestedModel === null) {
    model = env.DEFAULT_MODEL ?? DEFAULT_MODEL;
  } else if (typeof requestedModel === "string" && requestedModel.length > 0) {
    model = requestedModel;
  } else {
    return jsonError("'model' must be a non-empty string", 400);
  }

  // Hash and look up every text concurrently instead of one KV round trip at a time.
  const cacheKeys = await Promise.all(inputs.map((text) => cacheKey(model, text)));
  const vectors: Array<number[] | null> = await Promise.all(
    cacheKeys.map((key) => readCachedVector(env.EMBED_CACHE, key)),
  );
  const servedFromCache = vectors.map((vec) => vec !== null);

  // Group misses by cache key so a text repeated within the batch is embedded
  // and written back only once.
  const missesByKey = new Map<string, number[]>();
  for (let i = 0; i < vectors.length; i++) {
    if (vectors[i] !== null) continue;
    const key = cacheKeys[i]!;
    const slots = missesByKey.get(key);
    if (slots) {
      slots.push(i);
    } else {
      missesByKey.set(key, [i]);
    }
  }

  if (missesByKey.size > 0) {
    const pending = [...missesByKey.entries()];
    const uncachedTexts = pending.map(([, slots]) => inputs[slots[0]!]!);

    let fetched: number[][] | null = null;
    try {
      fetched = await callDashscope(env.DASHSCOPE_API_KEY, model, uncachedTexts);
    } catch (err: unknown) {
      // A thrown upstream call is reported exactly like a failed one.
      console.error("Upstream embedding call threw", err);
    }

    if (fetched === null) {
      return jsonError("Upstream embedding provider failed", 502);
    }

    if (fetched.length !== uncachedTexts.length) {
      return jsonError("Upstream returned mismatched embedding count", 502);
    }

    // Fill results and write the fresh vectors back to the cache.
    const writes: Promise<void>[] = [];
    for (let j = 0; j < pending.length; j++) {
      const [key, slots] = pending[j]!;
      const vec = fetched[j]!;
      for (const slot of slots) {
        vectors[slot] = vec;
      }
      writes.push(env.EMBED_CACHE.put(key, JSON.stringify(vec)));
    }

    // Caching is best-effort: the embeddings are already computed, so a failed
    // write is logged rather than turned into a failed request.
    const outcomes = await Promise.allSettled(writes);
    for (const outcome of outcomes) {
      if (outcome.status === "rejected") {
        console.error("Failed to write embedding to cache", outcome.reason);
      }
    }
  }

  const dimensions = vectors[0]?.length ?? 0;

  return new Response(
    JSON.stringify({
      embeddings: vectors,
      model,
      dimensions,
      cached: servedFromCache,
    }),
    {
      status: 200,
      headers: {
        "Content-Type": "application/json",
        "Access-Control-Allow-Origin": "*",
      },
    },
  );
}

/**
 * Reads one cached vector. Returns `null` on a miss, and also when the stored
 * value is not a JSON array, so a corrupt entry is recomputed instead of
 * failing the request.
 */
async function readCachedVector(cache: KVNamespace, key: string): Promise<number[] | null> {
  const raw = await cache.get(key, "text");
  if (raw === null) {
    return null;
  }
  try {
    const parsed: unknown = JSON.parse(raw);
    return Array.isArray(parsed) ? (parsed as number[]) : null;
  } catch {
    console.error(`Ignoring unparsable cache entry ${key}`);
    return null;
  }
}
|
||||||
|
|
||||||
|
/**
 * Builds the KV key for a (model, text) pair.
 *
 * The digest covers the model and the text joined by a NUL character, and the
 * model name is repeated in clear text in the key prefix.
 *
 * @param model Embedding model name.
 * @param text Text being embedded.
 * @returns Key of the form `emb:<model>:<hex digest>`.
 */
async function cacheKey(model: string, text: string): Promise<string> {
  const digest = await createHash(model + "\0" + text);
  return `emb:${model}:${digest}`;
}
|
||||||
|
|
||||||
|
/**
 * Requests embeddings for `texts` from the Dashscope text-embedding endpoint.
 *
 * The input is split into small per-request chunks which are sent
 * concurrently; the returned vectors are in the same order as `texts`.
 * Every failure mode (network error, non-2xx status, unparsable or empty
 * payload, wrong number of embeddings for a chunk) is logged and reported as
 * `null` rather than thrown. An empty `texts` array yields `[]` without
 * contacting the upstream.
 *
 * @param apiKey Dashscope API key, sent as a bearer token.
 * @param model Upstream model name.
 * @param texts Texts to embed.
 * @returns One vector per input text, or `null` if any upstream request failed.
 */
async function callDashscope(
  apiKey: string,
  model: string,
  texts: string[],
): Promise<number[][] | null> {
  const url = "https://dashscope.aliyuncs.com/api/v1/services/embeddings/text-embedding/text-embedding";

  // NOTE(review): Dashscope caps the number of texts per request; the public
  // docs list 10 for text-embedding-v3 (older models allow more) — confirm
  // against the current limits for the models in use.
  const UPSTREAM_BATCH_LIMIT = 10;

  const chunks: string[][] = [];
  for (let start = 0; start < texts.length; start += UPSTREAM_BATCH_LIMIT) {
    chunks.push(texts.slice(start, start + UPSTREAM_BATCH_LIMIT));
  }

  // Embeds one chunk; resolves to null on any failure.
  const embedChunk = async (chunk: string[]): Promise<number[][] | null> => {
    let resp: Response;
    try {
      resp = await fetch(url, {
        method: "POST",
        headers: {
          Authorization: `Bearer ${apiKey}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          model,
          input: { texts: chunk },
          parameters: {
            text_type: "document",
          },
        }),
      });
    } catch (err: unknown) {
      console.error("Dashscope request failed", err);
      return null;
    }

    if (!resp.ok) {
      console.error(`Dashscope error: ${resp.status} ${await resp.text()}`);
      return null;
    }

    let data: DashscopeResponse | null;
    try {
      data = (await resp.json()) as DashscopeResponse | null;
    } catch (err: unknown) {
      console.error("Dashscope returned a non-JSON body", err);
      return null;
    }

    const rawEmbeddings = data?.output?.embeddings;

    if (!rawEmbeddings || rawEmbeddings.length === 0) {
      console.error("Dashscope returned no embeddings", JSON.stringify(data));
      return null;
    }

    // A short or long chunk would shift every later vector onto the wrong text.
    if (rawEmbeddings.length !== chunk.length) {
      console.error(
        `Dashscope returned ${rawEmbeddings.length} embeddings for ${chunk.length} texts`,
      );
      return null;
    }

    // Sort by text_index to maintain input order
    const sorted = [...rawEmbeddings].sort((a, b) => a.text_index - b.text_index);
    return sorted.map((e) => e.embedding);
  };

  const parts = await Promise.all(chunks.map(embedChunk));

  const merged: number[][] = [];
  for (const part of parts) {
    if (part === null) {
      return null;
    }
    merged.push(...part);
  }
  return merged;
}
|
||||||
|
|
||||||
|
/**
 * Builds a JSON error response of the form `{ "error": message }`.
 *
 * @param message Human-readable error description.
 * @param status HTTP status code for the response.
 * @returns A JSON response with permissive CORS headers.
 */
function jsonError(message: string, status: number): Response {
  const payload = JSON.stringify({ error: message });
  const init: ResponseInit = {
    status,
    headers: {
      "Content-Type": "application/json",
      "Access-Control-Allow-Origin": "*",
    },
  };
  return new Response(payload, init);
}
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
/**
 * Computes the SHA-256 digest of a string with the Web Crypto API.
 *
 * @param input Text to hash; it is UTF-8 encoded before hashing.
 * @returns The digest as a lowercase hexadecimal string.
 */
export async function createHash(input: string): Promise<string> {
  const bytes = new TextEncoder().encode(input);
  const digest = new Uint8Array(await crypto.subtle.digest("SHA-256", bytes));

  let hex = "";
  for (const byte of digest) {
    // Two hex digits per byte, zero-padded.
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}
|
||||||
@@ -0,0 +1,56 @@
|
|||||||
|
/// <reference types="@cloudflare/workers-types" />
|
||||||
|
|
||||||
|
/** Bindings and configuration values the Worker reads from its environment. */
export interface Env {
  /** KV namespace used as the embedding cache. */
  EMBED_CACHE: KVNamespace;

  /** Token callers must present as `Authorization: Bearer <token>`. */
  AUTH_TOKEN: string;

  /** API key for the upstream Dashscope service. */
  DASHSCOPE_API_KEY: string;

  /** Embedding model to use when a request does not name one. */
  DEFAULT_MODEL?: string;
}
|
||||||
|
|
||||||
|
import { handleEmbed } from "./embed.js";
|
||||||
|
|
||||||
|
/**
 * Worker entry point.
 *
 * Routing:
 * - `OPTIONS *` answers the CORS preflight without authentication.
 * - Every other request must carry `Authorization: Bearer <AUTH_TOKEN>`,
 *   otherwise it gets a 401.
 * - `POST /embed` is delegated to `handleEmbed`; an exception escaping from it
 *   becomes a JSON 500.
 * - `GET /health` returns `{ "status": "ok" }`.
 * - Anything else is a JSON 404.
 */
export default {
  async fetch(request: Request, env: Env): Promise<Response> {
    // CORS preflight
    if (request.method === "OPTIONS") {
      return new Response(null, {
        headers: {
          "Access-Control-Allow-Origin": "*",
          "Access-Control-Allow-Methods": "POST, OPTIONS",
          "Access-Control-Allow-Headers": "Authorization, Content-Type",
        },
      });
    }

    // Fail closed when the secret is missing: interpolating an unset value
    // would otherwise make the literal header "Bearer undefined" valid.
    const expectedToken: unknown = env.AUTH_TOKEN;
    if (typeof expectedToken !== "string" || expectedToken.length === 0) {
      console.error("AUTH_TOKEN is not configured; rejecting request");
      return jsonResponse({ error: "Unauthorized" }, 401);
    }

    // Auth check
    const authHeader = request.headers.get("Authorization");
    if (authHeader === null || !tokensMatch(authHeader, `Bearer ${expectedToken}`)) {
      return jsonResponse({ error: "Unauthorized" }, 401);
    }

    const url = new URL(request.url);

    if (request.method === "POST" && url.pathname === "/embed") {
      try {
        return await handleEmbed(request, env);
      } catch (err: unknown) {
        // Keep the error contract (JSON body + CORS headers) for unexpected failures.
        console.error("Unhandled error in /embed", err);
        return jsonResponse({ error: "Internal server error" }, 500);
      }
    }

    if (request.method === "GET" && url.pathname === "/health") {
      return jsonResponse({ status: "ok" });
    }

    return jsonResponse({ error: "Not found" }, 404);
  },
} satisfies ExportedHandler<Env>;

/**
 * Compares two strings without returning early at the first difference, so
 * the time taken depends on the length of `presented` only and not on how
 * many leading characters match.
 */
function tokensMatch(presented: string, expected: string): boolean {
  const encoder = new TextEncoder();
  const a = encoder.encode(presented);
  const b = encoder.encode(expected);

  // A length mismatch is folded into the accumulator instead of short-circuiting.
  let diff = a.length ^ b.length;
  for (let i = 0; i < a.length; i++) {
    diff |= a[i]! ^ (b[i % b.length] ?? 0);
  }
  return diff === 0;
}
|
||||||
|
|
||||||
|
/**
 * Serializes `data` as a JSON response.
 *
 * @param data Value to serialize into the response body.
 * @param status HTTP status code; defaults to 200.
 * @returns A JSON response with permissive CORS headers.
 */
function jsonResponse(data: unknown, status = 200): Response {
  const headers = new Headers({
    "Content-Type": "application/json",
    "Access-Control-Allow-Origin": "*",
  });
  const body = JSON.stringify(data);
  return new Response(body, { status, headers });
}
|
||||||
@@ -0,0 +1,14 @@
|
|||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
"target": "ES2022",
|
||||||
|
"module": "ES2022",
|
||||||
|
"moduleResolution": "bundler",
|
||||||
|
"strict": true,
|
||||||
|
"noUncheckedIndexedAccess": true,
|
||||||
|
"skipLibCheck": true,
|
||||||
|
"types": ["@cloudflare/workers-types"],
|
||||||
|
"outDir": "dist",
|
||||||
|
"rootDir": "src"
|
||||||
|
},
|
||||||
|
"include": ["src"]
|
||||||
|
}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
name = "embed"
|
||||||
|
main = "src/index.ts"
|
||||||
|
compatibility_date = "2024-12-01"
|
||||||
|
|
||||||
|
[[kv_namespaces]]
|
||||||
|
binding = "EMBED_CACHE"
|
||||||
|
id = "80f3318667c04ade94fa53ba3e2285fe"
|
||||||
Reference in New Issue
Block a user