Files
Moltbot/src/memory/embedding-chunk-limits.ts
2026-02-18 01:34:35 +00:00

31 lines
894 B
TypeScript

import { estimateUtf8Bytes, splitTextToUtf8ByteLimit } from "./embedding-input-limits.js";
import { resolveEmbeddingMaxInputTokens } from "./embedding-model-limits.js";
import type { EmbeddingProvider } from "./embeddings.js";
import { hashText, type MemoryChunk } from "./internal.js";
/**
 * Ensures no chunk handed to the embedding provider exceeds the provider's
 * resolved input limit, splitting oversized chunks into smaller ones.
 *
 * A chunk whose estimated UTF-8 byte size is within the limit is passed
 * through unchanged (the same object reference). An oversized chunk is
 * replaced, in place in the output order, by one chunk per piece returned
 * from `splitTextToUtf8ByteLimit`.
 *
 * NOTE(review): the limit is expressed in tokens but is compared against a
 * UTF-8 byte estimate — presumably a deliberately conservative proxy; confirm
 * against `resolveEmbeddingMaxInputTokens` / `estimateUtf8Bytes`.
 *
 * @param provider - Embedding provider whose max-input limit is resolved.
 * @param chunks - Chunks to check; the input array is not modified.
 * @returns A new array containing the original and/or split chunks, in order.
 */
export function enforceEmbeddingMaxInputTokens(
  provider: EmbeddingProvider,
  chunks: MemoryChunk[],
): MemoryChunk[] {
  const limit = resolveEmbeddingMaxInputTokens(provider);

  return chunks.flatMap((chunk): MemoryChunk[] => {
    const fitsWithinLimit = estimateUtf8Bytes(chunk.text) <= limit;
    if (fitsWithinLimit) {
      return [chunk];
    }

    // Each piece keeps the parent's line range (the split does not track
    // per-piece line numbers) but gets a hash computed from its own text.
    return Array.from(
      splitTextToUtf8ByteLimit(chunk.text, limit),
      (text): MemoryChunk => ({
        startLine: chunk.startLine,
        endLine: chunk.endLine,
        text,
        hash: hashText(text),
      }),
    );
  });
}