185 lines
5.3 KiB
TypeScript
185 lines
5.3 KiB
TypeScript
import { estimateBase64DecodedBytes } from "../media/base64.js";
|
|
import { sniffMimeFromBase64 } from "../media/sniff-mime-from-base64.js";
|
|
|
|
export type ChatAttachment = {
|
|
type?: string;
|
|
mimeType?: string;
|
|
fileName?: string;
|
|
content?: unknown;
|
|
};
|
|
|
|
export type ChatImageContent = {
|
|
type: "image";
|
|
data: string;
|
|
mimeType: string;
|
|
};
|
|
|
|
export type ParsedMessageWithImages = {
|
|
message: string;
|
|
images: ChatImageContent[];
|
|
};
|
|
|
|
type AttachmentLog = {
|
|
warn: (message: string) => void;
|
|
};
|
|
|
|
type NormalizedAttachment = {
|
|
label: string;
|
|
mime: string;
|
|
base64: string;
|
|
};
|
|
|
|
function normalizeMime(mime?: string): string | undefined {
|
|
if (!mime) {
|
|
return undefined;
|
|
}
|
|
const cleaned = mime.split(";")[0]?.trim().toLowerCase();
|
|
return cleaned || undefined;
|
|
}
|
|
|
|
function isImageMime(mime?: string): boolean {
|
|
return typeof mime === "string" && mime.startsWith("image/");
|
|
}
|
|
|
|
function isValidBase64(value: string): boolean {
|
|
// Minimal validation; avoid full decode allocations for large payloads.
|
|
return value.length > 0 && value.length % 4 === 0 && /^[A-Za-z0-9+/]+={0,2}$/.test(value);
|
|
}
|
|
|
|
function normalizeAttachment(
|
|
att: ChatAttachment,
|
|
idx: number,
|
|
opts: { stripDataUrlPrefix: boolean; requireImageMime: boolean },
|
|
): NormalizedAttachment {
|
|
const mime = att.mimeType ?? "";
|
|
const content = att.content;
|
|
const label = att.fileName || att.type || `attachment-${idx + 1}`;
|
|
|
|
if (typeof content !== "string") {
|
|
throw new Error(`attachment ${label}: content must be base64 string`);
|
|
}
|
|
if (opts.requireImageMime && !mime.startsWith("image/")) {
|
|
throw new Error(`attachment ${label}: only image/* supported`);
|
|
}
|
|
|
|
let base64 = content.trim();
|
|
if (opts.stripDataUrlPrefix) {
|
|
// Strip data URL prefix if present (e.g., "data:image/jpeg;base64,...").
|
|
const dataUrlMatch = /^data:[^;]+;base64,(.*)$/.exec(base64);
|
|
if (dataUrlMatch) {
|
|
base64 = dataUrlMatch[1];
|
|
}
|
|
}
|
|
return { label, mime, base64 };
|
|
}
|
|
|
|
function validateAttachmentBase64OrThrow(
|
|
normalized: NormalizedAttachment,
|
|
opts: { maxBytes: number },
|
|
): number {
|
|
if (!isValidBase64(normalized.base64)) {
|
|
throw new Error(`attachment ${normalized.label}: invalid base64 content`);
|
|
}
|
|
const sizeBytes = estimateBase64DecodedBytes(normalized.base64);
|
|
if (sizeBytes <= 0 || sizeBytes > opts.maxBytes) {
|
|
throw new Error(
|
|
`attachment ${normalized.label}: exceeds size limit (${sizeBytes} > ${opts.maxBytes} bytes)`,
|
|
);
|
|
}
|
|
return sizeBytes;
|
|
}
|
|
|
|
/**
|
|
* Parse attachments and extract images as structured content blocks.
|
|
* Returns the message text and an array of image content blocks
|
|
* compatible with Claude API's image format.
|
|
*/
|
|
export async function parseMessageWithAttachments(
|
|
message: string,
|
|
attachments: ChatAttachment[] | undefined,
|
|
opts?: { maxBytes?: number; log?: AttachmentLog },
|
|
): Promise<ParsedMessageWithImages> {
|
|
const maxBytes = opts?.maxBytes ?? 5_000_000; // decoded bytes (5,000,000)
|
|
const log = opts?.log;
|
|
if (!attachments || attachments.length === 0) {
|
|
return { message, images: [] };
|
|
}
|
|
|
|
const images: ChatImageContent[] = [];
|
|
|
|
for (const [idx, att] of attachments.entries()) {
|
|
if (!att) {
|
|
continue;
|
|
}
|
|
const normalized = normalizeAttachment(att, idx, {
|
|
stripDataUrlPrefix: true,
|
|
requireImageMime: false,
|
|
});
|
|
validateAttachmentBase64OrThrow(normalized, { maxBytes });
|
|
const { base64: b64, label, mime } = normalized;
|
|
|
|
const providedMime = normalizeMime(mime);
|
|
const sniffedMime = normalizeMime(await sniffMimeFromBase64(b64));
|
|
if (sniffedMime && !isImageMime(sniffedMime)) {
|
|
log?.warn(`attachment ${label}: detected non-image (${sniffedMime}), dropping`);
|
|
continue;
|
|
}
|
|
if (!sniffedMime && !isImageMime(providedMime)) {
|
|
log?.warn(`attachment ${label}: unable to detect image mime type, dropping`);
|
|
continue;
|
|
}
|
|
if (sniffedMime && providedMime && sniffedMime !== providedMime) {
|
|
log?.warn(
|
|
`attachment ${label}: mime mismatch (${providedMime} -> ${sniffedMime}), using sniffed`,
|
|
);
|
|
}
|
|
|
|
images.push({
|
|
type: "image",
|
|
data: b64,
|
|
mimeType: sniffedMime ?? providedMime ?? mime,
|
|
});
|
|
}
|
|
|
|
return { message, images };
|
|
}
|
|
|
|
/**
|
|
* @deprecated Use parseMessageWithAttachments instead.
|
|
* This function converts images to markdown data URLs which Claude API cannot process as images.
|
|
*/
|
|
export function buildMessageWithAttachments(
|
|
message: string,
|
|
attachments: ChatAttachment[] | undefined,
|
|
opts?: { maxBytes?: number },
|
|
): string {
|
|
const maxBytes = opts?.maxBytes ?? 2_000_000; // 2 MB
|
|
if (!attachments || attachments.length === 0) {
|
|
return message;
|
|
}
|
|
|
|
const blocks: string[] = [];
|
|
|
|
for (const [idx, att] of attachments.entries()) {
|
|
if (!att) {
|
|
continue;
|
|
}
|
|
const normalized = normalizeAttachment(att, idx, {
|
|
stripDataUrlPrefix: false,
|
|
requireImageMime: true,
|
|
});
|
|
validateAttachmentBase64OrThrow(normalized, { maxBytes });
|
|
const { base64, label, mime } = normalized;
|
|
|
|
const safeLabel = label.replace(/\s+/g, "_");
|
|
const dataUrl = ``;
|
|
blocks.push(dataUrl);
|
|
}
|
|
|
|
if (blocks.length === 0) {
|
|
return message;
|
|
}
|
|
const separator = message.trim().length > 0 ? "\n\n" : "";
|
|
return `${message}${separator}${blocks.join("\n\n")}`;
|
|
}
|