diff --git a/CHANGELOG.md b/CHANGELOG.md index 892c626d1..907637cb9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ ## [Unreleased] 1.0.5 ### Pending -- (add entries here) +- Web auto-replies now resize/recompress media and honor `inbound.reply.mediaMaxMb` in `~/.warelay/warelay.json` (default 5 MB) to avoid provider/API limits. ## 1.0.4 — 2025-11-25 diff --git a/package.json b/package.json index 8680ba174..1a5296934 100644 --- a/package.json +++ b/package.json @@ -40,6 +40,7 @@ "json5": "^2.2.3", "pino": "^10.1.0", "qrcode-terminal": "^0.12.0", + "sharp": "^0.33.5", "twilio": "^5.10.6", "zod": "^4.1.13" }, diff --git a/src/auto-reply/reply.ts b/src/auto-reply/reply.ts index 518b95eb4..dcd36e282 100644 --- a/src/auto-reply/reply.ts +++ b/src/auto-reply/reply.ts @@ -389,7 +389,9 @@ export async function getReplyFromConfig( const baseMsg = `Command timed out after ${timeoutSeconds}s. Try a shorter prompt or split the request.`; const partial = errorObj.stdout?.trim(); const partialSnippet = - partial && partial.length > 800 ? `${partial.slice(0, 800)}...` : partial; + partial && partial.length > 800 + ? `${partial.slice(0, 800)}...` + : partial; const text = partialSnippet ? `${baseMsg}\n\nPartial output before timeout:\n${partialSnippet}` : baseMsg; diff --git a/src/config/config.ts b/src/config/config.ts index 96c8121a3..e6b36adfa 100644 --- a/src/config/config.ts +++ b/src/config/config.ts @@ -39,6 +39,7 @@ export type WarelayConfig = { mediaUrl?: string; // optional media attachment (path or URL) session?: SessionConfig; claudeOutputFormat?: ClaudeOutputFormat; // when command starts with `claude`, force an output format + mediaMaxMb?: number; // optional cap for outbound media (default 5MB) }; }; }; @@ -55,6 +56,7 @@ const ReplySchema = z timeoutSeconds: z.number().int().positive().optional(), bodyPrefix: z.string().optional(), mediaUrl: z.string().optional(), + mediaMaxMb: z.number().positive().optional(), session: z .object({ scope: z diff --git a/src/provider-web.test.ts b/src/provider-web.test.ts index 0d197bbd8..022ae0a9a 100644 --- a/src/provider-web.test.ts +++ b/src/provider-web.test.ts @@ -3,6 +3,7 @@ import { EventEmitter } from "node:events"; import fsSync from "node:fs"; import os from "node:os"; import path from "node:path"; +import sharp from "sharp"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import type { MockBaileysSocket } from "../test/mocks/baileys.js"; import { createMockBaileys } from "../test/mocks/baileys.js"; @@ -26,6 +27,11 @@ vi.mock("./media/store.js", () => ({ })), })); +let loadConfigMock: () => unknown = () => ({}); +vi.mock("./config/config.js", () => ({ + loadConfig: () => loadConfigMock(), +})); + function getLastSocket(): MockBaileysSocket { const getter = (globalThis as Record)[ Symbol.for("warelay:lastSocket") @@ -64,6 +70,7 @@ const baileys = (await import( describe("provider-web", () => { beforeEach(() => { vi.clearAllMocks(); + loadConfigMock = () => ({}); const recreated = createMockBaileys(); (globalThis as Record)[ Symbol.for("warelay:lastSocket") @@ -321,9 +328,9 @@ describe("provider-web", () => { vi.useFakeTimers(); const closeResolvers: Array<() => void> = []; const listenerFactory = vi.fn(async () => { - let resolve!: () => void; + let _resolve!: () => void; const onClose = new Promise((res) => { - resolve = res; + _resolve = res; closeResolvers.push(res); }); return { close: vi.fn(), onClose }; @@ -381,10 +388,24 @@ describe("provider-web", () => { return { close: vi.fn() }; }; + const smallPng = await sharp({ + create: { + width: 200, + height: 200, + channels: 3, + background: { r: 0, g: 255, b: 0 }, + }, + }) + .png() + .toBuffer(); const fetchMock = vi.spyOn(globalThis, "fetch").mockResolvedValue({ ok: true, body: true, - arrayBuffer: async () => new ArrayBuffer(1024), + arrayBuffer: async () => + smallPng.buffer.slice( + smallPng.byteOffset, + smallPng.byteOffset + smallPng.byteLength, + ), headers: { get: () => "image/png" }, status: 200, } as Response); @@ -407,6 +428,151 @@ describe("provider-web", () => { fetchMock.mockRestore(); }); + it("compresses media over 5MB and still sends it", async () => { + const sendMedia = vi.fn(); + const reply = vi.fn().mockResolvedValue(undefined); + const sendComposing = vi.fn(); + const resolver = vi.fn().mockResolvedValue({ + text: "hi", + mediaUrl: "https://example.com/big.png", + }); + + let capturedOnMessage: + | ((msg: import("./provider-web.js").WebInboundMessage) => Promise) + | undefined; + const listenerFactory = async (opts: { + onMessage: ( + msg: import("./provider-web.js").WebInboundMessage, + ) => Promise; + }) => { + capturedOnMessage = opts.onMessage; + return { close: vi.fn() }; + }; + + // Create a large ( >5MB ) PNG to trigger compression. + const bigPng = await sharp({ + create: { + width: 3200, + height: 3200, + channels: 3, + background: { r: 255, g: 0, b: 0 }, + }, + }) + .png({ compressionLevel: 0 }) + .toBuffer(); + expect(bigPng.length).toBeGreaterThan(5 * 1024 * 1024); + + const fetchMock = vi.spyOn(globalThis, "fetch").mockResolvedValue({ + ok: true, + body: true, + arrayBuffer: async () => + bigPng.buffer.slice( + bigPng.byteOffset, + bigPng.byteOffset + bigPng.byteLength, + ), + headers: { get: () => "image/png" }, + status: 200, + } as Response); + + await monitorWebProvider(false, listenerFactory, false, resolver); + expect(capturedOnMessage).toBeDefined(); + + await capturedOnMessage?.({ + body: "hello", + from: "+1", + to: "+2", + id: "msg1", + sendComposing, + reply, + sendMedia, + }); + + expect(sendMedia).toHaveBeenCalledTimes(1); + const payload = sendMedia.mock.calls[0][0] as { + image: Buffer; + caption?: string; + mimetype?: string; + }; + expect(payload.image.length).toBeLessThanOrEqual(5 * 1024 * 1024); + expect(payload.mimetype).toBe("image/jpeg"); + // Should not fall back to separate text reply because caption is used. + expect(reply).not.toHaveBeenCalled(); + + fetchMock.mockRestore(); + }); + + it("honors mediaMaxMb from config", async () => { + loadConfigMock = () => ({ inbound: { reply: { mediaMaxMb: 1 } } }); + const sendMedia = vi.fn(); + const reply = vi.fn().mockResolvedValue(undefined); + const sendComposing = vi.fn(); + const resolver = vi.fn().mockResolvedValue({ + text: "hi", + mediaUrl: "https://example.com/big.png", + }); + + let capturedOnMessage: + | ((msg: import("./provider-web.js").WebInboundMessage) => Promise) + | undefined; + const listenerFactory = async (opts: { + onMessage: ( + msg: import("./provider-web.js").WebInboundMessage, + ) => Promise; + }) => { + capturedOnMessage = opts.onMessage; + return { close: vi.fn() }; + }; + + const bigPng = await sharp({ + create: { + width: 2600, + height: 2600, + channels: 3, + background: { r: 0, g: 0, b: 255 }, + }, + }) + .png({ compressionLevel: 0 }) + .toBuffer(); + expect(bigPng.length).toBeGreaterThan(1 * 1024 * 1024); + + const fetchMock = vi.spyOn(globalThis, "fetch").mockResolvedValue({ + ok: true, + body: true, + arrayBuffer: async () => + bigPng.buffer.slice( + bigPng.byteOffset, + bigPng.byteOffset + bigPng.byteLength, + ), + headers: { get: () => "image/png" }, + status: 200, + } as Response); + + await monitorWebProvider(false, listenerFactory, false, resolver); + expect(capturedOnMessage).toBeDefined(); + + await capturedOnMessage?.({ + body: "hello", + from: "+1", + to: "+2", + id: "msg1", + sendComposing, + reply, + sendMedia, + }); + + expect(sendMedia).toHaveBeenCalledTimes(1); + const payload = sendMedia.mock.calls[0][0] as { + image: Buffer; + caption?: string; + mimetype?: string; + }; + expect(payload.image.length).toBeLessThanOrEqual(1 * 1024 * 1024); + expect(payload.mimetype).toBe("image/jpeg"); + expect(reply).not.toHaveBeenCalled(); + + fetchMock.mockRestore(); + }); + it("logs outbound replies to file", async () => { const logPath = path.join( os.tmpdir(), diff --git a/src/provider-web.ts b/src/provider-web.ts index 4bb945c46..5e5efd0e9 100644 --- a/src/provider-web.ts +++ b/src/provider-web.ts @@ -14,8 +14,10 @@ import { type WAMessage, } from "@whiskeysockets/baileys"; import qrcode from "qrcode-terminal"; +import sharp from "sharp"; import { getReplyFromConfig } from "./auto-reply/reply.js"; import { waitForever } from "./cli/wait.js"; +import { loadConfig } from "./config/config.js"; import { danger, info, isVerbose, logVerbose, success } from "./globals.js"; import { logInfo } from "./logger.js"; import { getChildLogger } from "./logging.js"; @@ -30,6 +32,7 @@ function formatDuration(ms: number) { } const WA_WEB_AUTH_DIR = path.join(os.homedir(), ".warelay", "credentials"); +const DEFAULT_WEB_MEDIA_BYTES = 5 * 1024 * 1024; export async function createWaSocket(printQr: boolean, verbose: boolean) { const logger = getChildLogger( @@ -418,6 +421,12 @@ export async function monitorWebProvider( abortSignal?: AbortSignal, ) { const replyLogger = getChildLogger({ module: "web-auto-reply" }); + const cfg = loadConfig(); + const configuredMaxMb = cfg.inbound?.reply?.mediaMaxMb; + const maxMediaBytes = + typeof configuredMaxMb === "number" && configuredMaxMb > 0 + ? configuredMaxMb * 1024 * 1024 + : DEFAULT_WEB_MEDIA_BYTES; const stopRequested = () => abortSignal?.aborted === true; const abortPromise = abortSignal && @@ -457,7 +466,9 @@ export async function monitorWebProvider( }, ); if (!replyResult || (!replyResult.text && !replyResult.mediaUrl)) { - logVerbose("Skipping auto-reply: no text/media returned from resolver"); + logVerbose( + "Skipping auto-reply: no text/media returned from resolver", + ); return; } try { @@ -466,7 +477,10 @@ export async function monitorWebProvider( `Web auto-reply media detected: ${replyResult.mediaUrl}`, ); try { - const media = await loadWebMedia(replyResult.mediaUrl); + const media = await loadWebMedia( + replyResult.mediaUrl, + maxMediaBytes, + ); if (isVerbose()) { logVerbose( `Web auto-reply media size: ${(media.buffer.length / (1024 * 1024)).toFixed(2)}MB`, @@ -713,39 +727,45 @@ async function downloadInboundMedia( async function loadWebMedia( mediaUrl: string, + maxBytes: number = DEFAULT_WEB_MEDIA_BYTES, ): Promise<{ buffer: Buffer; contentType?: string }> { - const MAX_WEB_BYTES = 16 * 1024 * 1024; // 16MB: web provider can handle larger than Twilio + // Hard cap to avoid Anthropic/WhatsApp 5MB image limit that triggers API 400s. if (mediaUrl.startsWith("file://")) { mediaUrl = mediaUrl.replace("file://", ""); } + + const optimizeAndClamp = async (buffer: Buffer) => { + const originalSize = buffer.length; + const optimized = await optimizeImageToJpeg(buffer, maxBytes); + if (optimized.optimizedSize < originalSize && isVerbose()) { + logVerbose( + `Optimized media from ${(originalSize / (1024 * 1024)).toFixed(2)}MB to ${(optimized.optimizedSize / (1024 * 1024)).toFixed(2)}MB (side≤${optimized.resizeSide}px, q=${optimized.quality})`, + ); + } + if (optimized.buffer.length > maxBytes) { + throw new Error( + `Media could not be reduced below ${(maxBytes / (1024 * 1024)).toFixed(0)}MB (got ${( + optimized.buffer.length / (1024 * 1024) + ).toFixed(2)}MB)`, + ); + } + return { + buffer: optimized.buffer, + contentType: "image/jpeg", + }; + }; + if (/^https?:\/\//i.test(mediaUrl)) { const res = await fetch(mediaUrl); if (!res.ok || !res.body) { throw new Error(`Failed to fetch media: HTTP ${res.status}`); } const array = Buffer.from(await res.arrayBuffer()); - if (array.length > MAX_WEB_BYTES) { - throw new Error( - `Media exceeds ${Math.floor(MAX_WEB_BYTES / (1024 * 1024))}MB limit (got ${( - array.length / (1024 * 1024) - ).toFixed(1)}MB)`, - ); - } - return { - buffer: array, - contentType: res.headers.get("content-type") ?? undefined, - }; + return optimizeAndClamp(array); } // Local path const data = await fs.readFile(mediaUrl); - if (data.length > MAX_WEB_BYTES) { - throw new Error( - `Media exceeds ${Math.floor(MAX_WEB_BYTES / (1024 * 1024))}MB limit (got ${( - data.length / (1024 * 1024) - ).toFixed(1)}MB)`, - ); - } - return { buffer: data }; + return optimizeAndClamp(data); } function getStatusCode(err: unknown) { @@ -764,3 +784,60 @@ function formatError(err: unknown): string { return `status=${status ?? "unknown"} code=${code ?? "unknown"}`; return String(err); } + +async function optimizeImageToJpeg( + buffer: Buffer, + maxBytes: number, +): Promise<{ + buffer: Buffer; + optimizedSize: number; + resizeSide: number; + quality: number; +}> { + // Try a grid of sizes/qualities until under the limit. + const sides = [2048, 1536, 1280, 1024, 800]; + const qualities = [80, 70, 60, 50, 40]; + let smallest: { + buffer: Buffer; + size: number; + resizeSide: number; + quality: number; + } | null = null; + + for (const side of sides) { + for (const quality of qualities) { + const out = await sharp(buffer) + .resize({ + width: side, + height: side, + fit: "inside", + withoutEnlargement: true, + }) + .jpeg({ quality, mozjpeg: true }) + .toBuffer(); + const size = out.length; + if (!smallest || size < smallest.size) { + smallest = { buffer: out, size, resizeSide: side, quality }; + } + if (size <= maxBytes) { + return { + buffer: out, + optimizedSize: size, + resizeSide: side, + quality, + }; + } + } + } + + if (smallest) { + return { + buffer: smallest.buffer, + optimizedSize: smallest.size, + resizeSide: smallest.resizeSide, + quality: smallest.quality, + }; + } + + throw new Error("Failed to optimize image"); +}