// Provenance: post-patch state of isBinaryMediaMime, taken from
//   commit 86a156db2627d68fb00b320c9ef7ca508a2a0df9
//   Author:  Vignesh Natarajan
//   Date:    Sat, 14 Feb 2026 19:22:15 -0800
//   Subject: fix (media-understanding): treat binary application mimes as non-text
//   File:    src/media-understanding/apply.ts

/** Top-level media families whose payloads are always treated as binary. */
const BINARY_MEDIA_PREFIXES = ["image/", "audio/", "video/"] as const;

/** Exact `application/*` types (opaque bytes and archives) treated as binary. */
const BINARY_APPLICATION_MIMES: ReadonlySet<string> = new Set([
  "application/octet-stream",
  "application/zip",
  "application/x-zip-compressed",
  "application/gzip",
  "application/x-gzip",
  "application/x-rar-compressed",
  "application/x-7z-compressed",
]);

/**
 * Reports whether a MIME type denotes binary content that should not be
 * treated as text.
 *
 * The check is performed on the MIME "essence": any parameters after the
 * first `;` are dropped, surrounding whitespace is trimmed, and the result is
 * lowercased. This keeps values such as `application/vnd.api+json; charset=utf-8`
 * or `Application/ZIP` from being misclassified.
 *
 * @param mime - MIME type to classify; may carry parameters or mixed casing.
 * @returns `true` for `image/*`, `audio/*`, `video/*`, the listed
 *   `application/*` archive/opaque types, and `application/vnd.*` types that
 *   do not end in `+json` or `+xml`; `false` otherwise, including when `mime`
 *   is missing or empty.
 */
function isBinaryMediaMime(mime?: string): boolean {
  if (!mime) {
    return false;
  }

  // Compare only the type/subtype pair; parameters and casing carry no
  // information about whether the payload is binary.
  const [rawEssence = ""] = mime.split(";", 1);
  const essence = rawEssence.trim().toLowerCase();

  if (BINARY_MEDIA_PREFIXES.some((prefix) => essence.startsWith(prefix))) {
    return true;
  }
  if (BINARY_APPLICATION_MIMES.has(essence)) {
    return true;
  }
  if (essence.startsWith("application/vnd.")) {
    // Keep vendor +json/+xml payloads eligible for text extraction while
    // treating the common binary vendor family (Office, archives, etc.) as binary.
    return !(essence.endsWith("+json") || essence.endsWith("+xml"));
  }
  return false;
}