From b7177242758b5b6d53cdf37b7805ebe93b25f2a8 Mon Sep 17 00:00:00 2001
From: Shakker <165377636+shakkernerd@users.noreply.github.com>
Date: Thu, 29 Jan 2026 02:39:01 +0000
Subject: [PATCH] fix: add security hardening for media text attachments
 (#3700)

* fix: Prevent XML attribute injection by escaping special characters in file name and MIME type attributes.

* fix: text attachment MIME misclassification with security hardening (#3628)

- Fix CSV/TSV inference from content heuristics
- Add UTF-16 detection and BOM handling
- Add XML attribute escaping for file output (security)
- Add MIME override logging for auditability
- Add comprehensive test coverage for edge cases

Thanks @frankekn
---
 CHANGELOG.md                          |   1 +
 src/media-understanding/apply.test.ts | 124 ++++++++++++++++++++++++++
 src/media-understanding/apply.ts      |  26 +++++-
 3 files changed, 150 insertions(+), 1 deletion(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 37ae5fdf2..d32a9f7a3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -107,6 +107,7 @@ Status: beta.
 - Telegram: centralize API error logging for delivery and bot calls. (#2492) Thanks @altryne.
 - Voice Call: enforce Twilio webhook signature verification for ngrok URLs; disable ngrok free tier bypass by default.
 - Security: harden Tailscale Serve auth by validating identity via local tailscaled before trusting headers.
+- Media: fix text attachment MIME misclassification with CSV/TSV inference and UTF-16 detection; add XML attribute escaping for file output. (#3628) Thanks @frankekn.
 - Build: align memory-core peer dependency with lockfile.
 - Security: add mDNS discovery mode with minimal default to reduce information disclosure. (#1882) Thanks @orlyjamie.
 - Security: harden URL fetches with DNS pinning to reduce rebinding risk. Thanks Chris Zheng.
diff --git a/src/media-understanding/apply.test.ts b/src/media-understanding/apply.test.ts
index 1fb8ae240..e06adbc24 100644
--- a/src/media-understanding/apply.test.ts
+++ b/src/media-understanding/apply.test.ts
@@ -546,4 +546,128 @@ describe("applyMediaUnderstanding", () => {
     expect(ctx.Body).toContain('<file name="report.mp3" mime="text/tab-separated-values">');
     expect(ctx.Body).toContain("a\tb\tc");
   });
+
+  it("escapes XML special characters in filenames to prevent injection", async () => {
+    const { applyMediaUnderstanding } = await loadApply();
+    const dir = await fs.mkdtemp(path.join(os.tmpdir(), "moltbot-media-"));
+    // Create file with XML special characters in the name (what filesystem allows)
+    // Note: The sanitizeFilename in store.ts would strip most dangerous chars,
+    // but we test that even if some slip through, they get escaped in output
+    const filePath = path.join(dir, "file<test>.txt");
+    await fs.writeFile(filePath, "safe content");
+
+    const ctx: MsgContext = {
+      Body: "<media:document>",
+      MediaPath: filePath,
+      MediaType: "text/plain",
+    };
+    const cfg: MoltbotConfig = {
+      tools: {
+        media: {
+          audio: { enabled: false },
+          image: { enabled: false },
+          video: { enabled: false },
+        },
+      },
+    };
+
+    const result = await applyMediaUnderstanding({ ctx, cfg });
+
+    expect(result.appliedFile).toBe(true);
+    // Verify XML special chars are escaped in the output
+    expect(ctx.Body).toContain("&lt;");
+    expect(ctx.Body).toContain("&gt;");
+    // The raw < and > should not appear unescaped in the name attribute
+    expect(ctx.Body).not.toMatch(/name="[^"]*<[^"]*"/);
+  });
+
+  it("normalizes MIME types to prevent attribute injection", async () => {
+    const { applyMediaUnderstanding } = await loadApply();
+    const dir = await fs.mkdtemp(path.join(os.tmpdir(), "moltbot-media-"));
+    const filePath = path.join(dir, "data.txt");
+    await fs.writeFile(filePath, "test content");
+
+    const ctx: MsgContext = {
+      Body: "<media:document>",
+      MediaPath: filePath,
+      // Attempt to inject via MIME type with quotes - normalization should strip this
+      MediaType: 'text/plain" onclick="alert(1)',
+    };
+    const cfg: MoltbotConfig = {
+      tools: {
+        media: {
+          audio: { enabled: false },
+          image: { enabled: false },
+          video: { enabled: false },
+        },
+      },
+    };
+
+    const result = await applyMediaUnderstanding({ ctx, cfg });
+
+    expect(result.appliedFile).toBe(true);
+    // MIME normalization strips everything after first ; or " - verify injection is blocked
+    expect(ctx.Body).not.toContain("onclick=");
+    expect(ctx.Body).not.toContain("alert(1)");
+    // Verify the MIME type is normalized to just "text/plain"
+    expect(ctx.Body).toContain('mime="text/plain"');
+  });
+
+  it("handles path traversal attempts in filenames safely", async () => {
+    const { applyMediaUnderstanding } = await loadApply();
+    const dir = await fs.mkdtemp(path.join(os.tmpdir(), "moltbot-media-"));
+    // Even if a file somehow got a path-like name, it should be handled safely
+    const filePath = path.join(dir, "normal.txt");
+    await fs.writeFile(filePath, "legitimate content");
+
+    const ctx: MsgContext = {
+      Body: "<media:document>",
+      MediaPath: filePath,
+      MediaType: "text/plain",
+    };
+    const cfg: MoltbotConfig = {
+      tools: {
+        media: {
+          audio: { enabled: false },
+          image: { enabled: false },
+          video: { enabled: false },
+        },
+      },
+    };
+
+    const result = await applyMediaUnderstanding({ ctx, cfg });
+
+    expect(result.appliedFile).toBe(true);
+    // Verify the file was processed and output contains expected structure
+    expect(ctx.Body).toContain('<file name="');
+    expect(ctx.Body).toContain('mime="text/plain"');
+    expect(ctx.Body).toContain("legitimate content");
+  });
+
+  it("handles files with non-ASCII Unicode filenames", async () => {
+    const { applyMediaUnderstanding } = await loadApply();
+    const dir = await fs.mkdtemp(path.join(os.tmpdir(), "moltbot-media-"));
+    const filePath = path.join(dir, "文档.txt");
+    await fs.writeFile(filePath, "中文内容");
+
+    const ctx: MsgContext = {
+      Body: "<media:document>",
+      MediaPath: filePath,
+      MediaType: "text/plain",
+    };
+    const cfg: MoltbotConfig = {
+      tools: {
+        media: {
+          audio: { enabled: false },
+          image: { enabled: false },
+          video: { enabled: false },
+        },
+      },
+    };
+
+    const result = await applyMediaUnderstanding({ ctx, cfg });
+
+    expect(result.appliedFile).toBe(true);
+    expect(ctx.Body).toContain("中文内容");
+  });
 });
diff --git a/src/media-understanding/apply.ts b/src/media-understanding/apply.ts
index e95d27241..7c2a18006 100644
--- a/src/media-understanding/apply.ts
+++ b/src/media-understanding/apply.ts
@@ -75,6 +75,21 @@ const TEXT_EXT_MIME = new Map<string, string>([
   [".xml", "application/xml"],
 ]);
 
+const XML_ESCAPE_MAP: Record<string, string> = {
+  "<": "&lt;",
+  ">": "&gt;",
+  "&": "&amp;",
+  '"': "&quot;",
+  "'": "&apos;",
+};
+
+/**
+ * Escapes special XML characters in attribute values to prevent injection.
+ */
+function xmlEscapeAttr(value: string): string {
+  return value.replace(/[<>&"']/g, (char) => XML_ESCAPE_MAP[char] ?? char);
+}
+
 function resolveFileLimits(cfg: MoltbotConfig) {
   const files = cfg.gateway?.http?.endpoints?.responses?.files;
   return {
@@ -236,6 +251,12 @@ async function extractFileBlocks(params: {
       forcedTextMimeResolved ?? guessedDelimited ?? (textLike ? "text/plain" : undefined);
     const rawMime = bufferResult?.mime ?? attachment.mime;
     const mimeType = textHint ?? normalizeMimeType(rawMime);
+    // Log when MIME type is overridden from non-text to text for auditability
+    if (textHint && rawMime && !rawMime.startsWith("text/")) {
+      logVerbose(
+        `media: MIME override from "${rawMime}" to "${textHint}" for index=${attachment.index}`,
+      );
+    }
     if (!mimeType) {
       if (shouldLogVerbose()) {
         logVerbose(`media: file attachment skipped (unknown mime) index=${attachment.index}`);
@@ -290,7 +311,10 @@ async function extractFileBlocks(params: {
     const safeName = (bufferResult.fileName ?? `file-${attachment.index + 1}`)
       .replace(/[\r\n\t]+/g, " ")
       .trim();
-    blocks.push(`<file name="${safeName}" mime="${mimeType}">\n${blockText}\n</file>`);
+    // Escape XML special characters in attributes to prevent injection
+    blocks.push(
+      `<file name="${xmlEscapeAttr(safeName)}" mime="${xmlEscapeAttr(mimeType)}">\n${blockText}\n</file>`,
+    );
   }
   return blocks;
 }