fix(agents): restrict MEDIA: token parsing to line start in tool results (#18510)

2026-02-17 04:14:43 +08:00
parent 93fbe6482b
commit 0587e4cc73
2 changed files with 105 additions and 11 deletions
--- a/src/agents/pi-embedded-subscribe.tools.media.test.ts
+++ b/src/agents/pi-embedded-subscribe.tools.media.test.ts
@@ -129,4 +129,92 @@ describe("extractToolResultMediaPaths", () => {
    };
    expect(extractToolResultMediaPaths(result)).toEqual([]);
  });
+
+  it("does not match <media:audio> placeholder as a MEDIA: token", () => {
+    const result = {
+      content: [
+        {
+          type: "text",
+          text: "<media:audio> placeholder with successful preflight voice transcript",
+        },
+      ],
+    };
+    expect(extractToolResultMediaPaths(result)).toEqual([]);
+  });
+
+  it("does not match <media:image> placeholder as a MEDIA: token", () => {
+    const result = {
+      content: [{ type: "text", text: "<media:image> (2 images)" }],
+    };
+    expect(extractToolResultMediaPaths(result)).toEqual([]);
+  });
+
+  it("does not match other media placeholder variants", () => {
+    for (const tag of [
+      "<media:video>",
+      "<media:document>",
+      "<media:sticker>",
+      "<media:attachment>",
+    ]) {
+      const result = {
+        content: [{ type: "text", text: `${tag} some context` }],
+      };
+      expect(extractToolResultMediaPaths(result)).toEqual([]);
+    }
+  });
+
+  it("does not match mid-line MEDIA: in documentation text", () => {
+    const result = {
+      content: [
+        {
+          type: "text",
+          text: 'Use MEDIA: "https://example.com/voice.ogg", asVoice: true to send voice',
+        },
+      ],
+    };
+    expect(extractToolResultMediaPaths(result)).toEqual([]);
+  });
+
+  it("still extracts MEDIA: at line start after other text lines", () => {
+    const result = {
+      content: [
+        {
+          type: "text",
+          text: "Generated screenshot\nMEDIA:/tmp/screenshot.png\nDone",
+        },
+      ],
+    };
+    expect(extractToolResultMediaPaths(result)).toEqual(["/tmp/screenshot.png"]);
+  });
+
+  it("extracts indented MEDIA: line", () => {
+    const result = {
+      content: [{ type: "text", text: "  MEDIA:/tmp/indented.png" }],
+    };
+    expect(extractToolResultMediaPaths(result)).toEqual(["/tmp/indented.png"]);
+  });
+
+  it("extracts valid MEDIA: line while ignoring <media:audio> on another line", () => {
+    const result = {
+      content: [
+        {
+          type: "text",
+          text: "<media:audio> was transcribed\nMEDIA:/tmp/tts-output.opus\nDone",
+        },
+      ],
+    };
+    expect(extractToolResultMediaPaths(result)).toEqual(["/tmp/tts-output.opus"]);
+  });
+
+  it("extracts multiple MEDIA: lines from a single text block", () => {
+    const result = {
+      content: [
+        {
+          type: "text",
+          text: "MEDIA:/tmp/page1.png\nSome text\nMEDIA:/tmp/page2.png",
+        },
+      ],
+    };
+    expect(extractToolResultMediaPaths(result)).toEqual(["/tmp/page1.png", "/tmp/page2.png"]);
+  });
 });
--- a/src/agents/pi-embedded-subscribe.tools.ts
+++ b/src/agents/pi-embedded-subscribe.tools.ts
@@ -153,17 +153,23 @@ export function extractToolResultMediaPaths(result: unknown): string[] {
      continue;
    }
    if (entry.type === "text" && typeof entry.text === "string") {
-      // Reset lastIndex since MEDIA_TOKEN_RE is global.
-      MEDIA_TOKEN_RE.lastIndex = 0;
-      let match: RegExpExecArray | null;
-      while ((match = MEDIA_TOKEN_RE.exec(entry.text)) !== null) {
-        // Strip surrounding quotes/backticks and whitespace (mirrors cleanCandidate in media/parse).
-        const p = match[1]
-          ?.replace(/^[`"'[{(]+/, "")
-          .replace(/[`"'\]})\\,]+$/, "")
-          .trim();
-        if (p && p.length <= 4096) {
-          paths.push(p);
+      // Only parse lines whose first non-whitespace text is "MEDIA:" (exact case),
+      // so placeholders like <media:audio> and mid-line mentions never match.
+      // Mirrors the line-start guard in splitMediaFromOutput (media/parse.ts).
+      for (const line of entry.text.split("\n")) {
+        if (!line.trimStart().startsWith("MEDIA:")) {
+          continue;
+        }
+        MEDIA_TOKEN_RE.lastIndex = 0;
+        let match: RegExpExecArray | null;
+        while ((match = MEDIA_TOKEN_RE.exec(line)) !== null) {
+          const p = match[1]
+            ?.replace(/^[`"'[{(]+/, "")
+            .replace(/[`"'\]})\\,]+$/, "")
+            .trim();
+          if (p && p.length <= 4096) {
+            paths.push(p);
+          }
        }
      }
    }