fix(agents): restrict MEDIA: token parsing to line start in tool results (#18510)

This commit is contained in:
yinghaosang
2026-02-17 04:14:43 +08:00
committed by Peter Steinberger
parent 93fbe6482b
commit 0587e4cc73
2 changed files with 105 additions and 11 deletions

View File

@@ -129,4 +129,92 @@ describe("extractToolResultMediaPaths", () => {
};
expect(extractToolResultMediaPaths(result)).toEqual([]);
});
// A "<media:audio>" placeholder contains the substring "media:" but must not
// be reported as a MEDIA: token; the extractor is expected to return nothing.
it("does not match <media:audio> placeholder as a MEDIA: token", () => {
  const text = "<media:audio> placeholder with successful preflight voice transcript";
  const toolResult = { content: [{ type: "text", text }] };

  const extracted = extractToolResultMediaPaths(toolResult);

  expect(extracted).toEqual([]);
});
// Same guard as the audio case, exercised with the "<media:image>" placeholder.
it("does not match <media:image> placeholder as a MEDIA: token", () => {
  const textEntry = { type: "text", text: "<media:image> (2 images)" };
  const toolResult = { content: [textEntry] };

  const extracted = extractToolResultMediaPaths(toolResult);

  expect(extracted).toEqual([]);
});
// Runs the same "no paths extracted" expectation over the remaining
// "<media:*>" placeholder spellings, each followed by trailing context text.
it("does not match other media placeholder variants", () => {
  const placeholderTags = [
    "<media:video>",
    "<media:document>",
    "<media:sticker>",
    "<media:attachment>",
  ];

  for (const placeholder of placeholderTags) {
    const toolResult = {
      content: [{ type: "text", text: `${placeholder} some context` }],
    };
    const extracted = extractToolResultMediaPaths(toolResult);
    expect(extracted).toEqual([]);
  }
});
// "MEDIA:" appearing in the middle of a sentence (e.g. usage documentation)
// is not at line start, so no path should be extracted from it.
it("does not match mid-line MEDIA: in documentation text", () => {
  const text = 'Use MEDIA: "https://example.com/voice.ogg", asVoice: true to send voice';
  const toolResult = { content: [{ type: "text", text }] };

  const extracted = extractToolResultMediaPaths(toolResult);

  expect(extracted).toEqual([]);
});
// A MEDIA: line is still picked up when it is surrounded by ordinary text
// lines within the same text entry.
it("still extracts MEDIA: at line start after other text lines", () => {
  const text = "Generated screenshot\nMEDIA:/tmp/screenshot.png\nDone";
  const toolResult = { content: [{ type: "text", text }] };

  const extracted = extractToolResultMediaPaths(toolResult);

  expect(extracted).toEqual(["/tmp/screenshot.png"]);
});
// Leading whitespace before "MEDIA:" must not prevent extraction.
it("extracts indented MEDIA: line", () => {
  const textEntry = { type: "text", text: " MEDIA:/tmp/indented.png" };
  const toolResult = { content: [textEntry] };

  const extracted = extractToolResultMediaPaths(toolResult);

  expect(extracted).toEqual(["/tmp/indented.png"]);
});
// Mixed input: a "<media:audio>" placeholder line and a real MEDIA: line in
// the same text entry. Only the real MEDIA: path should come back.
it("extracts valid MEDIA: line while ignoring <media:audio> on another line", () => {
  const text = "<media:audio> was transcribed\nMEDIA:/tmp/tts-output.opus\nDone";
  const toolResult = { content: [{ type: "text", text }] };

  const extracted = extractToolResultMediaPaths(toolResult);

  expect(extracted).toEqual(["/tmp/tts-output.opus"]);
});
// Several MEDIA: lines in one text entry are all returned, in the order
// they appear in the text.
it("extracts multiple MEDIA: lines from a single text block", () => {
  const text = "MEDIA:/tmp/page1.png\nSome text\nMEDIA:/tmp/page2.png";
  const toolResult = { content: [{ type: "text", text }] };

  const extracted = extractToolResultMediaPaths(toolResult);

  const expectedPaths = ["/tmp/page1.png", "/tmp/page2.png"];
  expect(extracted).toEqual(expectedPaths);
});
});

View File

@@ -153,17 +153,23 @@ export function extractToolResultMediaPaths(result: unknown): string[] {
continue;
}
if (entry.type === "text" && typeof entry.text === "string") {
// Reset lastIndex since MEDIA_TOKEN_RE is global.
MEDIA_TOKEN_RE.lastIndex = 0;
let match: RegExpExecArray | null;
while ((match = MEDIA_TOKEN_RE.exec(entry.text)) !== null) {
// Strip surrounding quotes/backticks and whitespace (mirrors cleanCandidate in media/parse).
const p = match[1]
?.replace(/^[`"'[{(]+/, "")
.replace(/[`"'\]})\\,]+$/, "")
.trim();
if (p && p.length <= 4096) {
paths.push(p);
// Only parse lines that begin with "MEDIA:" once leading whitespace is
// ignored (the check is case-sensitive), to avoid false-matching
// placeholders like <media:audio> or mid-line mentions of MEDIA:.
// Mirrors the line-start guard in splitMediaFromOutput (media/parse.ts).
for (const line of entry.text.split("\n")) {
if (!line.trimStart().startsWith("MEDIA:")) {
continue;
}
MEDIA_TOKEN_RE.lastIndex = 0;
let match: RegExpExecArray | null;
while ((match = MEDIA_TOKEN_RE.exec(line)) !== null) {
const p = match[1]
?.replace(/^[`"'[{(]+/, "")
.replace(/[`"'\]})\\,]+$/, "")
.trim();
if (p && p.length <= 4096) {
paths.push(p);
}
}
}
}