Files
Moltbot/src/media/parse.ts
0xRain 94bc62ad46 fix(media): strip MEDIA: lines with local paths instead of leaking as text (#14399)
When internal tools (e.g. TTS) emit MEDIA:/tmp/... with absolute paths,
isValidMedia() correctly rejects them for security. However, the rejected
MEDIA: line was kept as visible text in the output, leaking the path to
the user.

Now strip MEDIA: lines that look like local paths even when the path
is invalid, so they never appear as user-visible text.

Closes #14365

Co-authored-by: Echo Ito <echoito@MacBook-Air.local>
2026-02-12 07:45:22 -06:00

252 lines
7.7 KiB
TypeScript

// Shared helpers for parsing MEDIA tokens from command/stdout text.
import { parseFenceSpans } from "../markdown/fences.js";
import { parseAudioTag } from "./audio-tags.js";
// Matches a `MEDIA:` token (case-insensitive, at a word boundary), then optional whitespace
// and an optional opening backtick, and captures the rest of the line as the payload.
// The capture group is greedy and runs to the next newline or end of input, so a trailing
// backtick or trailing punctuation ends up inside the capture; the final optional backtick
// never gets a chance to consume it.
// NOTE: the `g` flag makes this regex stateful (`lastIndex`) when used with `test()`/`exec()`.
export const MEDIA_TOKEN_RE = /\bMEDIA:\s*`?([^\n]+)`?/gi;
/**
 * Removes a leading `file://` prefix from a media source.
 *
 * The check is case-sensitive and only the prefix is removed; any later
 * occurrence of `file://` inside the string is left alone.
 *
 * @param src - Media source string (URL or path).
 * @returns `src` without the leading `file://`, or `src` unchanged when it has no such prefix.
 */
export function normalizeMediaSource(src: string) {
  const fileScheme = "file://";
  if (!src.startsWith(fileScheme)) {
    return src;
  }
  return src.slice(fileScheme.length);
}
// Strips wrapper characters from both ends of a raw token:
// - leading run of backticks, quotes, `[`, `{`, `(`
// - trailing run of backticks, quotes, backslashes, `}`, `)`, `]`, `,`
// Whitespace is not touched, so a token that starts or ends with whitespace keeps
// the wrapper characters hidden behind it.
function cleanCandidate(raw: string) {
  const leadingWrappers = /^[`"'[{(]+/;
  const trailingWrappers = /[`"'\\})\],]+$/;
  const withoutLeading = raw.replace(leadingWrappers, "");
  return withoutLeading.replace(trailingWrappers, "");
}
// Windows drive-letter prefix: a single letter, a colon, then `\` or `/` (e.g. `C:\` or `c:/`).
const WINDOWS_DRIVE_RE = /^[a-zA-Z]:[\\/]/;
// Leading URI-style scheme: a letter, then any letters/digits/`+`/`.`/`-`, then `:`.
// Note that a bare drive prefix such as `C:` also matches this pattern.
const SCHEME_RE = /^[a-zA-Z][a-zA-Z0-9+.-]*:/;
// Trailing file extension: a dot followed by 1-10 word characters at the very end of the string.
const HAS_FILE_EXT = /\.\w{1,10}$/;
// Recognize local file path patterns. Security validation is deferred to the
// load layer (loadWebMedia / resolveSandboxedMediaSource) which has the context
// needed to enforce sandbox roots and allowed directories.
//
// A candidate counts as a local path when it:
// - starts with `/`, `./`, `../`, `~`, or two backslashes, or
// - starts with a Windows drive prefix, or
// - has no leading URI-style scheme and contains a `/` or `\` separator.
function isLikelyLocalPath(candidate: string): boolean {
  const localPrefixes = ["/", "./", "../", "~", "\\\\"];
  if (localPrefixes.some((prefix) => candidate.startsWith(prefix))) {
    return true;
  }
  if (WINDOWS_DRIVE_RE.test(candidate)) {
    return true;
  }
  // Anything that carries a scheme at this point is not a path.
  if (SCHEME_RE.test(candidate)) {
    return false;
  }
  return candidate.includes("/") || candidate.includes("\\");
}
// Decides whether a cleaned candidate string is acceptable as a media source.
//
// Rejected: empty strings, strings longer than 4096 characters, and (unless
// `opts.allowSpaces` is set) strings containing any whitespace.
// Accepted: `http://` / `https://` URLs (case-insensitive) and anything
// `isLikelyLocalPath` recognizes. With `opts.allowBareFilename`, a scheme-less
// string ending in a file extension is accepted as well.
function isValidMedia(
  candidate: string,
  opts?: { allowSpaces?: boolean; allowBareFilename?: boolean },
) {
  const length = candidate.length;
  if (length === 0 || length > 4096) {
    return false;
  }

  const spacesAllowed = Boolean(opts?.allowSpaces);
  if (!spacesAllowed && /\s/.test(candidate)) {
    return false;
  }

  const isHttpUrl = /^https?:\/\//i.test(candidate);
  if (isHttpUrl || isLikelyLocalPath(candidate)) {
    return true;
  }

  // Bare filenames (e.g. "image.png") are accepted only when the caller opts in,
  // so that space-split path fragments are not treated as separate media items.
  const bareFilenameAllowed = Boolean(opts?.allowBareFilename);
  return bareFilenameAllowed && !SCHEME_RE.test(candidate) && HAS_FILE_EXT.test(candidate);
}
// Returns the trimmed contents of `value` when, after trimming, it is wrapped in a
// matching pair of double quotes, single quotes, or backticks. Returns `undefined`
// when it is not wrapped that way (including when fewer than two characters remain).
// An empty pair such as `""` yields the empty string.
function unwrapQuoted(value: string): string | undefined {
  const text = value.trim();
  if (text.length < 2) {
    return undefined;
  }
  const quote = text.charAt(0);
  const isQuoteChar = quote === `"` || quote === "'" || quote === "`";
  if (!isQuoteChar || !text.endsWith(quote)) {
    return undefined;
  }
  return text.slice(1, text.length - 1).trim();
}
// Reports whether `offset` falls within any of the given spans.
// Each span is half-open: `start` is inside, `end` is not.
function isInsideFence(fenceSpans: Array<{ start: number; end: number }>, offset: number): boolean {
  for (const { start, end } of fenceSpans) {
    if (start <= offset && offset < end) {
      return true;
    }
  }
  return false;
}
/**
 * Separates `MEDIA:` directives from command/stdout text.
 *
 * A line is treated as a directive only when its first non-whitespace characters are
 * `MEDIA:` and the line does not start inside a fenced code block. The payload after
 * `MEDIA:` is interpreted as follows:
 * - a payload wrapped in matching quotes/backticks is a single source (spaces allowed);
 * - otherwise it is split on whitespace and every piece is validated on its own;
 * - if exactly one piece was accepted, others were rejected, and the payload looks like a
 *   local path, the whole payload is retried as one path containing spaces;
 * - if nothing was accepted, the whole payload is retried once more, this time also
 *   accepting a bare filename.
 *
 * Rejected pieces that sit next to accepted media stay in the text. A payload without any
 * accepted media is dropped when it looks like a local path and kept verbatim otherwise.
 *
 * @param raw - Text that may contain `MEDIA:` lines and an audio tag.
 * @returns `text` with consumed directives removed and whitespace runs / blank lines
 *   collapsed; `mediaUrls` and `mediaUrl` (the first entry) when media was accepted;
 *   `audioAsVoice: true` when `parseAudioTag` reports it. When no directive was consumed
 *   and no audio flag was found, `text` is the input with only trailing whitespace removed.
 */
export function splitMediaFromOutput(raw: string): {
  text: string;
  mediaUrls?: string[];
  mediaUrl?: string; // legacy first item for backward compatibility
  audioAsVoice?: boolean; // true if [[audio_as_voice]] tag was found
} {
  // KNOWN: Leading whitespace is semantically meaningful in Markdown (lists, indented fences).
  // We only trim the end; token cleanup below handles removing `MEDIA:` lines.
  const trimmedRaw = raw.trimEnd();
  if (!trimmedRaw.trim()) {
    return { text: "" };
  }

  const media: string[] = [];
  let foundMediaToken = false;

  // Parse fenced code blocks to avoid extracting MEDIA tokens from inside them.
  const fenceSpans = parseFenceSpans(trimmedRaw);

  // Collect tokens line by line so we can strip them cleanly.
  const keptLines: string[] = [];
  let nextLineOffset = 0; // Character offset of the next line, used for fence checking.

  for (const line of trimmedRaw.split("\n")) {
    const lineOffset = nextLineOffset;
    nextLineOffset += line.length + 1; // +1 for newline

    // Lines inside a fence, lines that do not start with MEDIA:, and lines where the
    // token regex finds nothing are passed through untouched.
    const isDirective =
      !isInsideFence(fenceSpans, lineOffset) && line.trimStart().startsWith("MEDIA:");
    const matches = isDirective ? Array.from(line.matchAll(MEDIA_TOKEN_RE)) : [];
    if (matches.length === 0) {
      keptLines.push(line);
      continue;
    }

    const pieces: string[] = [];
    let cursor = 0;
    for (const match of matches) {
      const start = match.index ?? 0;
      pieces.push(line.slice(cursor, start));
      cursor = start + match[0].length;

      const payload = match[1];
      const unwrapped = unwrapQuoted(payload);
      // Trim before any whole-payload use. Only the end of the full text is trimmed above,
      // so on interior lines the captured payload can still carry trailing spaces or a
      // `\r` from CRLF output (or a leading space after an opening backtick). Left in
      // place, that whitespace would end up inside the media path and would defeat the
      // end-anchored file-extension check for bare filenames.
      const payloadValue = (unwrapped ?? payload).trim();
      const parts = unwrapped ? [unwrapped] : payload.split(/\s+/).filter(Boolean);
      const partOpts = unwrapped ? { allowSpaces: true } : undefined;

      const mediaStartIndex = media.length;
      const invalidParts: string[] = [];
      let validCount = 0;
      for (const part of parts) {
        const candidate = normalizeMediaSource(cleanCandidate(part));
        if (isValidMedia(candidate, partOpts)) {
          media.push(candidate);
          validCount += 1;
        } else {
          invalidParts.push(part);
        }
      }
      let hasValidMedia = validCount > 0;

      const looksLikeLocalPath =
        isLikelyLocalPath(payloadValue) || payloadValue.startsWith("file://");

      // One accepted piece plus leftovers on a path-like payload most likely means a
      // single local path containing spaces was split apart; retry it as a whole.
      if (
        !unwrapped &&
        validCount === 1 &&
        invalidParts.length > 0 &&
        /\s/.test(payloadValue) &&
        looksLikeLocalPath
      ) {
        const fallback = normalizeMediaSource(cleanCandidate(payloadValue));
        if (isValidMedia(fallback, { allowSpaces: true })) {
          // Replace the fragment accepted above with the full path.
          media.splice(mediaStartIndex, media.length - mediaStartIndex, fallback);
          invalidParts.length = 0;
        }
      }

      // Nothing accepted piece-by-piece: retry the whole payload, also allowing a bare
      // filename such as "image.png".
      if (!hasValidMedia) {
        const fallback = normalizeMediaSource(cleanCandidate(payloadValue));
        if (isValidMedia(fallback, { allowSpaces: true, allowBareFilename: true })) {
          media.push(fallback);
          hasValidMedia = true;
          invalidParts.length = 0;
        }
      }

      if (hasValidMedia) {
        foundMediaToken = true;
        // Keep whatever could not be interpreted as media as visible text.
        if (invalidParts.length > 0) {
          pieces.push(invalidParts.join(" "));
        }
      } else if (looksLikeLocalPath) {
        // Strip MEDIA: lines with local paths even when invalid (e.g. absolute paths
        // from internal tools like TTS). They should never leak as visible text.
        foundMediaToken = true;
      } else {
        // If no valid media was found in this match, keep the original token text.
        pieces.push(match[0]);
      }
    }
    pieces.push(line.slice(cursor));

    const cleanedLine = pieces
      .join("")
      .replace(/[ \t]{2,}/g, " ")
      .trim();
    // If the line becomes empty, drop it.
    if (cleanedLine) {
      keptLines.push(cleanedLine);
    }
  }

  let cleanedText = keptLines
    .join("\n")
    .replace(/[ \t]+\n/g, "\n")
    .replace(/[ \t]{2,}/g, " ")
    .replace(/\n{2,}/g, "\n")
    .trim();

  // Detect and strip [[audio_as_voice]] tag
  const audioTagResult = parseAudioTag(cleanedText);
  const hasAudioAsVoice = audioTagResult.audioAsVoice;
  if (audioTagResult.hadTag) {
    cleanedText = audioTagResult.text.replace(/\n{2,}/g, "\n").trim();
  }

  if (media.length === 0) {
    const result: ReturnType<typeof splitMediaFromOutput> = {
      // Return cleaned text if we found a media token OR audio tag, otherwise original
      text: foundMediaToken || hasAudioAsVoice ? cleanedText : trimmedRaw,
    };
    if (hasAudioAsVoice) {
      result.audioAsVoice = true;
    }
    return result;
  }

  return {
    text: cleanedText,
    mediaUrls: media,
    mediaUrl: media[0],
    ...(hasAudioAsVoice ? { audioAsVoice: true } : {}),
  };
}